LLVM  14.0.0git
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetOperations.h"
38 #include "llvm/ADT/SetVector.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
67 #include "llvm/Config/llvm-config.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/Function.h"
71 #include "llvm/MC/LaneBitmask.h"
72 #include "llvm/MC/MCInstrDesc.h"
74 #include "llvm/MC/MCRegisterInfo.h"
75 #include "llvm/Pass.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <climits>
84 #include <cstdint>
85 #include <deque>
86 #include <functional>
87 #include <iterator>
88 #include <map>
89 #include <memory>
90 #include <tuple>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 
96 #define DEBUG_TYPE "pipeliner"
97 
98 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
99 STATISTIC(NumPipelined, "Number of loops software pipelined");
100 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
101 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
102 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
103 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
104 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
105 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
106 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
107 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
108 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
109 
110 /// A command line option to turn software pipelining on or off.
111 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
113  cl::desc("Enable Software Pipelining"));
114 
115 /// A command line option to enable SWP at -Os.
116 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
117  cl::desc("Enable SWP at Os."), cl::Hidden,
118  cl::init(false));
119 
120 /// A command line argument to limit minimum initial interval for pipelining.
121 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
122  cl::desc("Size limit for the MII."),
123  cl::Hidden, cl::init(27));
124 
125 /// A command line argument to limit the number of stages in the pipeline.
126 static cl::opt<int>
127  SwpMaxStages("pipeliner-max-stages",
128  cl::desc("Maximum stages allowed in the generated scheduled."),
129  cl::Hidden, cl::init(3));
130 
131 /// A command line option to disable the pruning of chain dependences due to
132 /// an unrelated Phi.
133 static cl::opt<bool>
134  SwpPruneDeps("pipeliner-prune-deps",
135  cl::desc("Prune dependences between unrelated Phi nodes."),
136  cl::Hidden, cl::init(true));
137 
138 /// A command line option to disable the pruning of loop carried order
139 /// dependences.
140 static cl::opt<bool>
141  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
142  cl::desc("Prune loop carried order dependences."),
143  cl::Hidden, cl::init(true));
144 
145 #ifndef NDEBUG
146 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
147 #endif
148 
149 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
150  cl::ReallyHidden, cl::init(false),
151  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
152 
153 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
154  cl::init(false));
155 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
156  cl::init(false));
157 
159  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
160  cl::desc("Instead of emitting the pipelined code, annotate instructions "
161  "with the generated schedule for feeding into the "
162  "-modulo-schedule-test pass"));
163 
165  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
166  cl::desc(
167  "Use the experimental peeling code generator for software pipelining"));
168 
169 namespace llvm {
170 
171 // A command line option to enable the CopyToPhi DAG mutation.
173  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
174  cl::init(true), cl::ZeroOrMore,
175  cl::desc("Enable CopyToPhi DAG Mutation"));
176 
177 } // end namespace llvm
178 
179 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
180 char MachinePipeliner::ID = 0;
181 #ifndef NDEBUG
183 #endif
185 
187  "Modulo Software Pipelining", false, false)
193  "Modulo Software Pipelining", false, false)
194 
195 /// The "main" function for implementing Swing Modulo Scheduling.
196 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
197  if (skipFunction(mf.getFunction()))
198  return false;
199 
200  if (!EnableSWP)
201  return false;
202 
203  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
205  return false;
206 
207  if (!mf.getSubtarget().enableMachinePipeliner())
208  return false;
209 
210  // Cannot pipeline loops without instruction itineraries if we are using
211  // DFA for the pipeliner.
212  if (mf.getSubtarget().useDFAforSMS() &&
213  (!mf.getSubtarget().getInstrItineraryData() ||
214  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
215  return false;
216 
217  MF = &mf;
218  MLI = &getAnalysis<MachineLoopInfo>();
219  MDT = &getAnalysis<MachineDominatorTree>();
220  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
221  TII = MF->getSubtarget().getInstrInfo();
222  RegClassInfo.runOnMachineFunction(*MF);
223 
224  for (auto &L : *MLI)
225  scheduleLoop(*L);
226 
227  return false;
228 }
229 
230 /// Attempt to perform the SMS algorithm on the specified loop. This function is
231 /// the main entry point for the algorithm. The function identifies candidate
232 /// loops, calculates the minimum initiation interval, and attempts to schedule
233 /// the loop.
234 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
235  bool Changed = false;
236  for (auto &InnerLoop : L)
237  Changed |= scheduleLoop(*InnerLoop);
238 
239 #ifndef NDEBUG
240  // Stop trying after reaching the limit (if any).
241  int Limit = SwpLoopLimit;
242  if (Limit >= 0) {
243  if (NumTries >= SwpLoopLimit)
244  return Changed;
245  NumTries++;
246  }
247 #endif
248 
249  setPragmaPipelineOptions(L);
250  if (!canPipelineLoop(L)) {
251  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
252  ORE->emit([&]() {
253  return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
254  L.getStartLoc(), L.getHeader())
255  << "Failed to pipeline loop";
256  });
257 
258  return Changed;
259  }
260 
261  ++NumTrytoPipeline;
262 
263  Changed = swingModuloScheduler(L);
264 
265  return Changed;
266 }
267 
268 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
269  // Reset the pragma for the next loop in iteration.
270  disabledByPragma = false;
271  II_setByPragma = 0;
272 
273  MachineBasicBlock *LBLK = L.getTopBlock();
274 
275  if (LBLK == nullptr)
276  return;
277 
278  const BasicBlock *BBLK = LBLK->getBasicBlock();
279  if (BBLK == nullptr)
280  return;
281 
282  const Instruction *TI = BBLK->getTerminator();
283  if (TI == nullptr)
284  return;
285 
286  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
287  if (LoopID == nullptr)
288  return;
289 
290  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
291  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
292 
293  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
294  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
295 
296  if (MD == nullptr)
297  continue;
298 
299  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
300 
301  if (S == nullptr)
302  continue;
303 
304  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
305  assert(MD->getNumOperands() == 2 &&
306  "Pipeline initiation interval hint metadata should have two operands.");
308  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
309  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
310  } else if (S->getString() == "llvm.loop.pipeline.disable") {
311  disabledByPragma = true;
312  }
313  }
314 }
315 
316 /// Return true if the loop can be software pipelined. The algorithm is
317 /// restricted to loops with a single basic block. Make sure that the
318 /// branch in the loop can be analyzed.
319 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
320  if (L.getNumBlocks() != 1) {
321  ORE->emit([&]() {
322  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
323  L.getStartLoc(), L.getHeader())
324  << "Not a single basic block: "
325  << ore::NV("NumBlocks", L.getNumBlocks());
326  });
327  return false;
328  }
329 
330  if (disabledByPragma) {
331  ORE->emit([&]() {
332  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
333  L.getStartLoc(), L.getHeader())
334  << "Disabled by Pragma.";
335  });
336  return false;
337  }
338 
339  // Check if the branch can't be understood because we can't do pipelining
340  // if that's the case.
341  LI.TBB = nullptr;
342  LI.FBB = nullptr;
343  LI.BrCond.clear();
344  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
345  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
346  NumFailBranch++;
347  ORE->emit([&]() {
348  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
349  L.getStartLoc(), L.getHeader())
350  << "The branch can't be understood";
351  });
352  return false;
353  }
354 
355  LI.LoopInductionVar = nullptr;
356  LI.LoopCompare = nullptr;
358  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
359  NumFailLoop++;
360  ORE->emit([&]() {
361  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
362  L.getStartLoc(), L.getHeader())
363  << "The loop structure is not supported";
364  });
365  return false;
366  }
367 
368  if (!L.getLoopPreheader()) {
369  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
370  NumFailPreheader++;
371  ORE->emit([&]() {
372  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
373  L.getStartLoc(), L.getHeader())
374  << "No loop preheader found";
375  });
376  return false;
377  }
378 
379  // Remove any subregisters from inputs to phi nodes.
380  preprocessPhiNodes(*L.getHeader());
381  return true;
382 }
383 
384 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
386  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
387 
388  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
389  MachineOperand &DefOp = PI.getOperand(0);
390  assert(DefOp.getSubReg() == 0);
391  auto *RC = MRI.getRegClass(DefOp.getReg());
392 
393  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
394  MachineOperand &RegOp = PI.getOperand(i);
395  if (RegOp.getSubReg() == 0)
396  continue;
397 
398  // If the operand uses a subregister, replace it with a new register
399  // without subregisters, and generate a copy to the new register.
400  Register NewReg = MRI.createVirtualRegister(RC);
401  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
403  const DebugLoc &DL = PredB.findDebugLoc(At);
404  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
405  .addReg(RegOp.getReg(), getRegState(RegOp),
406  RegOp.getSubReg());
407  Slots.insertMachineInstrInMaps(*Copy);
408  RegOp.setReg(NewReg);
409  RegOp.setSubReg(0);
410  }
411  }
412 }
413 
414 /// The SMS algorithm consists of the following main steps:
415 /// 1. Computation and analysis of the dependence graph.
416 /// 2. Ordering of the nodes (instructions).
417 /// 3. Attempt to Schedule the loop.
418 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
419  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
420 
421  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
423 
425  // The kernel should not include any terminator instructions. These
426  // will be added back later.
427  SMS.startBlock(MBB);
428 
429  // Compute the number of 'real' instructions in the basic block by
430  // ignoring terminators.
431  unsigned size = MBB->size();
433  E = MBB->instr_end();
434  I != E; ++I, --size)
435  ;
436 
437  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
438  SMS.schedule();
439  SMS.exitRegion();
440 
441  SMS.finishBlock();
442  return SMS.hasNewSchedule();
443 }
444 
453 }
454 
455 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
456  if (II_setByPragma > 0)
457  MII = II_setByPragma;
458  else
459  MII = std::max(ResMII, RecMII);
460 }
461 
462 void SwingSchedulerDAG::setMAX_II() {
463  if (II_setByPragma > 0)
464  MAX_II = II_setByPragma;
465  else
466  MAX_II = MII + 10;
467 }
468 
469 /// We override the schedule function in ScheduleDAGInstrs to implement the
470 /// scheduling part of the Swing Modulo Scheduling algorithm.
472  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
473  buildSchedGraph(AA);
474  addLoopCarriedDependences(AA);
475  updatePhiDependences();
477  changeDependences();
478  postprocessDAG();
479  LLVM_DEBUG(dump());
480 
481  NodeSetType NodeSets;
482  findCircuits(NodeSets);
483  NodeSetType Circuits = NodeSets;
484 
485  // Calculate the MII.
486  unsigned ResMII = calculateResMII();
487  unsigned RecMII = calculateRecMII(NodeSets);
488 
489  fuseRecs(NodeSets);
490 
491  // This flag is used for testing and can cause correctness problems.
492  if (SwpIgnoreRecMII)
493  RecMII = 0;
494 
495  setMII(ResMII, RecMII);
496  setMAX_II();
497 
498  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
499  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
500 
501  // Can't schedule a loop without a valid MII.
502  if (MII == 0) {
503  LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
504  NumFailZeroMII++;
505  Pass.ORE->emit([&]() {
507  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
508  << "Invalid Minimal Initiation Interval: 0";
509  });
510  return;
511  }
512 
513  // Don't pipeline large loops.
514  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
515  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
516  << ", we don't pipleline large loops\n");
517  NumFailLargeMaxMII++;
518  Pass.ORE->emit([&]() {
520  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
521  << "Minimal Initiation Interval too large: "
522  << ore::NV("MII", (int)MII) << " > "
523  << ore::NV("SwpMaxMii", SwpMaxMii) << "."
524  << "Refer to -pipeliner-max-mii.";
525  });
526  return;
527  }
528 
529  computeNodeFunctions(NodeSets);
530 
531  registerPressureFilter(NodeSets);
532 
533  colocateNodeSets(NodeSets);
534 
535  checkNodeSets(NodeSets);
536 
537  LLVM_DEBUG({
538  for (auto &I : NodeSets) {
539  dbgs() << " Rec NodeSet ";
540  I.dump();
541  }
542  });
543 
544  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
545 
546  groupRemainingNodes(NodeSets);
547 
548  removeDuplicateNodes(NodeSets);
549 
550  LLVM_DEBUG({
551  for (auto &I : NodeSets) {
552  dbgs() << " NodeSet ";
553  I.dump();
554  }
555  });
556 
557  computeNodeOrder(NodeSets);
558 
559  // check for node order issues
560  checkValidNodeOrder(Circuits);
561 
562  SMSchedule Schedule(Pass.MF);
563  Scheduled = schedulePipeline(Schedule);
564 
565  if (!Scheduled){
566  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
567  NumFailNoSchedule++;
568  Pass.ORE->emit([&]() {
570  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
571  << "Unable to find schedule";
572  });
573  return;
574  }
575 
576  unsigned numStages = Schedule.getMaxStageCount();
577  // No need to generate pipeline if there are no overlapped iterations.
578  if (numStages == 0) {
579  LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
580  NumFailZeroStage++;
581  Pass.ORE->emit([&]() {
583  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
584  << "No need to pipeline - no overlapped iterations in schedule.";
585  });
586  return;
587  }
588  // Check that the maximum stage count is less than user-defined limit.
589  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
590  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
591  << " : too many stages, abort\n");
592  NumFailLargeMaxStage++;
593  Pass.ORE->emit([&]() {
595  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
596  << "Too many stages in schedule: "
597  << ore::NV("numStages", (int)numStages) << " > "
598  << ore::NV("SwpMaxStages", SwpMaxStages)
599  << ". Refer to -pipeliner-max-stages.";
600  });
601  return;
602  }
603 
604  Pass.ORE->emit([&]() {
606  Loop.getHeader())
607  << "Pipelined succesfully!";
608  });
609 
610  // Generate the schedule as a ModuloSchedule.
611  DenseMap<MachineInstr *, int> Cycles, Stages;
612  std::vector<MachineInstr *> OrderedInsts;
613  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
614  ++Cycle) {
615  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
616  OrderedInsts.push_back(SU->getInstr());
617  Cycles[SU->getInstr()] = Cycle;
618  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
619  }
620  }
622  for (auto &KV : NewMIs) {
623  Cycles[KV.first] = Cycles[KV.second];
624  Stages[KV.first] = Stages[KV.second];
625  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
626  }
627 
628  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
629  std::move(Stages));
630  if (EmitTestAnnotations) {
631  assert(NewInstrChanges.empty() &&
632  "Cannot serialize a schedule with InstrChanges!");
634  MSTI.annotate();
635  return;
636  }
637  // The experimental code generator can't work if there are InstChanges.
638  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
639  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
640  MSE.expand();
641  } else {
642  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
643  MSE.expand();
644  MSE.cleanup();
645  }
646  ++NumPipelined;
647 }
648 
649 /// Clean up after the software pipeliner runs.
651  for (auto &KV : NewMIs)
652  MF.DeleteMachineInstr(KV.second);
653  NewMIs.clear();
654 
655  // Call the superclass.
657 }
658 
659 /// Return the register values for the operands of a Phi instruction.
660 /// This function assume the instruction is a Phi.
662  unsigned &InitVal, unsigned &LoopVal) {
663  assert(Phi.isPHI() && "Expecting a Phi.");
664 
665  InitVal = 0;
666  LoopVal = 0;
667  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
668  if (Phi.getOperand(i + 1).getMBB() != Loop)
669  InitVal = Phi.getOperand(i).getReg();
670  else
671  LoopVal = Phi.getOperand(i).getReg();
672 
673  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
674 }
675 
676 /// Return the Phi register value that comes the loop block.
677 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
678  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
679  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
680  return Phi.getOperand(i).getReg();
681  return 0;
682 }
683 
684 /// Return true if SUb can be reached from SUa following the chain edges.
685 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
686  SmallPtrSet<SUnit *, 8> Visited;
687  SmallVector<SUnit *, 8> Worklist;
688  Worklist.push_back(SUa);
689  while (!Worklist.empty()) {
690  const SUnit *SU = Worklist.pop_back_val();
691  for (auto &SI : SU->Succs) {
692  SUnit *SuccSU = SI.getSUnit();
693  if (SI.getKind() == SDep::Order) {
694  if (Visited.count(SuccSU))
695  continue;
696  if (SuccSU == SUb)
697  return true;
698  Worklist.push_back(SuccSU);
699  Visited.insert(SuccSU);
700  }
701  }
702  }
703  return false;
704 }
705 
706 /// Return true if the instruction causes a chain between memory
707 /// references before and after it.
709  return MI.isCall() || MI.mayRaiseFPException() ||
710  MI.hasUnmodeledSideEffects() ||
711  (MI.hasOrderedMemoryRef() &&
712  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
713 }
714 
715 /// Return the underlying objects for the memory references of an instruction.
716 /// This function calls the code in ValueTracking, but first checks that the
717 /// instruction has a memory operand.
720  if (!MI->hasOneMemOperand())
721  return;
722  MachineMemOperand *MM = *MI->memoperands_begin();
723  if (!MM->getValue())
724  return;
725  getUnderlyingObjects(MM->getValue(), Objs);
726  for (const Value *V : Objs) {
727  if (!isIdentifiedObject(V)) {
728  Objs.clear();
729  return;
730  }
731  Objs.push_back(V);
732  }
733 }
734 
735 /// Add a chain edge between a load and store if the store can be an
736 /// alias of the load on a subsequent iteration, i.e., a loop carried
737 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
738 /// but that code doesn't create loop carried dependences.
739 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
743  for (auto &SU : SUnits) {
744  MachineInstr &MI = *SU.getInstr();
745  if (isDependenceBarrier(MI, AA))
746  PendingLoads.clear();
747  else if (MI.mayLoad()) {
749  ::getUnderlyingObjects(&MI, Objs);
750  if (Objs.empty())
751  Objs.push_back(UnknownValue);
752  for (auto V : Objs) {
753  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
754  SUs.push_back(&SU);
755  }
756  } else if (MI.mayStore()) {
758  ::getUnderlyingObjects(&MI, Objs);
759  if (Objs.empty())
760  Objs.push_back(UnknownValue);
761  for (auto V : Objs) {
763  PendingLoads.find(V);
764  if (I == PendingLoads.end())
765  continue;
766  for (auto Load : I->second) {
767  if (isSuccOrder(Load, &SU))
768  continue;
769  MachineInstr &LdMI = *Load->getInstr();
770  // First, perform the cheaper check that compares the base register.
771  // If they are the same and the load offset is less than the store
772  // offset, then mark the dependence as loop carried potentially.
773  const MachineOperand *BaseOp1, *BaseOp2;
774  int64_t Offset1, Offset2;
775  bool Offset1IsScalable, Offset2IsScalable;
776  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
777  Offset1IsScalable, TRI) &&
778  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
779  Offset2IsScalable, TRI)) {
780  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
781  Offset1IsScalable == Offset2IsScalable &&
782  (int)Offset1 < (int)Offset2) {
784  "What happened to the chain edge?");
785  SDep Dep(Load, SDep::Barrier);
786  Dep.setLatency(1);
787  SU.addPred(Dep);
788  continue;
789  }
790  }
791  // Second, the more expensive check that uses alias analysis on the
792  // base registers. If they alias, and the load offset is less than
793  // the store offset, the mark the dependence as loop carried.
794  if (!AA) {
795  SDep Dep(Load, SDep::Barrier);
796  Dep.setLatency(1);
797  SU.addPred(Dep);
798  continue;
799  }
800  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
801  MachineMemOperand *MMO2 = *MI.memoperands_begin();
802  if (!MMO1->getValue() || !MMO2->getValue()) {
803  SDep Dep(Load, SDep::Barrier);
804  Dep.setLatency(1);
805  SU.addPred(Dep);
806  continue;
807  }
808  if (MMO1->getValue() == MMO2->getValue() &&
809  MMO1->getOffset() <= MMO2->getOffset()) {
810  SDep Dep(Load, SDep::Barrier);
811  Dep.setLatency(1);
812  SU.addPred(Dep);
813  continue;
814  }
815  if (!AA->isNoAlias(
816  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
818  MMO2->getAAInfo()))) {
819  SDep Dep(Load, SDep::Barrier);
820  Dep.setLatency(1);
821  SU.addPred(Dep);
822  }
823  }
824  }
825  }
826  }
827 }
828 
829 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
830 /// processes dependences for PHIs. This function adds true dependences
831 /// from a PHI to a use, and a loop carried dependence from the use to the
832 /// PHI. The loop carried dependence is represented as an anti dependence
833 /// edge. This function also removes chain dependences between unrelated
834 /// PHIs.
835 void SwingSchedulerDAG::updatePhiDependences() {
836  SmallVector<SDep, 4> RemoveDeps;
838 
839  // Iterate over each DAG node.
840  for (SUnit &I : SUnits) {
841  RemoveDeps.clear();
842  // Set to true if the instruction has an operand defined by a Phi.
843  unsigned HasPhiUse = 0;
844  unsigned HasPhiDef = 0;
845  MachineInstr *MI = I.getInstr();
846  // Iterate over each operand, and we process the definitions.
847  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
848  MOE = MI->operands_end();
849  MOI != MOE; ++MOI) {
850  if (!MOI->isReg())
851  continue;
852  Register Reg = MOI->getReg();
853  if (MOI->isDef()) {
854  // If the register is used by a Phi, then create an anti dependence.
856  UI = MRI.use_instr_begin(Reg),
857  UE = MRI.use_instr_end();
858  UI != UE; ++UI) {
859  MachineInstr *UseMI = &*UI;
860  SUnit *SU = getSUnit(UseMI);
861  if (SU != nullptr && UseMI->isPHI()) {
862  if (!MI->isPHI()) {
863  SDep Dep(SU, SDep::Anti, Reg);
864  Dep.setLatency(1);
865  I.addPred(Dep);
866  } else {
867  HasPhiDef = Reg;
868  // Add a chain edge to a dependent Phi that isn't an existing
869  // predecessor.
870  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
871  I.addPred(SDep(SU, SDep::Barrier));
872  }
873  }
874  }
875  } else if (MOI->isUse()) {
876  // If the register is defined by a Phi, then create a true dependence.
878  if (DefMI == nullptr)
879  continue;
880  SUnit *SU = getSUnit(DefMI);
881  if (SU != nullptr && DefMI->isPHI()) {
882  if (!MI->isPHI()) {
883  SDep Dep(SU, SDep::Data, Reg);
884  Dep.setLatency(0);
885  ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
886  I.addPred(Dep);
887  } else {
888  HasPhiUse = Reg;
889  // Add a chain edge to a dependent Phi that isn't an existing
890  // predecessor.
891  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
892  I.addPred(SDep(SU, SDep::Barrier));
893  }
894  }
895  }
896  }
897  // Remove order dependences from an unrelated Phi.
898  if (!SwpPruneDeps)
899  continue;
900  for (auto &PI : I.Preds) {
901  MachineInstr *PMI = PI.getSUnit()->getInstr();
902  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
903  if (I.getInstr()->isPHI()) {
904  if (PMI->getOperand(0).getReg() == HasPhiUse)
905  continue;
906  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
907  continue;
908  }
909  RemoveDeps.push_back(PI);
910  }
911  }
912  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
913  I.removePred(RemoveDeps[i]);
914  }
915 }
916 
917 /// Iterate over each DAG node and see if we can change any dependences
918 /// in order to reduce the recurrence MII.
919 void SwingSchedulerDAG::changeDependences() {
920  // See if an instruction can use a value from the previous iteration.
921  // If so, we update the base and offset of the instruction and change
922  // the dependences.
923  for (SUnit &I : SUnits) {
924  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
925  int64_t NewOffset = 0;
926  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
927  NewOffset))
928  continue;
929 
930  // Get the MI and SUnit for the instruction that defines the original base.
931  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
932  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
933  if (!DefMI)
934  continue;
935  SUnit *DefSU = getSUnit(DefMI);
936  if (!DefSU)
937  continue;
938  // Get the MI and SUnit for the instruction that defins the new base.
939  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
940  if (!LastMI)
941  continue;
942  SUnit *LastSU = getSUnit(LastMI);
943  if (!LastSU)
944  continue;
945 
946  if (Topo.IsReachable(&I, LastSU))
947  continue;
948 
949  // Remove the dependence. The value now depends on a prior iteration.
951  for (const SDep &P : I.Preds)
952  if (P.getSUnit() == DefSU)
953  Deps.push_back(P);
954  for (int i = 0, e = Deps.size(); i != e; i++) {
955  Topo.RemovePred(&I, Deps[i].getSUnit());
956  I.removePred(Deps[i]);
957  }
958  // Remove the chain dependence between the instructions.
959  Deps.clear();
960  for (auto &P : LastSU->Preds)
961  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
962  Deps.push_back(P);
963  for (int i = 0, e = Deps.size(); i != e; i++) {
964  Topo.RemovePred(LastSU, Deps[i].getSUnit());
965  LastSU->removePred(Deps[i]);
966  }
967 
968  // Add a dependence between the new instruction and the instruction
969  // that defines the new base.
970  SDep Dep(&I, SDep::Anti, NewBase);
971  Topo.AddPred(LastSU, &I);
972  LastSU->addPred(Dep);
973 
974  // Remember the base and offset information so that we can update the
975  // instruction during code generation.
976  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
977  }
978 }
979 
980 namespace {
981 
982 // FuncUnitSorter - Comparison operator used to sort instructions by
983 // the number of functional unit choices.
984 struct FuncUnitSorter {
985  const InstrItineraryData *InstrItins;
986  const MCSubtargetInfo *STI;
988 
989  FuncUnitSorter(const TargetSubtargetInfo &TSI)
990  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
991 
992  // Compute the number of functional unit alternatives needed
993  // at each stage, and take the minimum value. We prioritize the
994  // instructions by the least number of choices first.
995  unsigned minFuncUnits(const MachineInstr *Inst,
996  InstrStage::FuncUnits &F) const {
997  unsigned SchedClass = Inst->getDesc().getSchedClass();
998  unsigned min = UINT_MAX;
999  if (InstrItins && !InstrItins->isEmpty()) {
1000  for (const InstrStage &IS :
1001  make_range(InstrItins->beginStage(SchedClass),
1002  InstrItins->endStage(SchedClass))) {
1003  InstrStage::FuncUnits funcUnits = IS.getUnits();
1004  unsigned numAlternatives = countPopulation(funcUnits);
1005  if (numAlternatives < min) {
1006  min = numAlternatives;
1007  F = funcUnits;
1008  }
1009  }
1010  return min;
1011  }
1012  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1013  const MCSchedClassDesc *SCDesc =
1014  STI->getSchedModel().getSchedClassDesc(SchedClass);
1015  if (!SCDesc->isValid())
1016  // No valid Schedule Class Desc for schedClass, should be
1017  // Pseudo/PostRAPseudo
1018  return min;
1019 
1020  for (const MCWriteProcResEntry &PRE :
1021  make_range(STI->getWriteProcResBegin(SCDesc),
1022  STI->getWriteProcResEnd(SCDesc))) {
1023  if (!PRE.Cycles)
1024  continue;
1025  const MCProcResourceDesc *ProcResource =
1026  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1027  unsigned NumUnits = ProcResource->NumUnits;
1028  if (NumUnits < min) {
1029  min = NumUnits;
1030  F = PRE.ProcResourceIdx;
1031  }
1032  }
1033  return min;
1034  }
1035  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1036  }
1037 
1038  // Compute the critical resources needed by the instruction. This
1039  // function records the functional units needed by instructions that
1040  // must use only one functional unit. We use this as a tie breaker
1041  // for computing the resource MII. The instrutions that require
1042  // the same, highly used, functional unit have high priority.
1043  void calcCriticalResources(MachineInstr &MI) {
1044  unsigned SchedClass = MI.getDesc().getSchedClass();
1045  if (InstrItins && !InstrItins->isEmpty()) {
1046  for (const InstrStage &IS :
1047  make_range(InstrItins->beginStage(SchedClass),
1048  InstrItins->endStage(SchedClass))) {
1049  InstrStage::FuncUnits FuncUnits = IS.getUnits();
1050  if (countPopulation(FuncUnits) == 1)
1051  Resources[FuncUnits]++;
1052  }
1053  return;
1054  }
1055  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1056  const MCSchedClassDesc *SCDesc =
1057  STI->getSchedModel().getSchedClassDesc(SchedClass);
1058  if (!SCDesc->isValid())
1059  // No valid Schedule Class Desc for schedClass, should be
1060  // Pseudo/PostRAPseudo
1061  return;
1062 
1063  for (const MCWriteProcResEntry &PRE :
1064  make_range(STI->getWriteProcResBegin(SCDesc),
1065  STI->getWriteProcResEnd(SCDesc))) {
1066  if (!PRE.Cycles)
1067  continue;
1068  Resources[PRE.ProcResourceIdx]++;
1069  }
1070  return;
1071  }
1072  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1073  }
1074 
1075  /// Return true if IS1 has less priority than IS2.
1076  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1077  InstrStage::FuncUnits F1 = 0, F2 = 0;
1078  unsigned MFUs1 = minFuncUnits(IS1, F1);
1079  unsigned MFUs2 = minFuncUnits(IS2, F2);
1080  if (MFUs1 == MFUs2)
1081  return Resources.lookup(F1) < Resources.lookup(F2);
1082  return MFUs1 > MFUs2;
1083  }
1084 };
1085 
1086 } // end anonymous namespace
1087 
1088 /// Calculate the resource constrained minimum initiation interval for the
1089 /// specified loop. We use the DFA to model the resources needed for
1090 /// each instruction, and we ignore dependences. A different DFA is created
1091 /// for each cycle that is required. When adding a new instruction, we attempt
1092 /// to add it to each existing DFA, until a legal space is found. If the
1093 /// instruction cannot be reserved in an existing DFA, we create a new one.
1094 unsigned SwingSchedulerDAG::calculateResMII() {
1095 
1096  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1099  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1100 
1101  // Sort the instructions by the number of available choices for scheduling,
1102  // least to most. Use the number of critical resources as the tie breaker.
1103  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1105  E = MBB->getFirstTerminator();
1106  I != E; ++I)
1107  FUS.calcCriticalResources(*I);
1109  FuncUnitOrder(FUS);
1110 
1112  E = MBB->getFirstTerminator();
1113  I != E; ++I)
1114  FuncUnitOrder.push(&*I);
1115 
1116  while (!FuncUnitOrder.empty()) {
1117  MachineInstr *MI = FuncUnitOrder.top();
1118  FuncUnitOrder.pop();
1119  if (TII->isZeroCost(MI->getOpcode()))
1120  continue;
1121  // Attempt to reserve the instruction in an existing DFA. At least one
1122  // DFA is needed for each cycle.
1123  unsigned NumCycles = getSUnit(MI)->Latency;
1124  unsigned ReservedCycles = 0;
1125  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1126  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1127  LLVM_DEBUG({
1128  dbgs() << "Trying to reserve resource for " << NumCycles
1129  << " cycles for \n";
1130  MI->dump();
1131  });
1132  for (unsigned C = 0; C < NumCycles; ++C)
1133  while (RI != RE) {
1134  if ((*RI)->canReserveResources(*MI)) {
1135  (*RI)->reserveResources(*MI);
1136  ++ReservedCycles;
1137  break;
1138  }
1139  RI++;
1140  }
1141  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1142  << ", NumCycles:" << NumCycles << "\n");
1143  // Add new DFAs, if needed, to reserve resources.
1144  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1146  << "NewResource created to reserve resources"
1147  << "\n");
1148  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1149  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1150  NewResource->reserveResources(*MI);
1151  Resources.push_back(NewResource);
1152  }
1153  }
1154  int Resmii = Resources.size();
1155  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
1156  // Delete the memory for each of the DFAs that were created earlier.
1157  for (ResourceManager *RI : Resources) {
1158  ResourceManager *D = RI;
1159  delete D;
1160  }
1161  Resources.clear();
1162  return Resmii;
1163 }
1164 
1165 /// Calculate the recurrence-constrainted minimum initiation interval.
1166 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1167 /// for each circuit. The II needs to satisfy the inequality
1168 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1169 /// II that satisfies the inequality, and the RecMII is the maximum
1170 /// of those values.
1171 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1172  unsigned RecMII = 0;
1173 
1174  for (NodeSet &Nodes : NodeSets) {
1175  if (Nodes.empty())
1176  continue;
1177 
1178  unsigned Delay = Nodes.getLatency();
1179  unsigned Distance = 1;
1180 
1181  // ii = ceil(delay / distance)
1182  unsigned CurMII = (Delay + Distance - 1) / Distance;
1183  Nodes.setRecMII(CurMII);
1184  if (CurMII > RecMII)
1185  RecMII = CurMII;
1186  }
1187 
1188  return RecMII;
1189 }
1190 
1191 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1192 /// but we do this to find the circuits, and then change them back.
1193 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1195  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1196  SUnit *SU = &SUnits[i];
1197  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1198  IP != EP; ++IP) {
1199  if (IP->getKind() != SDep::Anti)
1200  continue;
1201  DepsAdded.push_back(std::make_pair(SU, *IP));
1202  }
1203  }
1204  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1205  // Remove this anti dependency and add one in the reverse direction.
1206  SUnit *SU = P.first;
1207  SDep &D = P.second;
1208  SUnit *TargetSU = D.getSUnit();
1209  unsigned Reg = D.getReg();
1210  unsigned Lat = D.getLatency();
1211  SU->removePred(D);
1212  SDep Dep(SU, SDep::Anti, Reg);
1213  Dep.setLatency(Lat);
1214  TargetSU->addPred(Dep);
1215  }
1216 }
1217 
1218 /// Create the adjacency structure of the nodes in the graph.
1219 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1220  SwingSchedulerDAG *DAG) {
1221  BitVector Added(SUnits.size());
1222  DenseMap<int, int> OutputDeps;
1223  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1224  Added.reset();
1225  // Add any successor to the adjacency matrix and exclude duplicates.
1226  for (auto &SI : SUnits[i].Succs) {
1227  // Only create a back-edge on the first and last nodes of a dependence
1228  // chain. This records any chains and adds them later.
1229  if (SI.getKind() == SDep::Output) {
1230  int N = SI.getSUnit()->NodeNum;
1231  int BackEdge = i;
1232  auto Dep = OutputDeps.find(BackEdge);
1233  if (Dep != OutputDeps.end()) {
1234  BackEdge = Dep->second;
1235  OutputDeps.erase(Dep);
1236  }
1237  OutputDeps[N] = BackEdge;
1238  }
1239  // Do not process a boundary node, an artificial node.
1240  // A back-edge is processed only if it goes to a Phi.
1241  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1242  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1243  continue;
1244  int N = SI.getSUnit()->NodeNum;
1245  if (!Added.test(N)) {
1246  AdjK[i].push_back(N);
1247  Added.set(N);
1248  }
1249  }
1250  // A chain edge between a store and a load is treated as a back-edge in the
1251  // adjacency matrix.
1252  for (auto &PI : SUnits[i].Preds) {
1253  if (!SUnits[i].getInstr()->mayStore() ||
1254  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1255  continue;
1256  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1257  int N = PI.getSUnit()->NodeNum;
1258  if (!Added.test(N)) {
1259  AdjK[i].push_back(N);
1260  Added.set(N);
1261  }
1262  }
1263  }
1264  }
1265  // Add back-edges in the adjacency matrix for the output dependences.
1266  for (auto &OD : OutputDeps)
1267  if (!Added.test(OD.second)) {
1268  AdjK[OD.first].push_back(OD.second);
1269  Added.set(OD.second);
1270  }
1271 }
1272 
1273 /// Identify an elementary circuit in the dependence graph starting at the
1274 /// specified node.
1275 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1276  bool HasBackedge) {
1277  SUnit *SV = &SUnits[V];
1278  bool F = false;
1279  Stack.insert(SV);
1280  Blocked.set(V);
1281 
1282  for (auto W : AdjK[V]) {
1283  if (NumPaths > MaxPaths)
1284  break;
1285  if (W < S)
1286  continue;
1287  if (W == S) {
1288  if (!HasBackedge)
1289  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1290  F = true;
1291  ++NumPaths;
1292  break;
1293  } else if (!Blocked.test(W)) {
1294  if (circuit(W, S, NodeSets,
1295  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1296  F = true;
1297  }
1298  }
1299 
1300  if (F)
1301  unblock(V);
1302  else {
1303  for (auto W : AdjK[V]) {
1304  if (W < S)
1305  continue;
1306  if (B[W].count(SV) == 0)
1307  B[W].insert(SV);
1308  }
1309  }
1310  Stack.pop_back();
1311  return F;
1312 }
1313 
1314 /// Unblock a node in the circuit finding algorithm.
1315 void SwingSchedulerDAG::Circuits::unblock(int U) {
1316  Blocked.reset(U);
1317  SmallPtrSet<SUnit *, 4> &BU = B[U];
1318  while (!BU.empty()) {
1320  assert(SI != BU.end() && "Invalid B set.");
1321  SUnit *W = *SI;
1322  BU.erase(W);
1323  if (Blocked.test(W->NodeNum))
1324  unblock(W->NodeNum);
1325  }
1326 }
1327 
1328 /// Identify all the elementary circuits in the dependence graph using
1329 /// Johnson's circuit algorithm.
1330 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1331  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1332  // but we do this to find the circuits, and then change them back.
1334 
1335  Circuits Cir(SUnits, Topo);
1336  // Create the adjacency structure.
1337  Cir.createAdjacencyStructure(this);
1338  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1339  Cir.reset();
1340  Cir.circuit(i, i, NodeSets);
1341  }
1342 
1343  // Change the dependences back so that we've created a DAG again.
1345 }
1346 
1347 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1348 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1349 // additional copies that are needed across iterations. An artificial dependence
1350 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1351 
1352 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1353 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1354 // PHI-------True-Dep------> USEOfPhi
1355 
1356 // The mutation creates
1357 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1358 
1359 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1360 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1361 // late to avoid additional copies across iterations. The possible scheduling
1362 // order would be
1363 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1364 
1366  for (SUnit &SU : DAG->SUnits) {
1367  // Find the COPY/REG_SEQUENCE instruction.
1368  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1369  continue;
1370 
1371  // Record the loop carried PHIs.
1372  SmallVector<SUnit *, 4> PHISUs;
1373  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1374  SmallVector<SUnit *, 4> SrcSUs;
1375 
1376  for (auto &Dep : SU.Preds) {
1377  SUnit *TmpSU = Dep.getSUnit();
1378  MachineInstr *TmpMI = TmpSU->getInstr();
1379  SDep::Kind DepKind = Dep.getKind();
1380  // Save the loop carried PHI.
1381  if (DepKind == SDep::Anti && TmpMI->isPHI())
1382  PHISUs.push_back(TmpSU);
1383  // Save the source of COPY/REG_SEQUENCE.
1384  // If the source has no pre-decessors, we will end up creating cycles.
1385  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1386  SrcSUs.push_back(TmpSU);
1387  }
1388 
1389  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1390  continue;
1391 
1392  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1393  // SUnit to the container.
1394  SmallVector<SUnit *, 8> UseSUs;
1395  // Do not use iterator based loop here as we are updating the container.
1396  for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1397  for (auto &Dep : PHISUs[Index]->Succs) {
1398  if (Dep.getKind() != SDep::Data)
1399  continue;
1400 
1401  SUnit *TmpSU = Dep.getSUnit();
1402  MachineInstr *TmpMI = TmpSU->getInstr();
1403  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1404  PHISUs.push_back(TmpSU);
1405  continue;
1406  }
1407  UseSUs.push_back(TmpSU);
1408  }
1409  }
1410 
1411  if (UseSUs.size() == 0)
1412  continue;
1413 
1414  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1415  // Add the artificial dependencies if it does not form a cycle.
1416  for (auto I : UseSUs) {
1417  for (auto Src : SrcSUs) {
1418  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1419  Src->addPred(SDep(I, SDep::Artificial));
1420  SDAG->Topo.AddPred(Src, I);
1421  }
1422  }
1423  }
1424  }
1425 }
1426 
1427 /// Return true for DAG nodes that we ignore when computing the cost functions.
1428 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1429 /// in the calculation of the ASAP, ALAP, etc functions.
1430 static bool ignoreDependence(const SDep &D, bool isPred) {
1431  if (D.isArtificial())
1432  return true;
1433  return D.getKind() == SDep::Anti && isPred;
1434 }
1435 
1436 /// Compute several functions need to order the nodes for scheduling.
1437 /// ASAP - Earliest time to schedule a node.
1438 /// ALAP - Latest time to schedule a node.
1439 /// MOV - Mobility function, difference between ALAP and ASAP.
1440 /// D - Depth of each node.
1441 /// H - Height of each node.
1442 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1443  ScheduleInfo.resize(SUnits.size());
1444 
1445  LLVM_DEBUG({
1446  for (int I : Topo) {
1447  const SUnit &SU = SUnits[I];
1448  dumpNode(SU);
1449  }
1450  });
1451 
1452  int maxASAP = 0;
1453  // Compute ASAP and ZeroLatencyDepth.
1454  for (int I : Topo) {
1455  int asap = 0;
1456  int zeroLatencyDepth = 0;
1457  SUnit *SU = &SUnits[I];
1458  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1459  EP = SU->Preds.end();
1460  IP != EP; ++IP) {
1461  SUnit *pred = IP->getSUnit();
1462  if (IP->getLatency() == 0)
1463  zeroLatencyDepth =
1464  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1465  if (ignoreDependence(*IP, true))
1466  continue;
1467  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1468  getDistance(pred, SU, *IP) * MII));
1469  }
1470  maxASAP = std::max(maxASAP, asap);
1471  ScheduleInfo[I].ASAP = asap;
1472  ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1473  }
1474 
1475  // Compute ALAP, ZeroLatencyHeight, and MOV.
1477  E = Topo.rend();
1478  I != E; ++I) {
1479  int alap = maxASAP;
1480  int zeroLatencyHeight = 0;
1481  SUnit *SU = &SUnits[*I];
1482  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1483  ES = SU->Succs.end();
1484  IS != ES; ++IS) {
1485  SUnit *succ = IS->getSUnit();
1486  if (IS->getLatency() == 0)
1487  zeroLatencyHeight =
1488  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1489  if (ignoreDependence(*IS, true))
1490  continue;
1491  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1492  getDistance(SU, succ, *IS) * MII));
1493  }
1494 
1495  ScheduleInfo[*I].ALAP = alap;
1496  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1497  }
1498 
1499  // After computing the node functions, compute the summary for each node set.
1500  for (NodeSet &I : NodeSets)
1501  I.computeNodeSetInfo(this);
1502 
1503  LLVM_DEBUG({
1504  for (unsigned i = 0; i < SUnits.size(); i++) {
1505  dbgs() << "\tNode " << i << ":\n";
1506  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1507  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1508  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1509  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1510  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1511  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1512  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1513  }
1514  });
1515 }
1516 
1517 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1518 /// as the predecessors of the elements of NodeOrder that are not also in
1519 /// NodeOrder.
1522  const NodeSet *S = nullptr) {
1523  Preds.clear();
1524  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1525  I != E; ++I) {
1526  for (const SDep &Pred : (*I)->Preds) {
1527  if (S && S->count(Pred.getSUnit()) == 0)
1528  continue;
1529  if (ignoreDependence(Pred, true))
1530  continue;
1531  if (NodeOrder.count(Pred.getSUnit()) == 0)
1532  Preds.insert(Pred.getSUnit());
1533  }
1534  // Back-edges are predecessors with an anti-dependence.
1535  for (const SDep &Succ : (*I)->Succs) {
1536  if (Succ.getKind() != SDep::Anti)
1537  continue;
1538  if (S && S->count(Succ.getSUnit()) == 0)
1539  continue;
1540  if (NodeOrder.count(Succ.getSUnit()) == 0)
1541  Preds.insert(Succ.getSUnit());
1542  }
1543  }
1544  return !Preds.empty();
1545 }
1546 
1547 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1548 /// as the successors of the elements of NodeOrder that are not also in
1549 /// NodeOrder.
1552  const NodeSet *S = nullptr) {
1553  Succs.clear();
1554  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1555  I != E; ++I) {
1556  for (SDep &Succ : (*I)->Succs) {
1557  if (S && S->count(Succ.getSUnit()) == 0)
1558  continue;
1559  if (ignoreDependence(Succ, false))
1560  continue;
1561  if (NodeOrder.count(Succ.getSUnit()) == 0)
1562  Succs.insert(Succ.getSUnit());
1563  }
1564  for (SDep &Pred : (*I)->Preds) {
1565  if (Pred.getKind() != SDep::Anti)
1566  continue;
1567  if (S && S->count(Pred.getSUnit()) == 0)
1568  continue;
1569  if (NodeOrder.count(Pred.getSUnit()) == 0)
1570  Succs.insert(Pred.getSUnit());
1571  }
1572  }
1573  return !Succs.empty();
1574 }
1575 
1576 /// Return true if there is a path from the specified node to any of the nodes
1577 /// in DestNodes. Keep track and return the nodes in any path.
1578 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1579  SetVector<SUnit *> &DestNodes,
1580  SetVector<SUnit *> &Exclude,
1581  SmallPtrSet<SUnit *, 8> &Visited) {
1582  if (Cur->isBoundaryNode())
1583  return false;
1584  if (Exclude.contains(Cur))
1585  return false;
1586  if (DestNodes.contains(Cur))
1587  return true;
1588  if (!Visited.insert(Cur).second)
1589  return Path.contains(Cur);
1590  bool FoundPath = false;
1591  for (auto &SI : Cur->Succs)
1592  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1593  for (auto &PI : Cur->Preds)
1594  if (PI.getKind() == SDep::Anti)
1595  FoundPath |=
1596  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1597  if (FoundPath)
1598  Path.insert(Cur);
1599  return FoundPath;
1600 }
1601 
1602 /// Compute the live-out registers for the instructions in a node-set.
1603 /// The live-out registers are those that are defined in the node-set,
1604 /// but not used. Except for use operands of Phis.
1606  NodeSet &NS) {
1611  for (SUnit *SU : NS) {
1612  const MachineInstr *MI = SU->getInstr();
1613  if (MI->isPHI())
1614  continue;
1615  for (const MachineOperand &MO : MI->operands())
1616  if (MO.isReg() && MO.isUse()) {
1617  Register Reg = MO.getReg();
1619  Uses.insert(Reg);
1620  else if (MRI.isAllocatable(Reg))
1621  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1622  ++Units)
1623  Uses.insert(*Units);
1624  }
1625  }
1626  for (SUnit *SU : NS)
1627  for (const MachineOperand &MO : SU->getInstr()->operands())
1628  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1629  Register Reg = MO.getReg();
1631  if (!Uses.count(Reg))
1632  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1634  } else if (MRI.isAllocatable(Reg)) {
1635  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1636  ++Units)
1637  if (!Uses.count(*Units))
1638  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1640  }
1641  }
1642  RPTracker.addLiveRegs(LiveOutRegs);
1643 }
1644 
1645 /// A heuristic to filter nodes in recurrent node-sets if the register
1646 /// pressure of a set is too high.
1647 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1648  for (auto &NS : NodeSets) {
1649  // Skip small node-sets since they won't cause register pressure problems.
1650  if (NS.size() <= 2)
1651  continue;
1652  IntervalPressure RecRegPressure;
1653  RegPressureTracker RecRPTracker(RecRegPressure);
1654  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1655  computeLiveOuts(MF, RecRPTracker, NS);
1656  RecRPTracker.closeBottom();
1657 
1658  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1659  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1660  return A->NodeNum > B->NodeNum;
1661  });
1662 
1663  for (auto &SU : SUnits) {
1664  // Since we're computing the register pressure for a subset of the
1665  // instructions in a block, we need to set the tracker for each
1666  // instruction in the node-set. The tracker is set to the instruction
1667  // just after the one we're interested in.
1669  RecRPTracker.setPos(std::next(CurInstI));
1670 
1671  RegPressureDelta RPDelta;
1672  ArrayRef<PressureChange> CriticalPSets;
1673  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1674  CriticalPSets,
1675  RecRegPressure.MaxSetPressure);
1676  if (RPDelta.Excess.isValid()) {
1677  LLVM_DEBUG(
1678  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1679  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1680  << ":" << RPDelta.Excess.getUnitInc());
1681  NS.setExceedPressure(SU);
1682  break;
1683  }
1684  RecRPTracker.recede();
1685  }
1686  }
1687 }
1688 
1689 /// A heuristic to colocate node sets that have the same set of
1690 /// successors.
1691 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1692  unsigned Colocate = 0;
1693  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1694  NodeSet &N1 = NodeSets[i];
1696  if (N1.empty() || !succ_L(N1, S1))
1697  continue;
1698  for (int j = i + 1; j < e; ++j) {
1699  NodeSet &N2 = NodeSets[j];
1700  if (N1.compareRecMII(N2) != 0)
1701  continue;
1703  if (N2.empty() || !succ_L(N2, S2))
1704  continue;
1705  if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
1706  N1.setColocate(++Colocate);
1707  N2.setColocate(Colocate);
1708  break;
1709  }
1710  }
1711  }
1712 }
1713 
1714 /// Check if the existing node-sets are profitable. If not, then ignore the
1715 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1716 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1717 /// then it's best to try to schedule all instructions together instead of
1718 /// starting with the recurrent node-sets.
1719 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1720  // Look for loops with a large MII.
1721  if (MII < 17)
1722  return;
1723  // Check if the node-set contains only a simple add recurrence.
1724  for (auto &NS : NodeSets) {
1725  if (NS.getRecMII() > 2)
1726  return;
1727  if (NS.getMaxDepth() > MII)
1728  return;
1729  }
1730  NodeSets.clear();
1731  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1732 }
1733 
1734 /// Add the nodes that do not belong to a recurrence set into groups
1735 /// based upon connected componenets.
1736 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1737  SetVector<SUnit *> NodesAdded;
1738  SmallPtrSet<SUnit *, 8> Visited;
1739  // Add the nodes that are on a path between the previous node sets and
1740  // the current node set.
1741  for (NodeSet &I : NodeSets) {
1743  // Add the nodes from the current node set to the previous node set.
1744  if (succ_L(I, N)) {
1745  SetVector<SUnit *> Path;
1746  for (SUnit *NI : N) {
1747  Visited.clear();
1748  computePath(NI, Path, NodesAdded, I, Visited);
1749  }
1750  if (!Path.empty())
1751  I.insert(Path.begin(), Path.end());
1752  }
1753  // Add the nodes from the previous node set to the current node set.
1754  N.clear();
1755  if (succ_L(NodesAdded, N)) {
1756  SetVector<SUnit *> Path;
1757  for (SUnit *NI : N) {
1758  Visited.clear();
1759  computePath(NI, Path, I, NodesAdded, Visited);
1760  }
1761  if (!Path.empty())
1762  I.insert(Path.begin(), Path.end());
1763  }
1764  NodesAdded.insert(I.begin(), I.end());
1765  }
1766 
1767  // Create a new node set with the connected nodes of any successor of a node
1768  // in a recurrent set.
1769  NodeSet NewSet;
1771  if (succ_L(NodesAdded, N))
1772  for (SUnit *I : N)
1773  addConnectedNodes(I, NewSet, NodesAdded);
1774  if (!NewSet.empty())
1775  NodeSets.push_back(NewSet);
1776 
1777  // Create a new node set with the connected nodes of any predecessor of a node
1778  // in a recurrent set.
1779  NewSet.clear();
1780  if (pred_L(NodesAdded, N))
1781  for (SUnit *I : N)
1782  addConnectedNodes(I, NewSet, NodesAdded);
1783  if (!NewSet.empty())
1784  NodeSets.push_back(NewSet);
1785 
1786  // Create new nodes sets with the connected nodes any remaining node that
1787  // has no predecessor.
1788  for (SUnit &SU : SUnits) {
1789  if (NodesAdded.count(&SU) == 0) {
1790  NewSet.clear();
1791  addConnectedNodes(&SU, NewSet, NodesAdded);
1792  if (!NewSet.empty())
1793  NodeSets.push_back(NewSet);
1794  }
1795  }
1796 }
1797 
1798 /// Add the node to the set, and add all of its connected nodes to the set.
1799 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1800  SetVector<SUnit *> &NodesAdded) {
1801  NewSet.insert(SU);
1802  NodesAdded.insert(SU);
1803  for (auto &SI : SU->Succs) {
1804  SUnit *Successor = SI.getSUnit();
1805  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1806  addConnectedNodes(Successor, NewSet, NodesAdded);
1807  }
1808  for (auto &PI : SU->Preds) {
1809  SUnit *Predecessor = PI.getSUnit();
1810  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1811  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1812  }
1813 }
1814 
1815 /// Return true if Set1 contains elements in Set2. The elements in common
1816 /// are returned in a different container.
1817 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1818  SmallSetVector<SUnit *, 8> &Result) {
1819  Result.clear();
1820  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1821  SUnit *SU = Set1[i];
1822  if (Set2.count(SU) != 0)
1823  Result.insert(SU);
1824  }
1825  return !Result.empty();
1826 }
1827 
1828 /// Merge the recurrence node sets that have the same initial node.
1829 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1830  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1831  ++I) {
1832  NodeSet &NI = *I;
1833  for (NodeSetType::iterator J = I + 1; J != E;) {
1834  NodeSet &NJ = *J;
1835  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1836  if (NJ.compareRecMII(NI) > 0)
1837  NI.setRecMII(NJ.getRecMII());
1838  for (SUnit *SU : *J)
1839  I->insert(SU);
1840  NodeSets.erase(J);
1841  E = NodeSets.end();
1842  } else {
1843  ++J;
1844  }
1845  }
1846  }
1847 }
1848 
1849 /// Remove nodes that have been scheduled in previous NodeSets.
1850 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1851  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1852  ++I)
1853  for (NodeSetType::iterator J = I + 1; J != E;) {
1854  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1855 
1856  if (J->empty()) {
1857  NodeSets.erase(J);
1858  E = NodeSets.end();
1859  } else {
1860  ++J;
1861  }
1862  }
1863 }
1864 
1865 /// Compute an ordered list of the dependence graph nodes, which
1866 /// indicates the order that the nodes will be scheduled. This is a
1867 /// two-level algorithm. First, a partial order is created, which
1868 /// consists of a list of sets ordered from highest to lowest priority.
1869 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1871  NodeOrder.clear();
1872 
1873  for (auto &Nodes : NodeSets) {
1874  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1875  OrderKind Order;
1877  if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1878  R.insert(N.begin(), N.end());
1879  Order = BottomUp;
1880  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1881  } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1882  R.insert(N.begin(), N.end());
1883  Order = TopDown;
1884  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1885  } else if (isIntersect(N, Nodes, R)) {
1886  // If some of the successors are in the existing node-set, then use the
1887  // top-down ordering.
1888  Order = TopDown;
1889  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1890  } else if (NodeSets.size() == 1) {
1891  for (auto &N : Nodes)
1892  if (N->Succs.size() == 0)
1893  R.insert(N);
1894  Order = BottomUp;
1895  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1896  } else {
1897  // Find the node with the highest ASAP.
1898  SUnit *maxASAP = nullptr;
1899  for (SUnit *SU : Nodes) {
1900  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1901  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1902  maxASAP = SU;
1903  }
1904  R.insert(maxASAP);
1905  Order = BottomUp;
1906  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1907  }
1908 
1909  while (!R.empty()) {
1910  if (Order == TopDown) {
1911  // Choose the node with the maximum height. If more than one, choose
1912  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1913  // choose the node with the lowest MOV.
1914  while (!R.empty()) {
1915  SUnit *maxHeight = nullptr;
1916  for (SUnit *I : R) {
1917  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1918  maxHeight = I;
1919  else if (getHeight(I) == getHeight(maxHeight) &&
1921  maxHeight = I;
1922  else if (getHeight(I) == getHeight(maxHeight) &&
1924  getZeroLatencyHeight(maxHeight) &&
1925  getMOV(I) < getMOV(maxHeight))
1926  maxHeight = I;
1927  }
1928  NodeOrder.insert(maxHeight);
1929  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1930  R.remove(maxHeight);
1931  for (const auto &I : maxHeight->Succs) {
1932  if (Nodes.count(I.getSUnit()) == 0)
1933  continue;
1934  if (NodeOrder.contains(I.getSUnit()))
1935  continue;
1936  if (ignoreDependence(I, false))
1937  continue;
1938  R.insert(I.getSUnit());
1939  }
1940  // Back-edges are predecessors with an anti-dependence.
1941  for (const auto &I : maxHeight->Preds) {
1942  if (I.getKind() != SDep::Anti)
1943  continue;
1944  if (Nodes.count(I.getSUnit()) == 0)
1945  continue;
1946  if (NodeOrder.contains(I.getSUnit()))
1947  continue;
1948  R.insert(I.getSUnit());
1949  }
1950  }
1951  Order = BottomUp;
1952  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1954  if (pred_L(NodeOrder, N, &Nodes))
1955  R.insert(N.begin(), N.end());
1956  } else {
1957  // Choose the node with the maximum depth. If more than one, choose
1958  // the node with the maximum ZeroLatencyDepth. If still more than one,
1959  // choose the node with the lowest MOV.
1960  while (!R.empty()) {
1961  SUnit *maxDepth = nullptr;
1962  for (SUnit *I : R) {
1963  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1964  maxDepth = I;
1965  else if (getDepth(I) == getDepth(maxDepth) &&
1967  maxDepth = I;
1968  else if (getDepth(I) == getDepth(maxDepth) &&
1969  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1970  getMOV(I) < getMOV(maxDepth))
1971  maxDepth = I;
1972  }
1973  NodeOrder.insert(maxDepth);
1974  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1975  R.remove(maxDepth);
1976  if (Nodes.isExceedSU(maxDepth)) {
1977  Order = TopDown;
1978  R.clear();
1979  R.insert(Nodes.getNode(0));
1980  break;
1981  }
1982  for (const auto &I : maxDepth->Preds) {
1983  if (Nodes.count(I.getSUnit()) == 0)
1984  continue;
1985  if (NodeOrder.contains(I.getSUnit()))
1986  continue;
1987  R.insert(I.getSUnit());
1988  }
1989  // Back-edges are predecessors with an anti-dependence.
1990  for (const auto &I : maxDepth->Succs) {
1991  if (I.getKind() != SDep::Anti)
1992  continue;
1993  if (Nodes.count(I.getSUnit()) == 0)
1994  continue;
1995  if (NodeOrder.contains(I.getSUnit()))
1996  continue;
1997  R.insert(I.getSUnit());
1998  }
1999  }
2000  Order = TopDown;
2001  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2003  if (succ_L(NodeOrder, N, &Nodes))
2004  R.insert(N.begin(), N.end());
2005  }
2006  }
2007  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2008  }
2009 
2010  LLVM_DEBUG({
2011  dbgs() << "Node order: ";
2012  for (SUnit *I : NodeOrder)
2013  dbgs() << " " << I->NodeNum << " ";
2014  dbgs() << "\n";
2015  });
2016 }
2017 
2018 /// Process the nodes in the computed order and create the pipelined schedule
2019 /// of the instructions, if possible. Return true if a schedule is found.
2020 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2021 
2022  if (NodeOrder.empty()){
2023  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2024  return false;
2025  }
2026 
2027  bool scheduleFound = false;
2028  // Keep increasing II until a valid schedule is found.
2029  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2030  Schedule.reset();
2031  Schedule.setInitiationInterval(II);
2032  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2033 
2034  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2035  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2036  do {
2037  SUnit *SU = *NI;
2038 
2039  // Compute the schedule time for the instruction, which is based
2040  // upon the scheduled time for any predecessors/successors.
2041  int EarlyStart = INT_MIN;
2042  int LateStart = INT_MAX;
2043  // These values are set when the size of the schedule window is limited
2044  // due to chain dependences.
2045  int SchedEnd = INT_MAX;
2046  int SchedStart = INT_MIN;
2047  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2048  II, this);
2049  LLVM_DEBUG({
2050  dbgs() << "\n";
2051  dbgs() << "Inst (" << SU->NodeNum << ") ";
2052  SU->getInstr()->dump();
2053  dbgs() << "\n";
2054  });
2055  LLVM_DEBUG({
2056  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2057  LateStart, SchedEnd, SchedStart);
2058  });
2059 
2060  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2061  SchedStart > LateStart)
2062  scheduleFound = false;
2063  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2064  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2065  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2066  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2067  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2068  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2069  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2070  SchedEnd =
2071  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2072  // When scheduling a Phi it is better to start at the late cycle and go
2073  // backwards. The default order may insert the Phi too far away from
2074  // its first dependence.
2075  if (SU->getInstr()->isPHI())
2076  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2077  else
2078  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2079  } else {
2080  int FirstCycle = Schedule.getFirstCycle();
2081  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2082  FirstCycle + getASAP(SU) + II - 1, II);
2083  }
2084  // Even if we find a schedule, make sure the schedule doesn't exceed the
2085  // allowable number of stages. We keep trying if this happens.
2086  if (scheduleFound)
2087  if (SwpMaxStages > -1 &&
2088  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2089  scheduleFound = false;
2090 
2091  LLVM_DEBUG({
2092  if (!scheduleFound)
2093  dbgs() << "\tCan't schedule\n";
2094  });
2095  } while (++NI != NE && scheduleFound);
2096 
2097  // If a schedule is found, check if it is a valid schedule too.
2098  if (scheduleFound)
2099  scheduleFound = Schedule.isValidSchedule(this);
2100  }
2101 
2102  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2103  << " (II=" << Schedule.getInitiationInterval()
2104  << ")\n");
2105 
2106  if (scheduleFound) {
2107  Schedule.finalizeSchedule(this);
2108  Pass.ORE->emit([&]() {
2110  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2111  << "Schedule found with Initiation Interval: "
2112  << ore::NV("II", Schedule.getInitiationInterval())
2113  << ", MaxStageCount: "
2114  << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
2115  });
2116  } else
2117  Schedule.reset();
2118 
2119  return scheduleFound && Schedule.getMaxStageCount() > 0;
2120 }
2121 
2122 /// Return true if we can compute the amount the instruction changes
2123 /// during each iteration. Set Delta to the amount of the change.
2124 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2126  const MachineOperand *BaseOp;
2127  int64_t Offset;
2128  bool OffsetIsScalable;
2129  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2130  return false;
2131 
2132  // FIXME: This algorithm assumes instructions have fixed-size offsets.
2133  if (OffsetIsScalable)
2134  return false;
2135 
2136  if (!BaseOp->isReg())
2137  return false;
2138 
2139  Register BaseReg = BaseOp->getReg();
2140 
2142  // Check if there is a Phi. If so, get the definition in the loop.
2143  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2144  if (BaseDef && BaseDef->isPHI()) {
2145  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2146  BaseDef = MRI.getVRegDef(BaseReg);
2147  }
2148  if (!BaseDef)
2149  return false;
2150 
2151  int D = 0;
2152  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2153  return false;
2154 
2155  Delta = D;
2156  return true;
2157 }
2158 
2159 /// Check if we can change the instruction to use an offset value from the
2160 /// previous iteration. If so, return true and set the base and offset values
2161 /// so that we can rewrite the load, if necessary.
2162 /// v1 = Phi(v0, v3)
2163 /// v2 = load v1, 0
2164 /// v3 = post_store v1, 4, x
2165 /// This function enables the load to be rewritten as v2 = load v3, 4.
2166 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2167  unsigned &BasePos,
2168  unsigned &OffsetPos,
2169  unsigned &NewBase,
2170  int64_t &Offset) {
2171  // Get the load instruction.
2172  if (TII->isPostIncrement(*MI))
2173  return false;
2174  unsigned BasePosLd, OffsetPosLd;
2175  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2176  return false;
2177  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2178 
2179  // Look for the Phi instruction.
2180  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2181  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2182  if (!Phi || !Phi->isPHI())
2183  return false;
2184  // Get the register defined in the loop block.
2185  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2186  if (!PrevReg)
2187  return false;
2188 
2189  // Check for the post-increment load/store instruction.
2190  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2191  if (!PrevDef || PrevDef == MI)
2192  return false;
2193 
2194  if (!TII->isPostIncrement(*PrevDef))
2195  return false;
2196 
2197  unsigned BasePos1 = 0, OffsetPos1 = 0;
2198  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2199  return false;
2200 
2201  // Make sure that the instructions do not access the same memory location in
2202  // the next iteration.
2203  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2204  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2205  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2206  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2207  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2208  MF.DeleteMachineInstr(NewMI);
2209  if (!Disjoint)
2210  return false;
2211 
2212  // Set the return value once we determine that we return true.
2213  BasePos = BasePosLd;
2214  OffsetPos = OffsetPosLd;
2215  NewBase = PrevReg;
2216  Offset = StoreOffset;
2217  return true;
2218 }
2219 
2220 /// Apply changes to the instruction if needed. The changes are need
2221 /// to improve the scheduling and depend up on the final schedule.
2223  SMSchedule &Schedule) {
2224  SUnit *SU = getSUnit(MI);
2226  InstrChanges.find(SU);
2227  if (It != InstrChanges.end()) {
2228  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2229  unsigned BasePos, OffsetPos;
2230  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2231  return;
2232  Register BaseReg = MI->getOperand(BasePos).getReg();
2233  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2234  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2235  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2236  int BaseStageNum = Schedule.stageScheduled(SU);
2237  int BaseCycleNum = Schedule.cycleScheduled(SU);
2238  if (BaseStageNum < DefStageNum) {
2239  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2240  int OffsetDiff = DefStageNum - BaseStageNum;
2241  if (DefCycleNum < BaseCycleNum) {
2242  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2243  if (OffsetDiff > 0)
2244  --OffsetDiff;
2245  }
2246  int64_t NewOffset =
2247  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2248  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2249  SU->setInstr(NewMI);
2250  MISUnitMap[NewMI] = SU;
2251  NewMIs[MI] = NewMI;
2252  }
2253  }
2254 }
2255 
2256 /// Return the instruction in the loop that defines the register.
2257 /// If the definition is a Phi, then follow the Phi operand to
2258 /// the instruction in the loop.
2259 MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2262  while (Def->isPHI()) {
2263  if (!Visited.insert(Def).second)
2264  break;
2265  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2266  if (Def->getOperand(i + 1).getMBB() == BB) {
2267  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2268  break;
2269  }
2270  }
2271  return Def;
2272 }
2273 
2274 /// Return true for an order or output dependence that is loop carried
2275 /// potentially. A dependence is loop carried if the destination defines a valu
2276 /// that may be used or defined by the source in a subsequent iteration.
2278  bool isSucc) {
2279  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2280  Dep.isArtificial())
2281  return false;
2282 
2283  if (!SwpPruneLoopCarried)
2284  return true;
2285 
2286  if (Dep.getKind() == SDep::Output)
2287  return true;
2288 
2289  MachineInstr *SI = Source->getInstr();
2290  MachineInstr *DI = Dep.getSUnit()->getInstr();
2291  if (!isSucc)
2292  std::swap(SI, DI);
2293  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2294 
2295  // Assume ordered loads and stores may have a loop carried dependence.
2296  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2297  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2298  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2299  return true;
2300 
2301  // Only chain dependences between a load and store can be loop carried.
2302  if (!DI->mayStore() || !SI->mayLoad())
2303  return false;
2304 
2305  unsigned DeltaS, DeltaD;
2306  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2307  return true;
2308 
2309  const MachineOperand *BaseOpS, *BaseOpD;
2310  int64_t OffsetS, OffsetD;
2311  bool OffsetSIsScalable, OffsetDIsScalable;
2313  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2314  TRI) ||
2315  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2316  TRI))
2317  return true;
2318 
2319  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2320  "Expected offsets to be byte offsets");
2321 
2322  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2323  return true;
2324 
2325  // Check that the base register is incremented by a constant value for each
2326  // iteration.
2327  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2328  if (!Def || !Def->isPHI())
2329  return true;
2330  unsigned InitVal = 0;
2331  unsigned LoopVal = 0;
2332  getPhiRegs(*Def, BB, InitVal, LoopVal);
2333  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2334  int D = 0;
2335  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2336  return true;
2337 
2338  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2339  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2340 
2341  // This is the main test, which checks the offset values and the loop
2342  // increment value to determine if the accesses may be loop carried.
2343  if (AccessSizeS == MemoryLocation::UnknownSize ||
2344  AccessSizeD == MemoryLocation::UnknownSize)
2345  return true;
2346 
2347  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2348  return true;
2349 
2350  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2351 }
2352 
2353 void SwingSchedulerDAG::postprocessDAG() {
2354  for (auto &M : Mutations)
2355  M->apply(this);
2356 }
2357 
2358 /// Try to schedule the node at the specified StartCycle and continue
2359 /// until the node is schedule or the EndCycle is reached. This function
2360 /// returns true if the node is scheduled. This routine may search either
2361 /// forward or backward for a place to insert the instruction based upon
2362 /// the relative values of StartCycle and EndCycle.
2363 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2364  bool forward = true;
2365  LLVM_DEBUG({
2366  dbgs() << "Trying to insert node between " << StartCycle << " and "
2367  << EndCycle << " II: " << II << "\n";
2368  });
2369  if (StartCycle > EndCycle)
2370  forward = false;
2371 
2372  // The terminating condition depends on the direction.
2373  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2374  for (int curCycle = StartCycle; curCycle != termCycle;
2375  forward ? ++curCycle : --curCycle) {
2376 
2377  // Add the already scheduled instructions at the specified cycle to the
2378  // DFA.
2379  ProcItinResources.clearResources();
2380  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2381  checkCycle <= LastCycle; checkCycle += II) {
2382  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2383 
2384  for (SUnit *CI : cycleInstrs) {
2385  if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
2386  continue;
2387  assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
2388  "These instructions have already been scheduled.");
2389  ProcItinResources.reserveResources(*CI->getInstr());
2390  }
2391  }
2392  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2393  ProcItinResources.canReserveResources(*SU->getInstr())) {
2394  LLVM_DEBUG({
2395  dbgs() << "\tinsert at cycle " << curCycle << " ";
2396  SU->getInstr()->dump();
2397  });
2398 
2399  ScheduledInstrs[curCycle].push_back(SU);
2400  InstrToCycle.insert(std::make_pair(SU, curCycle));
2401  if (curCycle > LastCycle)
2402  LastCycle = curCycle;
2403  if (curCycle < FirstCycle)
2404  FirstCycle = curCycle;
2405  return true;
2406  }
2407  LLVM_DEBUG({
2408  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2409  SU->getInstr()->dump();
2410  });
2411  }
2412  return false;
2413 }
2414 
2415 // Return the cycle of the earliest scheduled instruction in the chain.
2417  SmallPtrSet<SUnit *, 8> Visited;
2418  SmallVector<SDep, 8> Worklist;
2419  Worklist.push_back(Dep);
2420  int EarlyCycle = INT_MAX;
2421  while (!Worklist.empty()) {
2422  const SDep &Cur = Worklist.pop_back_val();
2423  SUnit *PrevSU = Cur.getSUnit();
2424  if (Visited.count(PrevSU))
2425  continue;
2426  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2427  if (it == InstrToCycle.end())
2428  continue;
2429  EarlyCycle = std::min(EarlyCycle, it->second);
2430  for (const auto &PI : PrevSU->Preds)
2431  if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
2432  Worklist.push_back(PI);
2433  Visited.insert(PrevSU);
2434  }
2435  return EarlyCycle;
2436 }
2437 
2438 // Return the cycle of the latest scheduled instruction in the chain.
2440  SmallPtrSet<SUnit *, 8> Visited;
2441  SmallVector<SDep, 8> Worklist;
2442  Worklist.push_back(Dep);
2443  int LateCycle = INT_MIN;
2444  while (!Worklist.empty()) {
2445  const SDep &Cur = Worklist.pop_back_val();
2446  SUnit *SuccSU = Cur.getSUnit();
2447  if (Visited.count(SuccSU))
2448  continue;
2449  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2450  if (it == InstrToCycle.end())
2451  continue;
2452  LateCycle = std::max(LateCycle, it->second);
2453  for (const auto &SI : SuccSU->Succs)
2454  if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
2455  Worklist.push_back(SI);
2456  Visited.insert(SuccSU);
2457  }
2458  return LateCycle;
2459 }
2460 
2461 /// If an instruction has a use that spans multiple iterations, then
2462 /// return true. These instructions are characterized by having a back-ege
2463 /// to a Phi, which contains a reference to another Phi.
2465  for (auto &P : SU->Preds)
2466  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2467  for (auto &S : P.getSUnit()->Succs)
2468  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2469  return P.getSUnit();
2470  return nullptr;
2471 }
2472 
2473 /// Compute the scheduling start slot for the instruction. The start slot
2474 /// depends on any predecessor or successor nodes scheduled already.
2475 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2476  int *MinEnd, int *MaxStart, int II,
2477  SwingSchedulerDAG *DAG) {
2478  // Iterate over each instruction that has been scheduled already. The start
2479  // slot computation depends on whether the previously scheduled instruction
2480  // is a predecessor or successor of the specified instruction.
2481  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2482 
2483  // Iterate over each instruction in the current cycle.
2484  for (SUnit *I : getInstructions(cycle)) {
2485  // Because we're processing a DAG for the dependences, we recognize
2486  // the back-edge in recurrences by anti dependences.
2487  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2488  const SDep &Dep = SU->Preds[i];
2489  if (Dep.getSUnit() == I) {
2490  if (!DAG->isBackedge(SU, Dep)) {
2491  int EarlyStart = cycle + Dep.getLatency() -
2492  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2493  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2494  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2495  int End = earliestCycleInChain(Dep) + (II - 1);
2496  *MinEnd = std::min(*MinEnd, End);
2497  }
2498  } else {
2499  int LateStart = cycle - Dep.getLatency() +
2500  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2501  *MinLateStart = std::min(*MinLateStart, LateStart);
2502  }
2503  }
2504  // For instruction that requires multiple iterations, make sure that
2505  // the dependent instruction is not scheduled past the definition.
2506  SUnit *BE = multipleIterations(I, DAG);
2507  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2508  !SU->isPred(I))
2509  *MinLateStart = std::min(*MinLateStart, cycle);
2510  }
2511  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2512  if (SU->Succs[i].getSUnit() == I) {
2513  const SDep &Dep = SU->Succs[i];
2514  if (!DAG->isBackedge(SU, Dep)) {
2515  int LateStart = cycle - Dep.getLatency() +
2516  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2517  *MinLateStart = std::min(*MinLateStart, LateStart);
2518  if (DAG->isLoopCarriedDep(SU, Dep)) {
2519  int Start = latestCycleInChain(Dep) + 1 - II;
2520  *MaxStart = std::max(*MaxStart, Start);
2521  }
2522  } else {
2523  int EarlyStart = cycle + Dep.getLatency() -
2524  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2525  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2526  }
2527  }
2528  }
2529  }
2530  }
2531 }
2532 
2533 /// Order the instructions within a cycle so that the definitions occur
2534 /// before the uses. Returns true if the instruction is added to the start
2535 /// of the list, or false if added to the end.
2537  std::deque<SUnit *> &Insts) {
2538  MachineInstr *MI = SU->getInstr();
2539  bool OrderBeforeUse = false;
2540  bool OrderAfterDef = false;
2541  bool OrderBeforeDef = false;
2542  unsigned MoveDef = 0;
2543  unsigned MoveUse = 0;
2544  int StageInst1 = stageScheduled(SU);
2545 
2546  unsigned Pos = 0;
2547  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2548  ++I, ++Pos) {
2549  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2550  MachineOperand &MO = MI->getOperand(i);
2551  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2552  continue;
2553 
2554  Register Reg = MO.getReg();
2555  unsigned BasePos, OffsetPos;
2556  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2557  if (MI->getOperand(BasePos).getReg() == Reg)
2558  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2559  Reg = NewReg;
2560  bool Reads, Writes;
2561  std::tie(Reads, Writes) =
2562  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2563  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2564  OrderBeforeUse = true;
2565  if (MoveUse == 0)
2566  MoveUse = Pos;
2567  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2568  // Add the instruction after the scheduled instruction.
2569  OrderAfterDef = true;
2570  MoveDef = Pos;
2571  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2572  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2573  OrderBeforeUse = true;
2574  if (MoveUse == 0)
2575  MoveUse = Pos;
2576  } else {
2577  OrderAfterDef = true;
2578  MoveDef = Pos;
2579  }
2580  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2581  OrderBeforeUse = true;
2582  if (MoveUse == 0)
2583  MoveUse = Pos;
2584  if (MoveUse != 0) {
2585  OrderAfterDef = true;
2586  MoveDef = Pos - 1;
2587  }
2588  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2589  // Add the instruction before the scheduled instruction.
2590  OrderBeforeUse = true;
2591  if (MoveUse == 0)
2592  MoveUse = Pos;
2593  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2594  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2595  if (MoveUse == 0) {
2596  OrderBeforeDef = true;
2597  MoveUse = Pos;
2598  }
2599  }
2600  }
2601  // Check for order dependences between instructions. Make sure the source
2602  // is ordered before the destination.
2603  for (auto &S : SU->Succs) {
2604  if (S.getSUnit() != *I)
2605  continue;
2606  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2607  OrderBeforeUse = true;
2608  if (Pos < MoveUse)
2609  MoveUse = Pos;
2610  }
2611  // We did not handle HW dependences in previous for loop,
2612  // and we normally set Latency = 0 for Anti deps,
2613  // so may have nodes in same cycle with Anti denpendent on HW regs.
2614  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2615  OrderBeforeUse = true;
2616  if ((MoveUse == 0) || (Pos < MoveUse))
2617  MoveUse = Pos;
2618  }
2619  }
2620  for (auto &P : SU->Preds) {
2621  if (P.getSUnit() != *I)
2622  continue;
2623  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2624  OrderAfterDef = true;
2625  MoveDef = Pos;
2626  }
2627  }
2628  }
2629 
2630  // A circular dependence.
2631  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2632  OrderBeforeUse = false;
2633 
2634  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2635  // to a loop-carried dependence.
2636  if (OrderBeforeDef)
2637  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2638 
2639  // The uncommon case when the instruction order needs to be updated because
2640  // there is both a use and def.
2641  if (OrderBeforeUse && OrderAfterDef) {
2642  SUnit *UseSU = Insts.at(MoveUse);
2643  SUnit *DefSU = Insts.at(MoveDef);
2644  if (MoveUse > MoveDef) {
2645  Insts.erase(Insts.begin() + MoveUse);
2646  Insts.erase(Insts.begin() + MoveDef);
2647  } else {
2648  Insts.erase(Insts.begin() + MoveDef);
2649  Insts.erase(Insts.begin() + MoveUse);
2650  }
2651  orderDependence(SSD, UseSU, Insts);
2652  orderDependence(SSD, SU, Insts);
2653  orderDependence(SSD, DefSU, Insts);
2654  return;
2655  }
2656  // Put the new instruction first if there is a use in the list. Otherwise,
2657  // put it at the end of the list.
2658  if (OrderBeforeUse)
2659  Insts.push_front(SU);
2660  else
2661  Insts.push_back(SU);
2662 }
2663 
2664 /// Return true if the scheduled Phi has a loop carried operand.
2666  if (!Phi.isPHI())
2667  return false;
2668  assert(Phi.isPHI() && "Expecting a Phi.");
2669  SUnit *DefSU = SSD->getSUnit(&Phi);
2670  unsigned DefCycle = cycleScheduled(DefSU);
2671  int DefStage = stageScheduled(DefSU);
2672 
2673  unsigned InitVal = 0;
2674  unsigned LoopVal = 0;
2675  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2676  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2677  if (!UseSU)
2678  return true;
2679  if (UseSU->getInstr()->isPHI())
2680  return true;
2681  unsigned LoopCycle = cycleScheduled(UseSU);
2682  int LoopStage = stageScheduled(UseSU);
2683  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2684 }
2685 
2686 /// Return true if the instruction is a definition that is loop carried
2687 /// and defines the use on the next iteration.
2688 /// v1 = phi(v2, v3)
2689 /// (Def) v3 = op v1
2690 /// (MO) = v1
2691 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2692 /// register.
2695  if (!MO.isReg())
2696  return false;
2697  if (Def->isPHI())
2698  return false;
2699  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2700  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2701  return false;
2702  if (!isLoopCarried(SSD, *Phi))
2703  return false;
2704  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2705  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2706  MachineOperand &DMO = Def->getOperand(i);
2707  if (!DMO.isReg() || !DMO.isDef())
2708  continue;
2709  if (DMO.getReg() == LoopReg)
2710  return true;
2711  }
2712  return false;
2713 }
2714 
2715 // Check if the generated schedule is valid. This function checks if
2716 // an instruction that uses a physical register is scheduled in a
2717 // different stage than the definition. The pipeliner does not handle
2718 // physical register values that may cross a basic block boundary.
2720  for (SUnit &SU : SSD->SUnits) {
2721  if (!SU.hasPhysRegDefs)
2722  continue;
2723  int StageDef = stageScheduled(&SU);
2724  assert(StageDef != -1 && "Instruction should have been scheduled.");
2725  for (auto &SI : SU.Succs)
2726  if (SI.isAssignedRegDep())
2727  if (Register::isPhysicalRegister(SI.getReg()))
2728  if (stageScheduled(SI.getSUnit()) != StageDef)
2729  return false;
2730  }
2731  return true;
2732 }
2733 
2734 /// A property of the node order in swing-modulo-scheduling is
2735 /// that for nodes outside circuits the following holds:
2736 /// none of them is scheduled after both a successor and a
2737 /// predecessor.
2738 /// The method below checks whether the property is met.
2739 /// If not, debug information is printed and statistics information updated.
2740 /// Note that we do not use an assert statement.
2741 /// The reason is that although an invalid node oder may prevent
2742 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2743 /// it does not lead to the generation of incorrect code.
2744 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2745 
2746  // a sorted vector that maps each SUnit to its index in the NodeOrder
2747  typedef std::pair<SUnit *, unsigned> UnitIndex;
2748  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2749 
2750  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2751  Indices.push_back(std::make_pair(NodeOrder[i], i));
2752 
2753  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2754  return std::get<0>(i1) < std::get<0>(i2);
2755  };
2756 
2757  // sort, so that we can perform a binary search
2758  llvm::sort(Indices, CompareKey);
2759 
2760  bool Valid = true;
2761  (void)Valid;
2762  // for each SUnit in the NodeOrder, check whether
2763  // it appears after both a successor and a predecessor
2764  // of the SUnit. If this is the case, and the SUnit
2765  // is not part of circuit, then the NodeOrder is not
2766  // valid.
2767  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2768  SUnit *SU = NodeOrder[i];
2769  unsigned Index = i;
2770 
2771  bool PredBefore = false;
2772  bool SuccBefore = false;
2773 
2774  SUnit *Succ;
2775  SUnit *Pred;
2776  (void)Succ;
2777  (void)Pred;
2778 
2779  for (SDep &PredEdge : SU->Preds) {
2780  SUnit *PredSU = PredEdge.getSUnit();
2781  unsigned PredIndex = std::get<1>(
2782  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2783  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2784  PredBefore = true;
2785  Pred = PredSU;
2786  break;
2787  }
2788  }
2789 
2790  for (SDep &SuccEdge : SU->Succs) {
2791  SUnit *SuccSU = SuccEdge.getSUnit();
2792  // Do not process a boundary node, it was not included in NodeOrder,
2793  // hence not in Indices either, call to std::lower_bound() below will
2794  // return Indices.end().
2795  if (SuccSU->isBoundaryNode())
2796  continue;
2797  unsigned SuccIndex = std::get<1>(
2798  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2799  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2800  SuccBefore = true;
2801  Succ = SuccSU;
2802  break;
2803  }
2804  }
2805 
2806  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2807  // instructions in circuits are allowed to be scheduled
2808  // after both a successor and predecessor.
2809  bool InCircuit = llvm::any_of(
2810  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2811  if (InCircuit)
2812  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2813  else {
2814  Valid = false;
2815  NumNodeOrderIssues++;
2816  LLVM_DEBUG(dbgs() << "Predecessor ";);
2817  }
2818  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2819  << " are scheduled before node " << SU->NodeNum
2820  << "\n";);
2821  }
2822  }
2823 
2824  LLVM_DEBUG({
2825  if (!Valid)
2826  dbgs() << "Invalid node order found!\n";
2827  });
2828 }
2829 
2830 /// Attempt to fix the degenerate cases when the instruction serialization
2831 /// causes the register lifetimes to overlap. For example,
2832 /// p' = store_pi(p, b)
2833 /// = load p, offset
2834 /// In this case p and p' overlap, which means that two registers are needed.
2835 /// Instead, this function changes the load to use p' and updates the offset.
2836 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2837  unsigned OverlapReg = 0;
2838  unsigned NewBaseReg = 0;
2839  for (SUnit *SU : Instrs) {
2840  MachineInstr *MI = SU->getInstr();
2841  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2842  const MachineOperand &MO = MI->getOperand(i);
2843  // Look for an instruction that uses p. The instruction occurs in the
2844  // same cycle but occurs later in the serialized order.
2845  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2846  // Check that the instruction appears in the InstrChanges structure,
2847  // which contains instructions that can have the offset updated.
2849  InstrChanges.find(SU);
2850  if (It != InstrChanges.end()) {
2851  unsigned BasePos, OffsetPos;
2852  // Update the base register and adjust the offset.
2853  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2854  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2855  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2856  int64_t NewOffset =
2857  MI->getOperand(OffsetPos).getImm() - It->second.second;
2858  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2859  SU->setInstr(NewMI);
2860  MISUnitMap[NewMI] = SU;
2861  NewMIs[MI] = NewMI;
2862  }
2863  }
2864  OverlapReg = 0;
2865  NewBaseReg = 0;
2866  break;
2867  }
2868  // Look for an instruction of the form p' = op(p), which uses and defines
2869  // two virtual registers that get allocated to the same physical register.
2870  unsigned TiedUseIdx = 0;
2871  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2872  // OverlapReg is p in the example above.
2873  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2874  // NewBaseReg is p' in the example above.
2875  NewBaseReg = MI->getOperand(i).getReg();
2876  break;
2877  }
2878  }
2879  }
2880 }
2881 
2882 /// After the schedule has been formed, call this function to combine
2883 /// the instructions from the different stages/cycles. That is, this
2884 /// function creates a schedule that represents a single iteration.
2886  // Move all instructions to the first stage from later stages.
2887  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2888  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2889  ++stage) {
2890  std::deque<SUnit *> &cycleInstrs =
2891  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2892  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
2893  E = cycleInstrs.rend();
2894  I != E; ++I)
2895  ScheduledInstrs[cycle].push_front(*I);
2896  }
2897  }
2898 
2899  // Erase all the elements in the later stages. Only one iteration should
2900  // remain in the scheduled list, and it contains all the instructions.
2901  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2902  ScheduledInstrs.erase(cycle);
2903 
2904  // Change the registers in instruction as specified in the InstrChanges
2905  // map. We need to use the new registers to create the correct order.
2906  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
2907  SUnit *SU = &SSD->SUnits[i];
2908  SSD->applyInstrChange(SU->getInstr(), *this);
2909  }
2910 
2911  // Reorder the instructions in each cycle to fix and improve the
2912  // generated code.
2913  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2914  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2915  std::deque<SUnit *> newOrderPhi;
2916  for (SUnit *SU : cycleInstrs) {
2917  if (SU->getInstr()->isPHI())
2918  newOrderPhi.push_back(SU);
2919  }
2920  std::deque<SUnit *> newOrderI;
2921  for (SUnit *SU : cycleInstrs) {
2922  if (!SU->getInstr()->isPHI())
2923  orderDependence(SSD, SU, newOrderI);
2924  }
2925  // Replace the old order with the new order.
2926  cycleInstrs.swap(newOrderPhi);
2927  llvm::append_range(cycleInstrs, newOrderI);
2928  SSD->fixupRegisterOverlaps(cycleInstrs);
2929  }
2930 
2931  LLVM_DEBUG(dump(););
2932 }
2933 
2934 void NodeSet::print(raw_ostream &os) const {
2935  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2936  << " depth " << MaxDepth << " col " << Colocate << "\n";
2937  for (const auto &I : Nodes)
2938  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2939  os << "\n";
2940 }
2941 
2942 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2943 /// Print the schedule information to the given output.
2945  // Iterate over each cycle.
2946  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2947  // Iterate over each instruction in the cycle.
2948  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2949  for (SUnit *CI : cycleInstrs->second) {
2950  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2951  os << "(" << CI->NodeNum << ") ";
2952  CI->getInstr()->print(os);
2953  os << "\n";
2954  }
2955  }
2956 }
2957 
2958 /// Utility function used for debugging to print the schedule.
2961 
2962 #endif
2963 
2965  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2966  unsigned ProcResourceID = 0;
2967 
2968  // We currently limit the resource kinds to 64 and below so that we can use
2969  // uint64_t for Masks
2970  assert(SM.getNumProcResourceKinds() < 64 &&
2971  "Too many kinds of resources, unsupported");
2972  // Create a unique bitmask for every processor resource unit.
2973  // Skip resource at index 0, since it always references 'InvalidUnit'.
2974  Masks.resize(SM.getNumProcResourceKinds());
2975  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2976  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2977  if (Desc.SubUnitsIdxBegin)
2978  continue;
2979  Masks[I] = 1ULL << ProcResourceID;
2980  ProcResourceID++;
2981  }
2982  // Create a unique bitmask for every processor resource group.
2983  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2984  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2985  if (!Desc.SubUnitsIdxBegin)
2986  continue;
2987  Masks[I] = 1ULL << ProcResourceID;
2988  for (unsigned U = 0; U < Desc.NumUnits; ++U)
2989  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
2990  ProcResourceID++;
2991  }
2992  LLVM_DEBUG({
2993  if (SwpShowResMask) {
2994  dbgs() << "ProcResourceDesc:\n";
2995  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2996  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
2997  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
2998  ProcResource->Name, I, Masks[I],
2999  ProcResource->NumUnits);
3000  }
3001  dbgs() << " -----------------\n";
3002  }
3003  });
3004 }
3005 
3007 
3008  LLVM_DEBUG({
3009  if (SwpDebugResource)
3010  dbgs() << "canReserveResources:\n";
3011  });
3012  if (UseDFA)
3013  return DFAResources->canReserveResources(MID);
3014 
3015  unsigned InsnClass = MID->getSchedClass();
3016  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3017  if (!SCDesc->isValid()) {
3018  LLVM_DEBUG({
3019  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3020  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3021  });
3022  return true;
3023  }
3024 
3025  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
3026  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
3027  for (; I != E; ++I) {
3028  if (!I->Cycles)
3029  continue;
3030  const MCProcResourceDesc *ProcResource =
3031  SM.getProcResource(I->ProcResourceIdx);
3032  unsigned NumUnits = ProcResource->NumUnits;
3033  LLVM_DEBUG({
3034  if (SwpDebugResource)
3035  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3036  ProcResource->Name, I->ProcResourceIdx,
3037  ProcResourceCount[I->ProcResourceIdx], NumUnits,
3038  I->Cycles);
3039  });
3040  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
3041  return false;
3042  }
3043  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
3044  return true;
3045 }
3046 
3048  LLVM_DEBUG({
3049  if (SwpDebugResource)
3050  dbgs() << "reserveResources:\n";
3051  });
3052  if (UseDFA)
3053  return DFAResources->reserveResources(MID);
3054 
3055  unsigned InsnClass = MID->getSchedClass();
3056  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3057  if (!SCDesc->isValid()) {
3058  LLVM_DEBUG({
3059  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3060  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3061  });
3062  return;
3063  }
3064  for (const MCWriteProcResEntry &PRE :
3065  make_range(STI->getWriteProcResBegin(SCDesc),
3066  STI->getWriteProcResEnd(SCDesc))) {
3067  if (!PRE.Cycles)
3068  continue;
3069  ++ProcResourceCount[PRE.ProcResourceIdx];
3070  LLVM_DEBUG({
3071  if (SwpDebugResource) {
3072  const MCProcResourceDesc *ProcResource =
3073  SM.getProcResource(PRE.ProcResourceIdx);
3074  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3075  ProcResource->Name, PRE.ProcResourceIdx,
3076  ProcResourceCount[PRE.ProcResourceIdx],
3077  ProcResource->NumUnits, PRE.Cycles);
3078  }
3079  });
3080  }
3081  LLVM_DEBUG({
3082  if (SwpDebugResource)
3083  dbgs() << "reserveResources: done!\n\n";
3084  });
3085 }
3086 
3088  return canReserveResources(&MI.getDesc());
3089 }
3090 
3092  return reserveResources(&MI.getDesc());
3093 }
3094 
3096  if (UseDFA)
3097  return DFAResources->clearResources();
3098  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3099 }
i
i
Definition: README.txt:29
SDAG
QP Compare Ordered outs ins xscmpudp No SDAG
Definition: README_P9.txt:301
llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:106
computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1605
llvm::cl::Option::getPosition
unsigned getPosition() const
Definition: CommandLine.h:305
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:170
EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:1817
llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
Definition: ScheduleDAG.cpp:566
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1066
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:491
MathExtras.h
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
Definition: SetOperations.h:71
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:79
llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:229
llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:650
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:2934
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:181
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:2960
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:78
SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MCInstrDesc.h
SetOperations.h
llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:62
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:162
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:730
llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:708
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1661
llvm::MCInstrDesc::isPseudo
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:264
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:184
llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:371
llvm::MapVector::clear
void clear()
Definition: MapVector.h:88
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:194
llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:724
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:612
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:227
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: MachinePipeliner.cpp:677
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
MapVector.h
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:510
llvm::SUnit::const_pred_iterator
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:471
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:269
ValueTracking.h
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:34
llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:68
MachineBasicBlock.h
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:655
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1373
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::DenseMapIterator
Definition: DenseMap.h:56
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:486
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1382
DenseMap.h
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::RegisterMaskPair
Definition: RegisterPressure.h:39
llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2277
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2464
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1430
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1291
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:233
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:359
llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition: MachinePipeliner.cpp:2363
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:279
llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:80
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:66
computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1578
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:258
SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:141
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:128
RegisterPressure.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:193
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1376
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:579
llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:336
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1118
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:84
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1592
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:56
CommandLine.h
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
MCInstrItineraries.h
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:37
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1320
MachineLoopInfo.h
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:2959
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:250
llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:240
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:236
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:471
llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:217
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
llvm::MachineFunction::DeleteMachineInstr
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:393
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:537
DFAPacketizer.h
IP
Definition: NVPTXLowerArgs.cpp:166
llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:534
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:548
llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:527
llvm::Instruction
Definition: Instruction.h:45
llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:2719
SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:108
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::SMSchedule::orderDependence
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2536
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
BitVector.h
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:708
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:518
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
DebugLoc.h
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:240
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::ScheduleDAGTopologicalSort::const_reverse_iterator
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:778
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:311
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:365
llvm::SMSchedule::isLoopCarried
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:2665
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:96
llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:60
llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:535
llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:752
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ResourceManager::canReserveResources
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
Definition: MachinePipeliner.cpp:3006
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1354
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1112
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2475
llvm::cl::opt< bool >
llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431
llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:528
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4427
llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:445
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:147
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:49
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1634
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2187
uint64_t
LiveIntervals.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:530
llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:82
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:358
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:373
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:113
llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:63
I
#define I(x, y, z)
Definition: MD5.cpp:59
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Definition: MachinePipeliner.h:213
llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:357
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
MCRegisterInfo.h
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:200
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:125
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:193
ArrayRef.h
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:933
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:82
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:50
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:661
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:239
llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:81
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:2693
isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:685
llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:363
iterator_range.h
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1255
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:115
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SUnit::const_succ_iterator
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:584
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1540
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
ScheduleDAGMutation.h
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:242
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:224
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:166
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:375
SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:68
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:86
llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1988
llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:218
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1748
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1577
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:33
llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1172
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:353
llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:2885
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:158
llvm::ResourceManager::reserveResources
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change.
Definition: MachinePipeliner.cpp:3047
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
llvm::MapVector::end
iterator end()
Definition: MapVector.h:71
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:58
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:239
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1283
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1686
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:973
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1424
llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:2964
llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:220
swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1193
SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:387
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1018
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:625
llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:67
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:340
llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1457
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::MachineLoop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: MachineLoopInfo.cpp:101
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1492
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::ResourceManager::clearResources
void clearResources()
Reset the state.
Definition: MachinePipeliner.cpp:3095
llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:438
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1791
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:245
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1520
llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:2944
llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2222
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:69
llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2416
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:677
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:186
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:73
llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1164
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::rdf::NodeSet
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
MachineInstrBuilder.h
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:182
succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1550
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:267
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:562
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:78
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
LaneBitmask.h
llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:347
llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:361
MachineMemOperand.h
llvm::SmallVectorImpl< const Value * >
MachineOperand.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:345
pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:70
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:159
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:62
llvm::SUnit::pred_iterator
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachinePipeliner.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:135
llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:579
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:321
llvm::cl::desc
Definition: CommandLine.h:414
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
raw_ostream.h
llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2439
llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:572
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineFunction.h
llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1038
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:2836
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::ResourceManager
Definition: MachinePipeliner.h:434
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:618
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:480
llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:210
TargetRegisterInfo.h
Debug.h
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:563
SetVector.h
llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:488
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
MachineDominators.h
llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:264
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364