LLVM  14.0.0git
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetOperations.h"
38 #include "llvm/ADT/SetVector.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
67 #include "llvm/Config/llvm-config.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/Function.h"
71 #include "llvm/MC/LaneBitmask.h"
72 #include "llvm/MC/MCInstrDesc.h"
74 #include "llvm/MC/MCRegisterInfo.h"
75 #include "llvm/Pass.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <climits>
84 #include <cstdint>
85 #include <deque>
86 #include <functional>
87 #include <iterator>
88 #include <map>
89 #include <memory>
90 #include <tuple>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 
96 #define DEBUG_TYPE "pipeliner"
97 
98 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
99 STATISTIC(NumPipelined, "Number of loops software pipelined");
100 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
101 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
102 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
103 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
104 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
105 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
106 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
107 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
108 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
109 
110 /// A command line option to turn software pipelining on or off.
111 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
113  cl::desc("Enable Software Pipelining"));
114 
115 /// A command line option to enable SWP at -Os.
116 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
117  cl::desc("Enable SWP at Os."), cl::Hidden,
118  cl::init(false));
119 
120 /// A command line argument to limit minimum initial interval for pipelining.
121 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
122  cl::desc("Size limit for the MII."),
123  cl::Hidden, cl::init(27));
124 
125 /// A command line argument to limit the number of stages in the pipeline.
126 static cl::opt<int>
127  SwpMaxStages("pipeliner-max-stages",
128  cl::desc("Maximum stages allowed in the generated scheduled."),
129  cl::Hidden, cl::init(3));
130 
131 /// A command line option to disable the pruning of chain dependences due to
132 /// an unrelated Phi.
133 static cl::opt<bool>
134  SwpPruneDeps("pipeliner-prune-deps",
135  cl::desc("Prune dependences between unrelated Phi nodes."),
136  cl::Hidden, cl::init(true));
137 
138 /// A command line option to disable the pruning of loop carried order
139 /// dependences.
140 static cl::opt<bool>
141  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
142  cl::desc("Prune loop carried order dependences."),
143  cl::Hidden, cl::init(true));
144 
145 #ifndef NDEBUG
146 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
147 #endif
148 
149 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
150  cl::ReallyHidden, cl::init(false),
151  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
152 
153 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
154  cl::init(false));
155 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
156  cl::init(false));
157 
159  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
160  cl::desc("Instead of emitting the pipelined code, annotate instructions "
161  "with the generated schedule for feeding into the "
162  "-modulo-schedule-test pass"));
163 
165  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
166  cl::desc(
167  "Use the experimental peeling code generator for software pipelining"));
168 
169 namespace llvm {
170 
171 // A command line option to enable the CopyToPhi DAG mutation.
173  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
174  cl::init(true), cl::ZeroOrMore,
175  cl::desc("Enable CopyToPhi DAG Mutation"));
176 
177 } // end namespace llvm
178 
179 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
180 char MachinePipeliner::ID = 0;
181 #ifndef NDEBUG
183 #endif
185 
187  "Modulo Software Pipelining", false, false)
193  "Modulo Software Pipelining", false, false)
194 
195 /// The "main" function for implementing Swing Modulo Scheduling.
196 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
197  if (skipFunction(mf.getFunction()))
198  return false;
199 
200  if (!EnableSWP)
201  return false;
202 
203  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
205  return false;
206 
207  if (!mf.getSubtarget().enableMachinePipeliner())
208  return false;
209 
210  // Cannot pipeline loops without instruction itineraries if we are using
211  // DFA for the pipeliner.
212  if (mf.getSubtarget().useDFAforSMS() &&
213  (!mf.getSubtarget().getInstrItineraryData() ||
214  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
215  return false;
216 
217  MF = &mf;
218  MLI = &getAnalysis<MachineLoopInfo>();
219  MDT = &getAnalysis<MachineDominatorTree>();
220  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
221  TII = MF->getSubtarget().getInstrInfo();
222  RegClassInfo.runOnMachineFunction(*MF);
223 
224  for (auto &L : *MLI)
225  scheduleLoop(*L);
226 
227  return false;
228 }
229 
230 /// Attempt to perform the SMS algorithm on the specified loop. This function is
231 /// the main entry point for the algorithm. The function identifies candidate
232 /// loops, calculates the minimum initiation interval, and attempts to schedule
233 /// the loop.
234 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
235  bool Changed = false;
236  for (auto &InnerLoop : L)
237  Changed |= scheduleLoop(*InnerLoop);
238 
239 #ifndef NDEBUG
240  // Stop trying after reaching the limit (if any).
241  int Limit = SwpLoopLimit;
242  if (Limit >= 0) {
243  if (NumTries >= SwpLoopLimit)
244  return Changed;
245  NumTries++;
246  }
247 #endif
248 
249  setPragmaPipelineOptions(L);
250  if (!canPipelineLoop(L)) {
251  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
252  ORE->emit([&]() {
253  return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
254  L.getStartLoc(), L.getHeader())
255  << "Failed to pipeline loop";
256  });
257 
258  return Changed;
259  }
260 
261  ++NumTrytoPipeline;
262 
263  Changed = swingModuloScheduler(L);
264 
265  return Changed;
266 }
267 
268 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
269  // Reset the pragma for the next loop in iteration.
270  disabledByPragma = false;
271  II_setByPragma = 0;
272 
273  MachineBasicBlock *LBLK = L.getTopBlock();
274 
275  if (LBLK == nullptr)
276  return;
277 
278  const BasicBlock *BBLK = LBLK->getBasicBlock();
279  if (BBLK == nullptr)
280  return;
281 
282  const Instruction *TI = BBLK->getTerminator();
283  if (TI == nullptr)
284  return;
285 
286  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
287  if (LoopID == nullptr)
288  return;
289 
290  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
291  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
292 
293  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
294  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
295 
296  if (MD == nullptr)
297  continue;
298 
299  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
300 
301  if (S == nullptr)
302  continue;
303 
304  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
305  assert(MD->getNumOperands() == 2 &&
306  "Pipeline initiation interval hint metadata should have two operands.");
308  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
309  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
310  } else if (S->getString() == "llvm.loop.pipeline.disable") {
311  disabledByPragma = true;
312  }
313  }
314 }
315 
316 /// Return true if the loop can be software pipelined. The algorithm is
317 /// restricted to loops with a single basic block. Make sure that the
318 /// branch in the loop can be analyzed.
319 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
320  if (L.getNumBlocks() != 1) {
321  ORE->emit([&]() {
322  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
323  L.getStartLoc(), L.getHeader())
324  << "Not a single basic block: "
325  << ore::NV("NumBlocks", L.getNumBlocks());
326  });
327  return false;
328  }
329 
330  if (disabledByPragma) {
331  ORE->emit([&]() {
332  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
333  L.getStartLoc(), L.getHeader())
334  << "Disabled by Pragma.";
335  });
336  return false;
337  }
338 
339  // Check if the branch can't be understood because we can't do pipelining
340  // if that's the case.
341  LI.TBB = nullptr;
342  LI.FBB = nullptr;
343  LI.BrCond.clear();
344  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
345  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
346  NumFailBranch++;
347  ORE->emit([&]() {
348  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
349  L.getStartLoc(), L.getHeader())
350  << "The branch can't be understood";
351  });
352  return false;
353  }
354 
355  LI.LoopInductionVar = nullptr;
356  LI.LoopCompare = nullptr;
358  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
359  NumFailLoop++;
360  ORE->emit([&]() {
361  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
362  L.getStartLoc(), L.getHeader())
363  << "The loop structure is not supported";
364  });
365  return false;
366  }
367 
368  if (!L.getLoopPreheader()) {
369  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
370  NumFailPreheader++;
371  ORE->emit([&]() {
372  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
373  L.getStartLoc(), L.getHeader())
374  << "No loop preheader found";
375  });
376  return false;
377  }
378 
379  // Remove any subregisters from inputs to phi nodes.
380  preprocessPhiNodes(*L.getHeader());
381  return true;
382 }
383 
384 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
386  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
387 
388  for (MachineInstr &PI : B.phis()) {
389  MachineOperand &DefOp = PI.getOperand(0);
390  assert(DefOp.getSubReg() == 0);
391  auto *RC = MRI.getRegClass(DefOp.getReg());
392 
393  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
394  MachineOperand &RegOp = PI.getOperand(i);
395  if (RegOp.getSubReg() == 0)
396  continue;
397 
398  // If the operand uses a subregister, replace it with a new register
399  // without subregisters, and generate a copy to the new register.
400  Register NewReg = MRI.createVirtualRegister(RC);
401  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
403  const DebugLoc &DL = PredB.findDebugLoc(At);
404  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
405  .addReg(RegOp.getReg(), getRegState(RegOp),
406  RegOp.getSubReg());
407  Slots.insertMachineInstrInMaps(*Copy);
408  RegOp.setReg(NewReg);
409  RegOp.setSubReg(0);
410  }
411  }
412 }
413 
414 /// The SMS algorithm consists of the following main steps:
415 /// 1. Computation and analysis of the dependence graph.
416 /// 2. Ordering of the nodes (instructions).
417 /// 3. Attempt to Schedule the loop.
418 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
419  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
420 
421  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
423 
425  // The kernel should not include any terminator instructions. These
426  // will be added back later.
427  SMS.startBlock(MBB);
428 
429  // Compute the number of 'real' instructions in the basic block by
430  // ignoring terminators.
431  unsigned size = MBB->size();
433  E = MBB->instr_end();
434  I != E; ++I, --size)
435  ;
436 
437  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
438  SMS.schedule();
439  SMS.exitRegion();
440 
441  SMS.finishBlock();
442  return SMS.hasNewSchedule();
443 }
444 
453 }
454 
455 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
456  if (II_setByPragma > 0)
457  MII = II_setByPragma;
458  else
459  MII = std::max(ResMII, RecMII);
460 }
461 
462 void SwingSchedulerDAG::setMAX_II() {
463  if (II_setByPragma > 0)
464  MAX_II = II_setByPragma;
465  else
466  MAX_II = MII + 10;
467 }
468 
469 /// We override the schedule function in ScheduleDAGInstrs to implement the
470 /// scheduling part of the Swing Modulo Scheduling algorithm.
472  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
473  buildSchedGraph(AA);
474  addLoopCarriedDependences(AA);
475  updatePhiDependences();
477  changeDependences();
478  postprocessDAG();
479  LLVM_DEBUG(dump());
480 
481  NodeSetType NodeSets;
482  findCircuits(NodeSets);
483  NodeSetType Circuits = NodeSets;
484 
485  // Calculate the MII.
486  unsigned ResMII = calculateResMII();
487  unsigned RecMII = calculateRecMII(NodeSets);
488 
489  fuseRecs(NodeSets);
490 
491  // This flag is used for testing and can cause correctness problems.
492  if (SwpIgnoreRecMII)
493  RecMII = 0;
494 
495  setMII(ResMII, RecMII);
496  setMAX_II();
497 
498  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
499  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
500 
501  // Can't schedule a loop without a valid MII.
502  if (MII == 0) {
503  LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
504  NumFailZeroMII++;
505  Pass.ORE->emit([&]() {
507  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
508  << "Invalid Minimal Initiation Interval: 0";
509  });
510  return;
511  }
512 
513  // Don't pipeline large loops.
514  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
515  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
516  << ", we don't pipleline large loops\n");
517  NumFailLargeMaxMII++;
518  Pass.ORE->emit([&]() {
520  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
521  << "Minimal Initiation Interval too large: "
522  << ore::NV("MII", (int)MII) << " > "
523  << ore::NV("SwpMaxMii", SwpMaxMii) << "."
524  << "Refer to -pipeliner-max-mii.";
525  });
526  return;
527  }
528 
529  computeNodeFunctions(NodeSets);
530 
531  registerPressureFilter(NodeSets);
532 
533  colocateNodeSets(NodeSets);
534 
535  checkNodeSets(NodeSets);
536 
537  LLVM_DEBUG({
538  for (auto &I : NodeSets) {
539  dbgs() << " Rec NodeSet ";
540  I.dump();
541  }
542  });
543 
544  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
545 
546  groupRemainingNodes(NodeSets);
547 
548  removeDuplicateNodes(NodeSets);
549 
550  LLVM_DEBUG({
551  for (auto &I : NodeSets) {
552  dbgs() << " NodeSet ";
553  I.dump();
554  }
555  });
556 
557  computeNodeOrder(NodeSets);
558 
559  // check for node order issues
560  checkValidNodeOrder(Circuits);
561 
562  SMSchedule Schedule(Pass.MF);
563  Scheduled = schedulePipeline(Schedule);
564 
565  if (!Scheduled){
566  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
567  NumFailNoSchedule++;
568  Pass.ORE->emit([&]() {
570  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
571  << "Unable to find schedule";
572  });
573  return;
574  }
575 
576  unsigned numStages = Schedule.getMaxStageCount();
577  // No need to generate pipeline if there are no overlapped iterations.
578  if (numStages == 0) {
579  LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
580  NumFailZeroStage++;
581  Pass.ORE->emit([&]() {
583  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
584  << "No need to pipeline - no overlapped iterations in schedule.";
585  });
586  return;
587  }
588  // Check that the maximum stage count is less than user-defined limit.
589  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
590  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
591  << " : too many stages, abort\n");
592  NumFailLargeMaxStage++;
593  Pass.ORE->emit([&]() {
595  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
596  << "Too many stages in schedule: "
597  << ore::NV("numStages", (int)numStages) << " > "
598  << ore::NV("SwpMaxStages", SwpMaxStages)
599  << ". Refer to -pipeliner-max-stages.";
600  });
601  return;
602  }
603 
604  Pass.ORE->emit([&]() {
606  Loop.getHeader())
607  << "Pipelined succesfully!";
608  });
609 
610  // Generate the schedule as a ModuloSchedule.
611  DenseMap<MachineInstr *, int> Cycles, Stages;
612  std::vector<MachineInstr *> OrderedInsts;
613  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
614  ++Cycle) {
615  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
616  OrderedInsts.push_back(SU->getInstr());
617  Cycles[SU->getInstr()] = Cycle;
618  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
619  }
620  }
622  for (auto &KV : NewMIs) {
623  Cycles[KV.first] = Cycles[KV.second];
624  Stages[KV.first] = Stages[KV.second];
625  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
626  }
627 
628  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
629  std::move(Stages));
630  if (EmitTestAnnotations) {
631  assert(NewInstrChanges.empty() &&
632  "Cannot serialize a schedule with InstrChanges!");
634  MSTI.annotate();
635  return;
636  }
637  // The experimental code generator can't work if there are InstChanges.
638  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
639  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
640  MSE.expand();
641  } else {
642  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
643  MSE.expand();
644  MSE.cleanup();
645  }
646  ++NumPipelined;
647 }
648 
649 /// Clean up after the software pipeliner runs.
651  for (auto &KV : NewMIs)
652  MF.deleteMachineInstr(KV.second);
653  NewMIs.clear();
654 
655  // Call the superclass.
657 }
658 
659 /// Return the register values for the operands of a Phi instruction.
660 /// This function assume the instruction is a Phi.
662  unsigned &InitVal, unsigned &LoopVal) {
663  assert(Phi.isPHI() && "Expecting a Phi.");
664 
665  InitVal = 0;
666  LoopVal = 0;
667  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
668  if (Phi.getOperand(i + 1).getMBB() != Loop)
669  InitVal = Phi.getOperand(i).getReg();
670  else
671  LoopVal = Phi.getOperand(i).getReg();
672 
673  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
674 }
675 
676 /// Return the Phi register value that comes the loop block.
677 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
678  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
679  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
680  return Phi.getOperand(i).getReg();
681  return 0;
682 }
683 
684 /// Return true if SUb can be reached from SUa following the chain edges.
685 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
686  SmallPtrSet<SUnit *, 8> Visited;
687  SmallVector<SUnit *, 8> Worklist;
688  Worklist.push_back(SUa);
689  while (!Worklist.empty()) {
690  const SUnit *SU = Worklist.pop_back_val();
691  for (auto &SI : SU->Succs) {
692  SUnit *SuccSU = SI.getSUnit();
693  if (SI.getKind() == SDep::Order) {
694  if (Visited.count(SuccSU))
695  continue;
696  if (SuccSU == SUb)
697  return true;
698  Worklist.push_back(SuccSU);
699  Visited.insert(SuccSU);
700  }
701  }
702  }
703  return false;
704 }
705 
706 /// Return true if the instruction causes a chain between memory
707 /// references before and after it.
709  return MI.isCall() || MI.mayRaiseFPException() ||
710  MI.hasUnmodeledSideEffects() ||
711  (MI.hasOrderedMemoryRef() &&
712  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
713 }
714 
715 /// Return the underlying objects for the memory references of an instruction.
716 /// This function calls the code in ValueTracking, but first checks that the
717 /// instruction has a memory operand.
720  if (!MI->hasOneMemOperand())
721  return;
722  MachineMemOperand *MM = *MI->memoperands_begin();
723  if (!MM->getValue())
724  return;
725  getUnderlyingObjects(MM->getValue(), Objs);
726  for (const Value *V : Objs) {
727  if (!isIdentifiedObject(V)) {
728  Objs.clear();
729  return;
730  }
731  Objs.push_back(V);
732  }
733 }
734 
735 /// Add a chain edge between a load and store if the store can be an
736 /// alias of the load on a subsequent iteration, i.e., a loop carried
737 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
738 /// but that code doesn't create loop carried dependences.
739 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
743  for (auto &SU : SUnits) {
744  MachineInstr &MI = *SU.getInstr();
745  if (isDependenceBarrier(MI, AA))
746  PendingLoads.clear();
747  else if (MI.mayLoad()) {
749  ::getUnderlyingObjects(&MI, Objs);
750  if (Objs.empty())
751  Objs.push_back(UnknownValue);
752  for (auto V : Objs) {
753  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
754  SUs.push_back(&SU);
755  }
756  } else if (MI.mayStore()) {
758  ::getUnderlyingObjects(&MI, Objs);
759  if (Objs.empty())
760  Objs.push_back(UnknownValue);
761  for (auto V : Objs) {
763  PendingLoads.find(V);
764  if (I == PendingLoads.end())
765  continue;
766  for (auto Load : I->second) {
767  if (isSuccOrder(Load, &SU))
768  continue;
769  MachineInstr &LdMI = *Load->getInstr();
770  // First, perform the cheaper check that compares the base register.
771  // If they are the same and the load offset is less than the store
772  // offset, then mark the dependence as loop carried potentially.
773  const MachineOperand *BaseOp1, *BaseOp2;
774  int64_t Offset1, Offset2;
775  bool Offset1IsScalable, Offset2IsScalable;
776  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
777  Offset1IsScalable, TRI) &&
778  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
779  Offset2IsScalable, TRI)) {
780  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
781  Offset1IsScalable == Offset2IsScalable &&
782  (int)Offset1 < (int)Offset2) {
784  "What happened to the chain edge?");
785  SDep Dep(Load, SDep::Barrier);
786  Dep.setLatency(1);
787  SU.addPred(Dep);
788  continue;
789  }
790  }
791  // Second, the more expensive check that uses alias analysis on the
792  // base registers. If they alias, and the load offset is less than
793  // the store offset, the mark the dependence as loop carried.
794  if (!AA) {
795  SDep Dep(Load, SDep::Barrier);
796  Dep.setLatency(1);
797  SU.addPred(Dep);
798  continue;
799  }
800  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
801  MachineMemOperand *MMO2 = *MI.memoperands_begin();
802  if (!MMO1->getValue() || !MMO2->getValue()) {
803  SDep Dep(Load, SDep::Barrier);
804  Dep.setLatency(1);
805  SU.addPred(Dep);
806  continue;
807  }
808  if (MMO1->getValue() == MMO2->getValue() &&
809  MMO1->getOffset() <= MMO2->getOffset()) {
810  SDep Dep(Load, SDep::Barrier);
811  Dep.setLatency(1);
812  SU.addPred(Dep);
813  continue;
814  }
815  if (!AA->isNoAlias(
816  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
818  MMO2->getAAInfo()))) {
819  SDep Dep(Load, SDep::Barrier);
820  Dep.setLatency(1);
821  SU.addPred(Dep);
822  }
823  }
824  }
825  }
826  }
827 }
828 
829 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
830 /// processes dependences for PHIs. This function adds true dependences
831 /// from a PHI to a use, and a loop carried dependence from the use to the
832 /// PHI. The loop carried dependence is represented as an anti dependence
833 /// edge. This function also removes chain dependences between unrelated
834 /// PHIs.
835 void SwingSchedulerDAG::updatePhiDependences() {
836  SmallVector<SDep, 4> RemoveDeps;
838 
839  // Iterate over each DAG node.
840  for (SUnit &I : SUnits) {
841  RemoveDeps.clear();
842  // Set to true if the instruction has an operand defined by a Phi.
843  unsigned HasPhiUse = 0;
844  unsigned HasPhiDef = 0;
845  MachineInstr *MI = I.getInstr();
846  // Iterate over each operand, and we process the definitions.
847  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
848  MOE = MI->operands_end();
849  MOI != MOE; ++MOI) {
850  if (!MOI->isReg())
851  continue;
852  Register Reg = MOI->getReg();
853  if (MOI->isDef()) {
854  // If the register is used by a Phi, then create an anti dependence.
856  UI = MRI.use_instr_begin(Reg),
857  UE = MRI.use_instr_end();
858  UI != UE; ++UI) {
859  MachineInstr *UseMI = &*UI;
860  SUnit *SU = getSUnit(UseMI);
861  if (SU != nullptr && UseMI->isPHI()) {
862  if (!MI->isPHI()) {
863  SDep Dep(SU, SDep::Anti, Reg);
864  Dep.setLatency(1);
865  I.addPred(Dep);
866  } else {
867  HasPhiDef = Reg;
868  // Add a chain edge to a dependent Phi that isn't an existing
869  // predecessor.
870  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
871  I.addPred(SDep(SU, SDep::Barrier));
872  }
873  }
874  }
875  } else if (MOI->isUse()) {
876  // If the register is defined by a Phi, then create a true dependence.
878  if (DefMI == nullptr)
879  continue;
880  SUnit *SU = getSUnit(DefMI);
881  if (SU != nullptr && DefMI->isPHI()) {
882  if (!MI->isPHI()) {
883  SDep Dep(SU, SDep::Data, Reg);
884  Dep.setLatency(0);
885  ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
886  I.addPred(Dep);
887  } else {
888  HasPhiUse = Reg;
889  // Add a chain edge to a dependent Phi that isn't an existing
890  // predecessor.
891  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
892  I.addPred(SDep(SU, SDep::Barrier));
893  }
894  }
895  }
896  }
897  // Remove order dependences from an unrelated Phi.
898  if (!SwpPruneDeps)
899  continue;
900  for (auto &PI : I.Preds) {
901  MachineInstr *PMI = PI.getSUnit()->getInstr();
902  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
903  if (I.getInstr()->isPHI()) {
904  if (PMI->getOperand(0).getReg() == HasPhiUse)
905  continue;
906  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
907  continue;
908  }
909  RemoveDeps.push_back(PI);
910  }
911  }
912  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
913  I.removePred(RemoveDeps[i]);
914  }
915 }
916 
917 /// Iterate over each DAG node and see if we can change any dependences
918 /// in order to reduce the recurrence MII.
919 void SwingSchedulerDAG::changeDependences() {
920  // See if an instruction can use a value from the previous iteration.
921  // If so, we update the base and offset of the instruction and change
922  // the dependences.
923  for (SUnit &I : SUnits) {
924  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
925  int64_t NewOffset = 0;
926  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
927  NewOffset))
928  continue;
929 
930  // Get the MI and SUnit for the instruction that defines the original base.
931  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
932  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
933  if (!DefMI)
934  continue;
935  SUnit *DefSU = getSUnit(DefMI);
936  if (!DefSU)
937  continue;
938  // Get the MI and SUnit for the instruction that defins the new base.
939  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
940  if (!LastMI)
941  continue;
942  SUnit *LastSU = getSUnit(LastMI);
943  if (!LastSU)
944  continue;
945 
946  if (Topo.IsReachable(&I, LastSU))
947  continue;
948 
949  // Remove the dependence. The value now depends on a prior iteration.
951  for (const SDep &P : I.Preds)
952  if (P.getSUnit() == DefSU)
953  Deps.push_back(P);
954  for (int i = 0, e = Deps.size(); i != e; i++) {
955  Topo.RemovePred(&I, Deps[i].getSUnit());
956  I.removePred(Deps[i]);
957  }
958  // Remove the chain dependence between the instructions.
959  Deps.clear();
960  for (auto &P : LastSU->Preds)
961  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
962  Deps.push_back(P);
963  for (int i = 0, e = Deps.size(); i != e; i++) {
964  Topo.RemovePred(LastSU, Deps[i].getSUnit());
965  LastSU->removePred(Deps[i]);
966  }
967 
968  // Add a dependence between the new instruction and the instruction
969  // that defines the new base.
970  SDep Dep(&I, SDep::Anti, NewBase);
971  Topo.AddPred(LastSU, &I);
972  LastSU->addPred(Dep);
973 
974  // Remember the base and offset information so that we can update the
975  // instruction during code generation.
976  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
977  }
978 }
979 
980 namespace {
981 
982 // FuncUnitSorter - Comparison operator used to sort instructions by
983 // the number of functional unit choices.
984 struct FuncUnitSorter {
985  const InstrItineraryData *InstrItins;
986  const MCSubtargetInfo *STI;
988 
989  FuncUnitSorter(const TargetSubtargetInfo &TSI)
990  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
991 
992  // Compute the number of functional unit alternatives needed
993  // at each stage, and take the minimum value. We prioritize the
994  // instructions by the least number of choices first.
995  unsigned minFuncUnits(const MachineInstr *Inst,
996  InstrStage::FuncUnits &F) const {
997  unsigned SchedClass = Inst->getDesc().getSchedClass();
998  unsigned min = UINT_MAX;
999  if (InstrItins && !InstrItins->isEmpty()) {
1000  for (const InstrStage &IS :
1001  make_range(InstrItins->beginStage(SchedClass),
1002  InstrItins->endStage(SchedClass))) {
1003  InstrStage::FuncUnits funcUnits = IS.getUnits();
1004  unsigned numAlternatives = countPopulation(funcUnits);
1005  if (numAlternatives < min) {
1006  min = numAlternatives;
1007  F = funcUnits;
1008  }
1009  }
1010  return min;
1011  }
1012  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1013  const MCSchedClassDesc *SCDesc =
1014  STI->getSchedModel().getSchedClassDesc(SchedClass);
1015  if (!SCDesc->isValid())
1016  // No valid Schedule Class Desc for schedClass, should be
1017  // Pseudo/PostRAPseudo
1018  return min;
1019 
1020  for (const MCWriteProcResEntry &PRE :
1021  make_range(STI->getWriteProcResBegin(SCDesc),
1022  STI->getWriteProcResEnd(SCDesc))) {
1023  if (!PRE.Cycles)
1024  continue;
1025  const MCProcResourceDesc *ProcResource =
1026  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1027  unsigned NumUnits = ProcResource->NumUnits;
1028  if (NumUnits < min) {
1029  min = NumUnits;
1030  F = PRE.ProcResourceIdx;
1031  }
1032  }
1033  return min;
1034  }
1035  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1036  }
1037 
1038  // Compute the critical resources needed by the instruction. This
1039  // function records the functional units needed by instructions that
1040  // must use only one functional unit. We use this as a tie breaker
1041  // for computing the resource MII. The instrutions that require
1042  // the same, highly used, functional unit have high priority.
1043  void calcCriticalResources(MachineInstr &MI) {
1044  unsigned SchedClass = MI.getDesc().getSchedClass();
1045  if (InstrItins && !InstrItins->isEmpty()) {
1046  for (const InstrStage &IS :
1047  make_range(InstrItins->beginStage(SchedClass),
1048  InstrItins->endStage(SchedClass))) {
1049  InstrStage::FuncUnits FuncUnits = IS.getUnits();
1050  if (countPopulation(FuncUnits) == 1)
1051  Resources[FuncUnits]++;
1052  }
1053  return;
1054  }
1055  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1056  const MCSchedClassDesc *SCDesc =
1057  STI->getSchedModel().getSchedClassDesc(SchedClass);
1058  if (!SCDesc->isValid())
1059  // No valid Schedule Class Desc for schedClass, should be
1060  // Pseudo/PostRAPseudo
1061  return;
1062 
1063  for (const MCWriteProcResEntry &PRE :
1064  make_range(STI->getWriteProcResBegin(SCDesc),
1065  STI->getWriteProcResEnd(SCDesc))) {
1066  if (!PRE.Cycles)
1067  continue;
1068  Resources[PRE.ProcResourceIdx]++;
1069  }
1070  return;
1071  }
1072  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1073  }
1074 
1075  /// Return true if IS1 has less priority than IS2.
1076  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1077  InstrStage::FuncUnits F1 = 0, F2 = 0;
1078  unsigned MFUs1 = minFuncUnits(IS1, F1);
1079  unsigned MFUs2 = minFuncUnits(IS2, F2);
1080  if (MFUs1 == MFUs2)
1081  return Resources.lookup(F1) < Resources.lookup(F2);
1082  return MFUs1 > MFUs2;
1083  }
1084 };
1085 
1086 } // end anonymous namespace
1087 
1088 /// Calculate the resource constrained minimum initiation interval for the
1089 /// specified loop. We use the DFA to model the resources needed for
1090 /// each instruction, and we ignore dependences. A different DFA is created
1091 /// for each cycle that is required. When adding a new instruction, we attempt
1092 /// to add it to each existing DFA, until a legal space is found. If the
1093 /// instruction cannot be reserved in an existing DFA, we create a new one.
1094 unsigned SwingSchedulerDAG::calculateResMII() {
1095 
1096  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1099  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1100 
1101  // Sort the instructions by the number of available choices for scheduling,
1102  // least to most. Use the number of critical resources as the tie breaker.
1103  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1104  for (MachineInstr &MI :
1106  FUS.calcCriticalResources(MI);
1108  FuncUnitOrder(FUS);
1109 
1110  for (MachineInstr &MI :
1112  FuncUnitOrder.push(&MI);
1113 
1114  while (!FuncUnitOrder.empty()) {
1115  MachineInstr *MI = FuncUnitOrder.top();
1116  FuncUnitOrder.pop();
1117  if (TII->isZeroCost(MI->getOpcode()))
1118  continue;
1119  // Attempt to reserve the instruction in an existing DFA. At least one
1120  // DFA is needed for each cycle.
1121  unsigned NumCycles = getSUnit(MI)->Latency;
1122  unsigned ReservedCycles = 0;
1123  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1124  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1125  LLVM_DEBUG({
1126  dbgs() << "Trying to reserve resource for " << NumCycles
1127  << " cycles for \n";
1128  MI->dump();
1129  });
1130  for (unsigned C = 0; C < NumCycles; ++C)
1131  while (RI != RE) {
1132  if ((*RI)->canReserveResources(*MI)) {
1133  (*RI)->reserveResources(*MI);
1134  ++ReservedCycles;
1135  break;
1136  }
1137  RI++;
1138  }
1139  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1140  << ", NumCycles:" << NumCycles << "\n");
1141  // Add new DFAs, if needed, to reserve resources.
1142  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1144  << "NewResource created to reserve resources"
1145  << "\n");
1146  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1147  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1148  NewResource->reserveResources(*MI);
1149  Resources.push_back(NewResource);
1150  }
1151  }
1152  int Resmii = Resources.size();
1153  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
1154  // Delete the memory for each of the DFAs that were created earlier.
1155  for (ResourceManager *RI : Resources) {
1156  ResourceManager *D = RI;
1157  delete D;
1158  }
1159  Resources.clear();
1160  return Resmii;
1161 }
1162 
1163 /// Calculate the recurrence-constrainted minimum initiation interval.
1164 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1165 /// for each circuit. The II needs to satisfy the inequality
1166 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1167 /// II that satisfies the inequality, and the RecMII is the maximum
1168 /// of those values.
1169 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1170  unsigned RecMII = 0;
1171 
1172  for (NodeSet &Nodes : NodeSets) {
1173  if (Nodes.empty())
1174  continue;
1175 
1176  unsigned Delay = Nodes.getLatency();
1177  unsigned Distance = 1;
1178 
1179  // ii = ceil(delay / distance)
1180  unsigned CurMII = (Delay + Distance - 1) / Distance;
1181  Nodes.setRecMII(CurMII);
1182  if (CurMII > RecMII)
1183  RecMII = CurMII;
1184  }
1185 
1186  return RecMII;
1187 }
1188 
1189 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1190 /// but we do this to find the circuits, and then change them back.
1191 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1193  for (SUnit &SU : SUnits) {
1194  for (SDep &Pred : SU.Preds)
1195  if (Pred.getKind() == SDep::Anti)
1196  DepsAdded.push_back(std::make_pair(&SU, Pred));
1197  }
1198  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1199  // Remove this anti dependency and add one in the reverse direction.
1200  SUnit *SU = P.first;
1201  SDep &D = P.second;
1202  SUnit *TargetSU = D.getSUnit();
1203  unsigned Reg = D.getReg();
1204  unsigned Lat = D.getLatency();
1205  SU->removePred(D);
1206  SDep Dep(SU, SDep::Anti, Reg);
1207  Dep.setLatency(Lat);
1208  TargetSU->addPred(Dep);
1209  }
1210 }
1211 
1212 /// Create the adjacency structure of the nodes in the graph.
1213 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1214  SwingSchedulerDAG *DAG) {
1215  BitVector Added(SUnits.size());
1216  DenseMap<int, int> OutputDeps;
1217  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1218  Added.reset();
1219  // Add any successor to the adjacency matrix and exclude duplicates.
1220  for (auto &SI : SUnits[i].Succs) {
1221  // Only create a back-edge on the first and last nodes of a dependence
1222  // chain. This records any chains and adds them later.
1223  if (SI.getKind() == SDep::Output) {
1224  int N = SI.getSUnit()->NodeNum;
1225  int BackEdge = i;
1226  auto Dep = OutputDeps.find(BackEdge);
1227  if (Dep != OutputDeps.end()) {
1228  BackEdge = Dep->second;
1229  OutputDeps.erase(Dep);
1230  }
1231  OutputDeps[N] = BackEdge;
1232  }
1233  // Do not process a boundary node, an artificial node.
1234  // A back-edge is processed only if it goes to a Phi.
1235  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1236  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1237  continue;
1238  int N = SI.getSUnit()->NodeNum;
1239  if (!Added.test(N)) {
1240  AdjK[i].push_back(N);
1241  Added.set(N);
1242  }
1243  }
1244  // A chain edge between a store and a load is treated as a back-edge in the
1245  // adjacency matrix.
1246  for (auto &PI : SUnits[i].Preds) {
1247  if (!SUnits[i].getInstr()->mayStore() ||
1248  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1249  continue;
1250  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1251  int N = PI.getSUnit()->NodeNum;
1252  if (!Added.test(N)) {
1253  AdjK[i].push_back(N);
1254  Added.set(N);
1255  }
1256  }
1257  }
1258  }
1259  // Add back-edges in the adjacency matrix for the output dependences.
1260  for (auto &OD : OutputDeps)
1261  if (!Added.test(OD.second)) {
1262  AdjK[OD.first].push_back(OD.second);
1263  Added.set(OD.second);
1264  }
1265 }
1266 
1267 /// Identify an elementary circuit in the dependence graph starting at the
1268 /// specified node.
1269 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1270  bool HasBackedge) {
1271  SUnit *SV = &SUnits[V];
1272  bool F = false;
1273  Stack.insert(SV);
1274  Blocked.set(V);
1275 
1276  for (auto W : AdjK[V]) {
1277  if (NumPaths > MaxPaths)
1278  break;
1279  if (W < S)
1280  continue;
1281  if (W == S) {
1282  if (!HasBackedge)
1283  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1284  F = true;
1285  ++NumPaths;
1286  break;
1287  } else if (!Blocked.test(W)) {
1288  if (circuit(W, S, NodeSets,
1289  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1290  F = true;
1291  }
1292  }
1293 
1294  if (F)
1295  unblock(V);
1296  else {
1297  for (auto W : AdjK[V]) {
1298  if (W < S)
1299  continue;
1300  if (B[W].count(SV) == 0)
1301  B[W].insert(SV);
1302  }
1303  }
1304  Stack.pop_back();
1305  return F;
1306 }
1307 
1308 /// Unblock a node in the circuit finding algorithm.
1309 void SwingSchedulerDAG::Circuits::unblock(int U) {
1310  Blocked.reset(U);
1311  SmallPtrSet<SUnit *, 4> &BU = B[U];
1312  while (!BU.empty()) {
1314  assert(SI != BU.end() && "Invalid B set.");
1315  SUnit *W = *SI;
1316  BU.erase(W);
1317  if (Blocked.test(W->NodeNum))
1318  unblock(W->NodeNum);
1319  }
1320 }
1321 
1322 /// Identify all the elementary circuits in the dependence graph using
1323 /// Johnson's circuit algorithm.
1324 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1325  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1326  // but we do this to find the circuits, and then change them back.
1328 
1329  Circuits Cir(SUnits, Topo);
1330  // Create the adjacency structure.
1331  Cir.createAdjacencyStructure(this);
1332  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1333  Cir.reset();
1334  Cir.circuit(i, i, NodeSets);
1335  }
1336 
1337  // Change the dependences back so that we've created a DAG again.
1339 }
1340 
1341 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1342 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1343 // additional copies that are needed across iterations. An artificial dependence
1344 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1345 
1346 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1347 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1348 // PHI-------True-Dep------> USEOfPhi
1349 
1350 // The mutation creates
1351 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1352 
1353 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1354 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1355 // late to avoid additional copies across iterations. The possible scheduling
1356 // order would be
1357 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1358 
1360  for (SUnit &SU : DAG->SUnits) {
1361  // Find the COPY/REG_SEQUENCE instruction.
1362  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1363  continue;
1364 
1365  // Record the loop carried PHIs.
1366  SmallVector<SUnit *, 4> PHISUs;
1367  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1368  SmallVector<SUnit *, 4> SrcSUs;
1369 
1370  for (auto &Dep : SU.Preds) {
1371  SUnit *TmpSU = Dep.getSUnit();
1372  MachineInstr *TmpMI = TmpSU->getInstr();
1373  SDep::Kind DepKind = Dep.getKind();
1374  // Save the loop carried PHI.
1375  if (DepKind == SDep::Anti && TmpMI->isPHI())
1376  PHISUs.push_back(TmpSU);
1377  // Save the source of COPY/REG_SEQUENCE.
1378  // If the source has no pre-decessors, we will end up creating cycles.
1379  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1380  SrcSUs.push_back(TmpSU);
1381  }
1382 
1383  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1384  continue;
1385 
1386  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1387  // SUnit to the container.
1388  SmallVector<SUnit *, 8> UseSUs;
1389  // Do not use iterator based loop here as we are updating the container.
1390  for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1391  for (auto &Dep : PHISUs[Index]->Succs) {
1392  if (Dep.getKind() != SDep::Data)
1393  continue;
1394 
1395  SUnit *TmpSU = Dep.getSUnit();
1396  MachineInstr *TmpMI = TmpSU->getInstr();
1397  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1398  PHISUs.push_back(TmpSU);
1399  continue;
1400  }
1401  UseSUs.push_back(TmpSU);
1402  }
1403  }
1404 
1405  if (UseSUs.size() == 0)
1406  continue;
1407 
1408  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1409  // Add the artificial dependencies if it does not form a cycle.
1410  for (auto I : UseSUs) {
1411  for (auto Src : SrcSUs) {
1412  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1413  Src->addPred(SDep(I, SDep::Artificial));
1414  SDAG->Topo.AddPred(Src, I);
1415  }
1416  }
1417  }
1418  }
1419 }
1420 
1421 /// Return true for DAG nodes that we ignore when computing the cost functions.
1422 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1423 /// in the calculation of the ASAP, ALAP, etc functions.
1424 static bool ignoreDependence(const SDep &D, bool isPred) {
1425  if (D.isArtificial())
1426  return true;
1427  return D.getKind() == SDep::Anti && isPred;
1428 }
1429 
1430 /// Compute several functions need to order the nodes for scheduling.
1431 /// ASAP - Earliest time to schedule a node.
1432 /// ALAP - Latest time to schedule a node.
1433 /// MOV - Mobility function, difference between ALAP and ASAP.
1434 /// D - Depth of each node.
1435 /// H - Height of each node.
1436 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1437  ScheduleInfo.resize(SUnits.size());
1438 
1439  LLVM_DEBUG({
1440  for (int I : Topo) {
1441  const SUnit &SU = SUnits[I];
1442  dumpNode(SU);
1443  }
1444  });
1445 
1446  int maxASAP = 0;
1447  // Compute ASAP and ZeroLatencyDepth.
1448  for (int I : Topo) {
1449  int asap = 0;
1450  int zeroLatencyDepth = 0;
1451  SUnit *SU = &SUnits[I];
1452  for (const SDep &P : SU->Preds) {
1453  SUnit *pred = P.getSUnit();
1454  if (P.getLatency() == 0)
1455  zeroLatencyDepth =
1456  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1457  if (ignoreDependence(P, true))
1458  continue;
1459  asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
1460  getDistance(pred, SU, P) * MII));
1461  }
1462  maxASAP = std::max(maxASAP, asap);
1463  ScheduleInfo[I].ASAP = asap;
1464  ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1465  }
1466 
1467  // Compute ALAP, ZeroLatencyHeight, and MOV.
1468  for (int I : llvm::reverse(Topo)) {
1469  int alap = maxASAP;
1470  int zeroLatencyHeight = 0;
1471  SUnit *SU = &SUnits[I];
1472  for (const SDep &S : SU->Succs) {
1473  SUnit *succ = S.getSUnit();
1474  if (S.getLatency() == 0)
1475  zeroLatencyHeight =
1476  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1477  if (ignoreDependence(S, true))
1478  continue;
1479  alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
1480  getDistance(SU, succ, S) * MII));
1481  }
1482 
1483  ScheduleInfo[I].ALAP = alap;
1484  ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
1485  }
1486 
1487  // After computing the node functions, compute the summary for each node set.
1488  for (NodeSet &I : NodeSets)
1489  I.computeNodeSetInfo(this);
1490 
1491  LLVM_DEBUG({
1492  for (unsigned i = 0; i < SUnits.size(); i++) {
1493  dbgs() << "\tNode " << i << ":\n";
1494  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1495  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1496  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1497  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1498  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1499  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1500  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1501  }
1502  });
1503 }
1504 
1505 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1506 /// as the predecessors of the elements of NodeOrder that are not also in
1507 /// NodeOrder.
1510  const NodeSet *S = nullptr) {
1511  Preds.clear();
1512  for (const SUnit *SU : NodeOrder) {
1513  for (const SDep &Pred : SU->Preds) {
1514  if (S && S->count(Pred.getSUnit()) == 0)
1515  continue;
1516  if (ignoreDependence(Pred, true))
1517  continue;
1518  if (NodeOrder.count(Pred.getSUnit()) == 0)
1519  Preds.insert(Pred.getSUnit());
1520  }
1521  // Back-edges are predecessors with an anti-dependence.
1522  for (const SDep &Succ : SU->Succs) {
1523  if (Succ.getKind() != SDep::Anti)
1524  continue;
1525  if (S && S->count(Succ.getSUnit()) == 0)
1526  continue;
1527  if (NodeOrder.count(Succ.getSUnit()) == 0)
1528  Preds.insert(Succ.getSUnit());
1529  }
1530  }
1531  return !Preds.empty();
1532 }
1533 
1534 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1535 /// as the successors of the elements of NodeOrder that are not also in
1536 /// NodeOrder.
1539  const NodeSet *S = nullptr) {
1540  Succs.clear();
1541  for (const SUnit *SU : NodeOrder) {
1542  for (const SDep &Succ : SU->Succs) {
1543  if (S && S->count(Succ.getSUnit()) == 0)
1544  continue;
1545  if (ignoreDependence(Succ, false))
1546  continue;
1547  if (NodeOrder.count(Succ.getSUnit()) == 0)
1548  Succs.insert(Succ.getSUnit());
1549  }
1550  for (const SDep &Pred : SU->Preds) {
1551  if (Pred.getKind() != SDep::Anti)
1552  continue;
1553  if (S && S->count(Pred.getSUnit()) == 0)
1554  continue;
1555  if (NodeOrder.count(Pred.getSUnit()) == 0)
1556  Succs.insert(Pred.getSUnit());
1557  }
1558  }
1559  return !Succs.empty();
1560 }
1561 
1562 /// Return true if there is a path from the specified node to any of the nodes
1563 /// in DestNodes. Keep track and return the nodes in any path.
1564 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1565  SetVector<SUnit *> &DestNodes,
1566  SetVector<SUnit *> &Exclude,
1567  SmallPtrSet<SUnit *, 8> &Visited) {
1568  if (Cur->isBoundaryNode())
1569  return false;
1570  if (Exclude.contains(Cur))
1571  return false;
1572  if (DestNodes.contains(Cur))
1573  return true;
1574  if (!Visited.insert(Cur).second)
1575  return Path.contains(Cur);
1576  bool FoundPath = false;
1577  for (auto &SI : Cur->Succs)
1578  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1579  for (auto &PI : Cur->Preds)
1580  if (PI.getKind() == SDep::Anti)
1581  FoundPath |=
1582  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1583  if (FoundPath)
1584  Path.insert(Cur);
1585  return FoundPath;
1586 }
1587 
1588 /// Compute the live-out registers for the instructions in a node-set.
1589 /// The live-out registers are those that are defined in the node-set,
1590 /// but not used. Except for use operands of Phis.
1592  NodeSet &NS) {
1597  for (SUnit *SU : NS) {
1598  const MachineInstr *MI = SU->getInstr();
1599  if (MI->isPHI())
1600  continue;
1601  for (const MachineOperand &MO : MI->operands())
1602  if (MO.isReg() && MO.isUse()) {
1603  Register Reg = MO.getReg();
1605  Uses.insert(Reg);
1606  else if (MRI.isAllocatable(Reg))
1607  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1608  ++Units)
1609  Uses.insert(*Units);
1610  }
1611  }
1612  for (SUnit *SU : NS)
1613  for (const MachineOperand &MO : SU->getInstr()->operands())
1614  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1615  Register Reg = MO.getReg();
1617  if (!Uses.count(Reg))
1618  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1620  } else if (MRI.isAllocatable(Reg)) {
1621  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1622  ++Units)
1623  if (!Uses.count(*Units))
1624  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1626  }
1627  }
1628  RPTracker.addLiveRegs(LiveOutRegs);
1629 }
1630 
1631 /// A heuristic to filter nodes in recurrent node-sets if the register
1632 /// pressure of a set is too high.
1633 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1634  for (auto &NS : NodeSets) {
1635  // Skip small node-sets since they won't cause register pressure problems.
1636  if (NS.size() <= 2)
1637  continue;
1638  IntervalPressure RecRegPressure;
1639  RegPressureTracker RecRPTracker(RecRegPressure);
1640  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1641  computeLiveOuts(MF, RecRPTracker, NS);
1642  RecRPTracker.closeBottom();
1643 
1644  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1645  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1646  return A->NodeNum > B->NodeNum;
1647  });
1648 
1649  for (auto &SU : SUnits) {
1650  // Since we're computing the register pressure for a subset of the
1651  // instructions in a block, we need to set the tracker for each
1652  // instruction in the node-set. The tracker is set to the instruction
1653  // just after the one we're interested in.
1655  RecRPTracker.setPos(std::next(CurInstI));
1656 
1657  RegPressureDelta RPDelta;
1658  ArrayRef<PressureChange> CriticalPSets;
1659  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1660  CriticalPSets,
1661  RecRegPressure.MaxSetPressure);
1662  if (RPDelta.Excess.isValid()) {
1663  LLVM_DEBUG(
1664  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1665  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1666  << ":" << RPDelta.Excess.getUnitInc());
1667  NS.setExceedPressure(SU);
1668  break;
1669  }
1670  RecRPTracker.recede();
1671  }
1672  }
1673 }
1674 
1675 /// A heuristic to colocate node sets that have the same set of
1676 /// successors.
1677 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1678  unsigned Colocate = 0;
1679  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1680  NodeSet &N1 = NodeSets[i];
1682  if (N1.empty() || !succ_L(N1, S1))
1683  continue;
1684  for (int j = i + 1; j < e; ++j) {
1685  NodeSet &N2 = NodeSets[j];
1686  if (N1.compareRecMII(N2) != 0)
1687  continue;
1689  if (N2.empty() || !succ_L(N2, S2))
1690  continue;
1691  if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
1692  N1.setColocate(++Colocate);
1693  N2.setColocate(Colocate);
1694  break;
1695  }
1696  }
1697  }
1698 }
1699 
1700 /// Check if the existing node-sets are profitable. If not, then ignore the
1701 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1702 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1703 /// then it's best to try to schedule all instructions together instead of
1704 /// starting with the recurrent node-sets.
1705 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1706  // Look for loops with a large MII.
1707  if (MII < 17)
1708  return;
1709  // Check if the node-set contains only a simple add recurrence.
1710  for (auto &NS : NodeSets) {
1711  if (NS.getRecMII() > 2)
1712  return;
1713  if (NS.getMaxDepth() > MII)
1714  return;
1715  }
1716  NodeSets.clear();
1717  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1718 }
1719 
1720 /// Add the nodes that do not belong to a recurrence set into groups
1721 /// based upon connected componenets.
1722 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1723  SetVector<SUnit *> NodesAdded;
1724  SmallPtrSet<SUnit *, 8> Visited;
1725  // Add the nodes that are on a path between the previous node sets and
1726  // the current node set.
1727  for (NodeSet &I : NodeSets) {
1729  // Add the nodes from the current node set to the previous node set.
1730  if (succ_L(I, N)) {
1731  SetVector<SUnit *> Path;
1732  for (SUnit *NI : N) {
1733  Visited.clear();
1734  computePath(NI, Path, NodesAdded, I, Visited);
1735  }
1736  if (!Path.empty())
1737  I.insert(Path.begin(), Path.end());
1738  }
1739  // Add the nodes from the previous node set to the current node set.
1740  N.clear();
1741  if (succ_L(NodesAdded, N)) {
1742  SetVector<SUnit *> Path;
1743  for (SUnit *NI : N) {
1744  Visited.clear();
1745  computePath(NI, Path, I, NodesAdded, Visited);
1746  }
1747  if (!Path.empty())
1748  I.insert(Path.begin(), Path.end());
1749  }
1750  NodesAdded.insert(I.begin(), I.end());
1751  }
1752 
1753  // Create a new node set with the connected nodes of any successor of a node
1754  // in a recurrent set.
1755  NodeSet NewSet;
1757  if (succ_L(NodesAdded, N))
1758  for (SUnit *I : N)
1759  addConnectedNodes(I, NewSet, NodesAdded);
1760  if (!NewSet.empty())
1761  NodeSets.push_back(NewSet);
1762 
1763  // Create a new node set with the connected nodes of any predecessor of a node
1764  // in a recurrent set.
1765  NewSet.clear();
1766  if (pred_L(NodesAdded, N))
1767  for (SUnit *I : N)
1768  addConnectedNodes(I, NewSet, NodesAdded);
1769  if (!NewSet.empty())
1770  NodeSets.push_back(NewSet);
1771 
1772  // Create new nodes sets with the connected nodes any remaining node that
1773  // has no predecessor.
1774  for (SUnit &SU : SUnits) {
1775  if (NodesAdded.count(&SU) == 0) {
1776  NewSet.clear();
1777  addConnectedNodes(&SU, NewSet, NodesAdded);
1778  if (!NewSet.empty())
1779  NodeSets.push_back(NewSet);
1780  }
1781  }
1782 }
1783 
1784 /// Add the node to the set, and add all of its connected nodes to the set.
1785 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1786  SetVector<SUnit *> &NodesAdded) {
1787  NewSet.insert(SU);
1788  NodesAdded.insert(SU);
1789  for (auto &SI : SU->Succs) {
1790  SUnit *Successor = SI.getSUnit();
1791  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1792  addConnectedNodes(Successor, NewSet, NodesAdded);
1793  }
1794  for (auto &PI : SU->Preds) {
1795  SUnit *Predecessor = PI.getSUnit();
1796  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1797  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1798  }
1799 }
1800 
1801 /// Return true if Set1 contains elements in Set2. The elements in common
1802 /// are returned in a different container.
1803 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1804  SmallSetVector<SUnit *, 8> &Result) {
1805  Result.clear();
1806  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1807  SUnit *SU = Set1[i];
1808  if (Set2.count(SU) != 0)
1809  Result.insert(SU);
1810  }
1811  return !Result.empty();
1812 }
1813 
1814 /// Merge the recurrence node sets that have the same initial node.
1815 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1816  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1817  ++I) {
1818  NodeSet &NI = *I;
1819  for (NodeSetType::iterator J = I + 1; J != E;) {
1820  NodeSet &NJ = *J;
1821  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1822  if (NJ.compareRecMII(NI) > 0)
1823  NI.setRecMII(NJ.getRecMII());
1824  for (SUnit *SU : *J)
1825  I->insert(SU);
1826  NodeSets.erase(J);
1827  E = NodeSets.end();
1828  } else {
1829  ++J;
1830  }
1831  }
1832  }
1833 }
1834 
1835 /// Remove nodes that have been scheduled in previous NodeSets.
1836 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1837  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1838  ++I)
1839  for (NodeSetType::iterator J = I + 1; J != E;) {
1840  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1841 
1842  if (J->empty()) {
1843  NodeSets.erase(J);
1844  E = NodeSets.end();
1845  } else {
1846  ++J;
1847  }
1848  }
1849 }
1850 
1851 /// Compute an ordered list of the dependence graph nodes, which
1852 /// indicates the order that the nodes will be scheduled. This is a
1853 /// two-level algorithm. First, a partial order is created, which
1854 /// consists of a list of sets ordered from highest to lowest priority.
1855 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1857  NodeOrder.clear();
1858 
1859  for (auto &Nodes : NodeSets) {
1860  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1861  OrderKind Order;
1863  if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1864  R.insert(N.begin(), N.end());
1865  Order = BottomUp;
1866  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1867  } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1868  R.insert(N.begin(), N.end());
1869  Order = TopDown;
1870  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1871  } else if (isIntersect(N, Nodes, R)) {
1872  // If some of the successors are in the existing node-set, then use the
1873  // top-down ordering.
1874  Order = TopDown;
1875  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1876  } else if (NodeSets.size() == 1) {
1877  for (auto &N : Nodes)
1878  if (N->Succs.size() == 0)
1879  R.insert(N);
1880  Order = BottomUp;
1881  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1882  } else {
1883  // Find the node with the highest ASAP.
1884  SUnit *maxASAP = nullptr;
1885  for (SUnit *SU : Nodes) {
1886  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1887  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1888  maxASAP = SU;
1889  }
1890  R.insert(maxASAP);
1891  Order = BottomUp;
1892  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1893  }
1894 
1895  while (!R.empty()) {
1896  if (Order == TopDown) {
1897  // Choose the node with the maximum height. If more than one, choose
1898  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1899  // choose the node with the lowest MOV.
1900  while (!R.empty()) {
1901  SUnit *maxHeight = nullptr;
1902  for (SUnit *I : R) {
1903  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1904  maxHeight = I;
1905  else if (getHeight(I) == getHeight(maxHeight) &&
1907  maxHeight = I;
1908  else if (getHeight(I) == getHeight(maxHeight) &&
1910  getZeroLatencyHeight(maxHeight) &&
1911  getMOV(I) < getMOV(maxHeight))
1912  maxHeight = I;
1913  }
1914  NodeOrder.insert(maxHeight);
1915  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1916  R.remove(maxHeight);
1917  for (const auto &I : maxHeight->Succs) {
1918  if (Nodes.count(I.getSUnit()) == 0)
1919  continue;
1920  if (NodeOrder.contains(I.getSUnit()))
1921  continue;
1922  if (ignoreDependence(I, false))
1923  continue;
1924  R.insert(I.getSUnit());
1925  }
1926  // Back-edges are predecessors with an anti-dependence.
1927  for (const auto &I : maxHeight->Preds) {
1928  if (I.getKind() != SDep::Anti)
1929  continue;
1930  if (Nodes.count(I.getSUnit()) == 0)
1931  continue;
1932  if (NodeOrder.contains(I.getSUnit()))
1933  continue;
1934  R.insert(I.getSUnit());
1935  }
1936  }
1937  Order = BottomUp;
1938  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1940  if (pred_L(NodeOrder, N, &Nodes))
1941  R.insert(N.begin(), N.end());
1942  } else {
1943  // Choose the node with the maximum depth. If more than one, choose
1944  // the node with the maximum ZeroLatencyDepth. If still more than one,
1945  // choose the node with the lowest MOV.
1946  while (!R.empty()) {
1947  SUnit *maxDepth = nullptr;
1948  for (SUnit *I : R) {
1949  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1950  maxDepth = I;
1951  else if (getDepth(I) == getDepth(maxDepth) &&
1953  maxDepth = I;
1954  else if (getDepth(I) == getDepth(maxDepth) &&
1955  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1956  getMOV(I) < getMOV(maxDepth))
1957  maxDepth = I;
1958  }
1959  NodeOrder.insert(maxDepth);
1960  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1961  R.remove(maxDepth);
1962  if (Nodes.isExceedSU(maxDepth)) {
1963  Order = TopDown;
1964  R.clear();
1965  R.insert(Nodes.getNode(0));
1966  break;
1967  }
1968  for (const auto &I : maxDepth->Preds) {
1969  if (Nodes.count(I.getSUnit()) == 0)
1970  continue;
1971  if (NodeOrder.contains(I.getSUnit()))
1972  continue;
1973  R.insert(I.getSUnit());
1974  }
1975  // Back-edges are predecessors with an anti-dependence.
1976  for (const auto &I : maxDepth->Succs) {
1977  if (I.getKind() != SDep::Anti)
1978  continue;
1979  if (Nodes.count(I.getSUnit()) == 0)
1980  continue;
1981  if (NodeOrder.contains(I.getSUnit()))
1982  continue;
1983  R.insert(I.getSUnit());
1984  }
1985  }
1986  Order = TopDown;
1987  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1989  if (succ_L(NodeOrder, N, &Nodes))
1990  R.insert(N.begin(), N.end());
1991  }
1992  }
1993  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1994  }
1995 
1996  LLVM_DEBUG({
1997  dbgs() << "Node order: ";
1998  for (SUnit *I : NodeOrder)
1999  dbgs() << " " << I->NodeNum << " ";
2000  dbgs() << "\n";
2001  });
2002 }
2003 
2004 /// Process the nodes in the computed order and create the pipelined schedule
2005 /// of the instructions, if possible. Return true if a schedule is found.
2006 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2007 
2008  if (NodeOrder.empty()){
2009  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2010  return false;
2011  }
2012 
2013  bool scheduleFound = false;
2014  // Keep increasing II until a valid schedule is found.
2015  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2016  Schedule.reset();
2017  Schedule.setInitiationInterval(II);
2018  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2019 
2020  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2021  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2022  do {
2023  SUnit *SU = *NI;
2024 
2025  // Compute the schedule time for the instruction, which is based
2026  // upon the scheduled time for any predecessors/successors.
2027  int EarlyStart = INT_MIN;
2028  int LateStart = INT_MAX;
2029  // These values are set when the size of the schedule window is limited
2030  // due to chain dependences.
2031  int SchedEnd = INT_MAX;
2032  int SchedStart = INT_MIN;
2033  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2034  II, this);
2035  LLVM_DEBUG({
2036  dbgs() << "\n";
2037  dbgs() << "Inst (" << SU->NodeNum << ") ";
2038  SU->getInstr()->dump();
2039  dbgs() << "\n";
2040  });
2041  LLVM_DEBUG({
2042  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2043  LateStart, SchedEnd, SchedStart);
2044  });
2045 
2046  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2047  SchedStart > LateStart)
2048  scheduleFound = false;
2049  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2050  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2051  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2052  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2053  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2054  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2055  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2056  SchedEnd =
2057  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2058  // When scheduling a Phi it is better to start at the late cycle and go
2059  // backwards. The default order may insert the Phi too far away from
2060  // its first dependence.
2061  if (SU->getInstr()->isPHI())
2062  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2063  else
2064  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2065  } else {
2066  int FirstCycle = Schedule.getFirstCycle();
2067  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2068  FirstCycle + getASAP(SU) + II - 1, II);
2069  }
2070  // Even if we find a schedule, make sure the schedule doesn't exceed the
2071  // allowable number of stages. We keep trying if this happens.
2072  if (scheduleFound)
2073  if (SwpMaxStages > -1 &&
2074  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2075  scheduleFound = false;
2076 
2077  LLVM_DEBUG({
2078  if (!scheduleFound)
2079  dbgs() << "\tCan't schedule\n";
2080  });
2081  } while (++NI != NE && scheduleFound);
2082 
2083  // If a schedule is found, check if it is a valid schedule too.
2084  if (scheduleFound)
2085  scheduleFound = Schedule.isValidSchedule(this);
2086  }
2087 
2088  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2089  << " (II=" << Schedule.getInitiationInterval()
2090  << ")\n");
2091 
2092  if (scheduleFound) {
2093  Schedule.finalizeSchedule(this);
2094  Pass.ORE->emit([&]() {
2096  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2097  << "Schedule found with Initiation Interval: "
2098  << ore::NV("II", Schedule.getInitiationInterval())
2099  << ", MaxStageCount: "
2100  << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
2101  });
2102  } else
2103  Schedule.reset();
2104 
2105  return scheduleFound && Schedule.getMaxStageCount() > 0;
2106 }
2107 
2108 /// Return true if we can compute the amount the instruction changes
2109 /// during each iteration. Set Delta to the amount of the change.
2110 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2112  const MachineOperand *BaseOp;
2113  int64_t Offset;
2114  bool OffsetIsScalable;
2115  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2116  return false;
2117 
2118  // FIXME: This algorithm assumes instructions have fixed-size offsets.
2119  if (OffsetIsScalable)
2120  return false;
2121 
2122  if (!BaseOp->isReg())
2123  return false;
2124 
2125  Register BaseReg = BaseOp->getReg();
2126 
2128  // Check if there is a Phi. If so, get the definition in the loop.
2129  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2130  if (BaseDef && BaseDef->isPHI()) {
2131  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2132  BaseDef = MRI.getVRegDef(BaseReg);
2133  }
2134  if (!BaseDef)
2135  return false;
2136 
2137  int D = 0;
2138  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2139  return false;
2140 
2141  Delta = D;
2142  return true;
2143 }
2144 
2145 /// Check if we can change the instruction to use an offset value from the
2146 /// previous iteration. If so, return true and set the base and offset values
2147 /// so that we can rewrite the load, if necessary.
2148 /// v1 = Phi(v0, v3)
2149 /// v2 = load v1, 0
2150 /// v3 = post_store v1, 4, x
2151 /// This function enables the load to be rewritten as v2 = load v3, 4.
2152 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2153  unsigned &BasePos,
2154  unsigned &OffsetPos,
2155  unsigned &NewBase,
2156  int64_t &Offset) {
2157  // Get the load instruction.
2158  if (TII->isPostIncrement(*MI))
2159  return false;
2160  unsigned BasePosLd, OffsetPosLd;
2161  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2162  return false;
2163  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2164 
2165  // Look for the Phi instruction.
2166  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2167  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2168  if (!Phi || !Phi->isPHI())
2169  return false;
2170  // Get the register defined in the loop block.
2171  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2172  if (!PrevReg)
2173  return false;
2174 
2175  // Check for the post-increment load/store instruction.
2176  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2177  if (!PrevDef || PrevDef == MI)
2178  return false;
2179 
2180  if (!TII->isPostIncrement(*PrevDef))
2181  return false;
2182 
2183  unsigned BasePos1 = 0, OffsetPos1 = 0;
2184  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2185  return false;
2186 
2187  // Make sure that the instructions do not access the same memory location in
2188  // the next iteration.
2189  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2190  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2191  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2192  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2193  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2194  MF.deleteMachineInstr(NewMI);
2195  if (!Disjoint)
2196  return false;
2197 
2198  // Set the return value once we determine that we return true.
2199  BasePos = BasePosLd;
2200  OffsetPos = OffsetPosLd;
2201  NewBase = PrevReg;
2202  Offset = StoreOffset;
2203  return true;
2204 }
2205 
2206 /// Apply changes to the instruction if needed. The changes are need
2207 /// to improve the scheduling and depend up on the final schedule.
2209  SMSchedule &Schedule) {
2210  SUnit *SU = getSUnit(MI);
2212  InstrChanges.find(SU);
2213  if (It != InstrChanges.end()) {
2214  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2215  unsigned BasePos, OffsetPos;
2216  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2217  return;
2218  Register BaseReg = MI->getOperand(BasePos).getReg();
2219  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2220  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2221  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2222  int BaseStageNum = Schedule.stageScheduled(SU);
2223  int BaseCycleNum = Schedule.cycleScheduled(SU);
2224  if (BaseStageNum < DefStageNum) {
2225  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2226  int OffsetDiff = DefStageNum - BaseStageNum;
2227  if (DefCycleNum < BaseCycleNum) {
2228  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2229  if (OffsetDiff > 0)
2230  --OffsetDiff;
2231  }
2232  int64_t NewOffset =
2233  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2234  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2235  SU->setInstr(NewMI);
2236  MISUnitMap[NewMI] = SU;
2237  NewMIs[MI] = NewMI;
2238  }
2239  }
2240 }
2241 
2242 /// Return the instruction in the loop that defines the register.
2243 /// If the definition is a Phi, then follow the Phi operand to
2244 /// the instruction in the loop.
2245 MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2248  while (Def->isPHI()) {
2249  if (!Visited.insert(Def).second)
2250  break;
2251  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2252  if (Def->getOperand(i + 1).getMBB() == BB) {
2253  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2254  break;
2255  }
2256  }
2257  return Def;
2258 }
2259 
2260 /// Return true for an order or output dependence that is loop carried
2261 /// potentially. A dependence is loop carried if the destination defines a valu
2262 /// that may be used or defined by the source in a subsequent iteration.
2264  bool isSucc) {
2265  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2266  Dep.isArtificial())
2267  return false;
2268 
2269  if (!SwpPruneLoopCarried)
2270  return true;
2271 
2272  if (Dep.getKind() == SDep::Output)
2273  return true;
2274 
2275  MachineInstr *SI = Source->getInstr();
2276  MachineInstr *DI = Dep.getSUnit()->getInstr();
2277  if (!isSucc)
2278  std::swap(SI, DI);
2279  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2280 
2281  // Assume ordered loads and stores may have a loop carried dependence.
2282  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2283  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2284  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2285  return true;
2286 
2287  // Only chain dependences between a load and store can be loop carried.
2288  if (!DI->mayStore() || !SI->mayLoad())
2289  return false;
2290 
2291  unsigned DeltaS, DeltaD;
2292  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2293  return true;
2294 
2295  const MachineOperand *BaseOpS, *BaseOpD;
2296  int64_t OffsetS, OffsetD;
2297  bool OffsetSIsScalable, OffsetDIsScalable;
2299  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2300  TRI) ||
2301  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2302  TRI))
2303  return true;
2304 
2305  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2306  "Expected offsets to be byte offsets");
2307 
2308  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2309  return true;
2310 
2311  // Check that the base register is incremented by a constant value for each
2312  // iteration.
2313  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2314  if (!Def || !Def->isPHI())
2315  return true;
2316  unsigned InitVal = 0;
2317  unsigned LoopVal = 0;
2318  getPhiRegs(*Def, BB, InitVal, LoopVal);
2319  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2320  int D = 0;
2321  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2322  return true;
2323 
2324  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2325  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2326 
2327  // This is the main test, which checks the offset values and the loop
2328  // increment value to determine if the accesses may be loop carried.
2329  if (AccessSizeS == MemoryLocation::UnknownSize ||
2330  AccessSizeD == MemoryLocation::UnknownSize)
2331  return true;
2332 
2333  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2334  return true;
2335 
2336  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2337 }
2338 
2339 void SwingSchedulerDAG::postprocessDAG() {
2340  for (auto &M : Mutations)
2341  M->apply(this);
2342 }
2343 
2344 /// Try to schedule the node at the specified StartCycle and continue
2345 /// until the node is schedule or the EndCycle is reached. This function
2346 /// returns true if the node is scheduled. This routine may search either
2347 /// forward or backward for a place to insert the instruction based upon
2348 /// the relative values of StartCycle and EndCycle.
2349 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2350  bool forward = true;
2351  LLVM_DEBUG({
2352  dbgs() << "Trying to insert node between " << StartCycle << " and "
2353  << EndCycle << " II: " << II << "\n";
2354  });
2355  if (StartCycle > EndCycle)
2356  forward = false;
2357 
2358  // The terminating condition depends on the direction.
2359  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2360  for (int curCycle = StartCycle; curCycle != termCycle;
2361  forward ? ++curCycle : --curCycle) {
2362 
2363  // Add the already scheduled instructions at the specified cycle to the
2364  // DFA.
2365  ProcItinResources.clearResources();
2366  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2367  checkCycle <= LastCycle; checkCycle += II) {
2368  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2369 
2370  for (SUnit *CI : cycleInstrs) {
2371  if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
2372  continue;
2373  assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
2374  "These instructions have already been scheduled.");
2375  ProcItinResources.reserveResources(*CI->getInstr());
2376  }
2377  }
2378  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2379  ProcItinResources.canReserveResources(*SU->getInstr())) {
2380  LLVM_DEBUG({
2381  dbgs() << "\tinsert at cycle " << curCycle << " ";
2382  SU->getInstr()->dump();
2383  });
2384 
2385  ScheduledInstrs[curCycle].push_back(SU);
2386  InstrToCycle.insert(std::make_pair(SU, curCycle));
2387  if (curCycle > LastCycle)
2388  LastCycle = curCycle;
2389  if (curCycle < FirstCycle)
2390  FirstCycle = curCycle;
2391  return true;
2392  }
2393  LLVM_DEBUG({
2394  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2395  SU->getInstr()->dump();
2396  });
2397  }
2398  return false;
2399 }
2400 
2401 // Return the cycle of the earliest scheduled instruction in the chain.
2403  SmallPtrSet<SUnit *, 8> Visited;
2404  SmallVector<SDep, 8> Worklist;
2405  Worklist.push_back(Dep);
2406  int EarlyCycle = INT_MAX;
2407  while (!Worklist.empty()) {
2408  const SDep &Cur = Worklist.pop_back_val();
2409  SUnit *PrevSU = Cur.getSUnit();
2410  if (Visited.count(PrevSU))
2411  continue;
2412  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2413  if (it == InstrToCycle.end())
2414  continue;
2415  EarlyCycle = std::min(EarlyCycle, it->second);
2416  for (const auto &PI : PrevSU->Preds)
2417  if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
2418  Worklist.push_back(PI);
2419  Visited.insert(PrevSU);
2420  }
2421  return EarlyCycle;
2422 }
2423 
2424 // Return the cycle of the latest scheduled instruction in the chain.
2426  SmallPtrSet<SUnit *, 8> Visited;
2427  SmallVector<SDep, 8> Worklist;
2428  Worklist.push_back(Dep);
2429  int LateCycle = INT_MIN;
2430  while (!Worklist.empty()) {
2431  const SDep &Cur = Worklist.pop_back_val();
2432  SUnit *SuccSU = Cur.getSUnit();
2433  if (Visited.count(SuccSU))
2434  continue;
2435  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2436  if (it == InstrToCycle.end())
2437  continue;
2438  LateCycle = std::max(LateCycle, it->second);
2439  for (const auto &SI : SuccSU->Succs)
2440  if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
2441  Worklist.push_back(SI);
2442  Visited.insert(SuccSU);
2443  }
2444  return LateCycle;
2445 }
2446 
2447 /// If an instruction has a use that spans multiple iterations, then
2448 /// return true. These instructions are characterized by having a back-ege
2449 /// to a Phi, which contains a reference to another Phi.
2451  for (auto &P : SU->Preds)
2452  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2453  for (auto &S : P.getSUnit()->Succs)
2454  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2455  return P.getSUnit();
2456  return nullptr;
2457 }
2458 
2459 /// Compute the scheduling start slot for the instruction. The start slot
2460 /// depends on any predecessor or successor nodes scheduled already.
2461 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2462  int *MinEnd, int *MaxStart, int II,
2463  SwingSchedulerDAG *DAG) {
2464  // Iterate over each instruction that has been scheduled already. The start
2465  // slot computation depends on whether the previously scheduled instruction
2466  // is a predecessor or successor of the specified instruction.
2467  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2468 
2469  // Iterate over each instruction in the current cycle.
2470  for (SUnit *I : getInstructions(cycle)) {
2471  // Because we're processing a DAG for the dependences, we recognize
2472  // the back-edge in recurrences by anti dependences.
2473  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2474  const SDep &Dep = SU->Preds[i];
2475  if (Dep.getSUnit() == I) {
2476  if (!DAG->isBackedge(SU, Dep)) {
2477  int EarlyStart = cycle + Dep.getLatency() -
2478  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2479  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2480  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2481  int End = earliestCycleInChain(Dep) + (II - 1);
2482  *MinEnd = std::min(*MinEnd, End);
2483  }
2484  } else {
2485  int LateStart = cycle - Dep.getLatency() +
2486  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2487  *MinLateStart = std::min(*MinLateStart, LateStart);
2488  }
2489  }
2490  // For instruction that requires multiple iterations, make sure that
2491  // the dependent instruction is not scheduled past the definition.
2492  SUnit *BE = multipleIterations(I, DAG);
2493  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2494  !SU->isPred(I))
2495  *MinLateStart = std::min(*MinLateStart, cycle);
2496  }
2497  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2498  if (SU->Succs[i].getSUnit() == I) {
2499  const SDep &Dep = SU->Succs[i];
2500  if (!DAG->isBackedge(SU, Dep)) {
2501  int LateStart = cycle - Dep.getLatency() +
2502  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2503  *MinLateStart = std::min(*MinLateStart, LateStart);
2504  if (DAG->isLoopCarriedDep(SU, Dep)) {
2505  int Start = latestCycleInChain(Dep) + 1 - II;
2506  *MaxStart = std::max(*MaxStart, Start);
2507  }
2508  } else {
2509  int EarlyStart = cycle + Dep.getLatency() -
2510  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2511  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2512  }
2513  }
2514  }
2515  }
2516  }
2517 }
2518 
2519 /// Order the instructions within a cycle so that the definitions occur
2520 /// before the uses. Returns true if the instruction is added to the start
2521 /// of the list, or false if added to the end.
2523  std::deque<SUnit *> &Insts) {
2524  MachineInstr *MI = SU->getInstr();
2525  bool OrderBeforeUse = false;
2526  bool OrderAfterDef = false;
2527  bool OrderBeforeDef = false;
2528  unsigned MoveDef = 0;
2529  unsigned MoveUse = 0;
2530  int StageInst1 = stageScheduled(SU);
2531 
2532  unsigned Pos = 0;
2533  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2534  ++I, ++Pos) {
2535  for (MachineOperand &MO : MI->operands()) {
2536  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2537  continue;
2538 
2539  Register Reg = MO.getReg();
2540  unsigned BasePos, OffsetPos;
2541  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2542  if (MI->getOperand(BasePos).getReg() == Reg)
2543  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2544  Reg = NewReg;
2545  bool Reads, Writes;
2546  std::tie(Reads, Writes) =
2547  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2548  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2549  OrderBeforeUse = true;
2550  if (MoveUse == 0)
2551  MoveUse = Pos;
2552  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2553  // Add the instruction after the scheduled instruction.
2554  OrderAfterDef = true;
2555  MoveDef = Pos;
2556  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2557  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2558  OrderBeforeUse = true;
2559  if (MoveUse == 0)
2560  MoveUse = Pos;
2561  } else {
2562  OrderAfterDef = true;
2563  MoveDef = Pos;
2564  }
2565  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2566  OrderBeforeUse = true;
2567  if (MoveUse == 0)
2568  MoveUse = Pos;
2569  if (MoveUse != 0) {
2570  OrderAfterDef = true;
2571  MoveDef = Pos - 1;
2572  }
2573  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2574  // Add the instruction before the scheduled instruction.
2575  OrderBeforeUse = true;
2576  if (MoveUse == 0)
2577  MoveUse = Pos;
2578  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2579  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2580  if (MoveUse == 0) {
2581  OrderBeforeDef = true;
2582  MoveUse = Pos;
2583  }
2584  }
2585  }
2586  // Check for order dependences between instructions. Make sure the source
2587  // is ordered before the destination.
2588  for (auto &S : SU->Succs) {
2589  if (S.getSUnit() != *I)
2590  continue;
2591  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2592  OrderBeforeUse = true;
2593  if (Pos < MoveUse)
2594  MoveUse = Pos;
2595  }
2596  // We did not handle HW dependences in previous for loop,
2597  // and we normally set Latency = 0 for Anti deps,
2598  // so may have nodes in same cycle with Anti denpendent on HW regs.
2599  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2600  OrderBeforeUse = true;
2601  if ((MoveUse == 0) || (Pos < MoveUse))
2602  MoveUse = Pos;
2603  }
2604  }
2605  for (auto &P : SU->Preds) {
2606  if (P.getSUnit() != *I)
2607  continue;
2608  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2609  OrderAfterDef = true;
2610  MoveDef = Pos;
2611  }
2612  }
2613  }
2614 
2615  // A circular dependence.
2616  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2617  OrderBeforeUse = false;
2618 
2619  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2620  // to a loop-carried dependence.
2621  if (OrderBeforeDef)
2622  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2623 
2624  // The uncommon case when the instruction order needs to be updated because
2625  // there is both a use and def.
2626  if (OrderBeforeUse && OrderAfterDef) {
2627  SUnit *UseSU = Insts.at(MoveUse);
2628  SUnit *DefSU = Insts.at(MoveDef);
2629  if (MoveUse > MoveDef) {
2630  Insts.erase(Insts.begin() + MoveUse);
2631  Insts.erase(Insts.begin() + MoveDef);
2632  } else {
2633  Insts.erase(Insts.begin() + MoveDef);
2634  Insts.erase(Insts.begin() + MoveUse);
2635  }
2636  orderDependence(SSD, UseSU, Insts);
2637  orderDependence(SSD, SU, Insts);
2638  orderDependence(SSD, DefSU, Insts);
2639  return;
2640  }
2641  // Put the new instruction first if there is a use in the list. Otherwise,
2642  // put it at the end of the list.
2643  if (OrderBeforeUse)
2644  Insts.push_front(SU);
2645  else
2646  Insts.push_back(SU);
2647 }
2648 
2649 /// Return true if the scheduled Phi has a loop carried operand.
2651  if (!Phi.isPHI())
2652  return false;
2653  assert(Phi.isPHI() && "Expecting a Phi.");
2654  SUnit *DefSU = SSD->getSUnit(&Phi);
2655  unsigned DefCycle = cycleScheduled(DefSU);
2656  int DefStage = stageScheduled(DefSU);
2657 
2658  unsigned InitVal = 0;
2659  unsigned LoopVal = 0;
2660  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2661  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2662  if (!UseSU)
2663  return true;
2664  if (UseSU->getInstr()->isPHI())
2665  return true;
2666  unsigned LoopCycle = cycleScheduled(UseSU);
2667  int LoopStage = stageScheduled(UseSU);
2668  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2669 }
2670 
2671 /// Return true if the instruction is a definition that is loop carried
2672 /// and defines the use on the next iteration.
2673 /// v1 = phi(v2, v3)
2674 /// (Def) v3 = op v1
2675 /// (MO) = v1
2676 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2677 /// register.
2680  if (!MO.isReg())
2681  return false;
2682  if (Def->isPHI())
2683  return false;
2684  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2685  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2686  return false;
2687  if (!isLoopCarried(SSD, *Phi))
2688  return false;
2689  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2690  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2691  MachineOperand &DMO = Def->getOperand(i);
2692  if (!DMO.isReg() || !DMO.isDef())
2693  continue;
2694  if (DMO.getReg() == LoopReg)
2695  return true;
2696  }
2697  return false;
2698 }
2699 
2700 // Check if the generated schedule is valid. This function checks if
2701 // an instruction that uses a physical register is scheduled in a
2702 // different stage than the definition. The pipeliner does not handle
2703 // physical register values that may cross a basic block boundary.
2705  for (SUnit &SU : SSD->SUnits) {
2706  if (!SU.hasPhysRegDefs)
2707  continue;
2708  int StageDef = stageScheduled(&SU);
2709  assert(StageDef != -1 && "Instruction should have been scheduled.");
2710  for (auto &SI : SU.Succs)
2711  if (SI.isAssignedRegDep())
2712  if (Register::isPhysicalRegister(SI.getReg()))
2713  if (stageScheduled(SI.getSUnit()) != StageDef)
2714  return false;
2715  }
2716  return true;
2717 }
2718 
2719 /// A property of the node order in swing-modulo-scheduling is
2720 /// that for nodes outside circuits the following holds:
2721 /// none of them is scheduled after both a successor and a
2722 /// predecessor.
2723 /// The method below checks whether the property is met.
2724 /// If not, debug information is printed and statistics information updated.
2725 /// Note that we do not use an assert statement.
2726 /// The reason is that although an invalid node oder may prevent
2727 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2728 /// it does not lead to the generation of incorrect code.
2729 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2730 
2731  // a sorted vector that maps each SUnit to its index in the NodeOrder
2732  typedef std::pair<SUnit *, unsigned> UnitIndex;
2733  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2734 
2735  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2736  Indices.push_back(std::make_pair(NodeOrder[i], i));
2737 
2738  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2739  return std::get<0>(i1) < std::get<0>(i2);
2740  };
2741 
2742  // sort, so that we can perform a binary search
2743  llvm::sort(Indices, CompareKey);
2744 
2745  bool Valid = true;
2746  (void)Valid;
2747  // for each SUnit in the NodeOrder, check whether
2748  // it appears after both a successor and a predecessor
2749  // of the SUnit. If this is the case, and the SUnit
2750  // is not part of circuit, then the NodeOrder is not
2751  // valid.
2752  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2753  SUnit *SU = NodeOrder[i];
2754  unsigned Index = i;
2755 
2756  bool PredBefore = false;
2757  bool SuccBefore = false;
2758 
2759  SUnit *Succ;
2760  SUnit *Pred;
2761  (void)Succ;
2762  (void)Pred;
2763 
2764  for (SDep &PredEdge : SU->Preds) {
2765  SUnit *PredSU = PredEdge.getSUnit();
2766  unsigned PredIndex = std::get<1>(
2767  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2768  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2769  PredBefore = true;
2770  Pred = PredSU;
2771  break;
2772  }
2773  }
2774 
2775  for (SDep &SuccEdge : SU->Succs) {
2776  SUnit *SuccSU = SuccEdge.getSUnit();
2777  // Do not process a boundary node, it was not included in NodeOrder,
2778  // hence not in Indices either, call to std::lower_bound() below will
2779  // return Indices.end().
2780  if (SuccSU->isBoundaryNode())
2781  continue;
2782  unsigned SuccIndex = std::get<1>(
2783  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2784  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2785  SuccBefore = true;
2786  Succ = SuccSU;
2787  break;
2788  }
2789  }
2790 
2791  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2792  // instructions in circuits are allowed to be scheduled
2793  // after both a successor and predecessor.
2794  bool InCircuit = llvm::any_of(
2795  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2796  if (InCircuit)
2797  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2798  else {
2799  Valid = false;
2800  NumNodeOrderIssues++;
2801  LLVM_DEBUG(dbgs() << "Predecessor ";);
2802  }
2803  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2804  << " are scheduled before node " << SU->NodeNum
2805  << "\n";);
2806  }
2807  }
2808 
2809  LLVM_DEBUG({
2810  if (!Valid)
2811  dbgs() << "Invalid node order found!\n";
2812  });
2813 }
2814 
2815 /// Attempt to fix the degenerate cases when the instruction serialization
2816 /// causes the register lifetimes to overlap. For example,
2817 /// p' = store_pi(p, b)
2818 /// = load p, offset
2819 /// In this case p and p' overlap, which means that two registers are needed.
2820 /// Instead, this function changes the load to use p' and updates the offset.
2821 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2822  unsigned OverlapReg = 0;
2823  unsigned NewBaseReg = 0;
2824  for (SUnit *SU : Instrs) {
2825  MachineInstr *MI = SU->getInstr();
2826  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2827  const MachineOperand &MO = MI->getOperand(i);
2828  // Look for an instruction that uses p. The instruction occurs in the
2829  // same cycle but occurs later in the serialized order.
2830  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2831  // Check that the instruction appears in the InstrChanges structure,
2832  // which contains instructions that can have the offset updated.
2834  InstrChanges.find(SU);
2835  if (It != InstrChanges.end()) {
2836  unsigned BasePos, OffsetPos;
2837  // Update the base register and adjust the offset.
2838  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2839  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2840  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2841  int64_t NewOffset =
2842  MI->getOperand(OffsetPos).getImm() - It->second.second;
2843  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2844  SU->setInstr(NewMI);
2845  MISUnitMap[NewMI] = SU;
2846  NewMIs[MI] = NewMI;
2847  }
2848  }
2849  OverlapReg = 0;
2850  NewBaseReg = 0;
2851  break;
2852  }
2853  // Look for an instruction of the form p' = op(p), which uses and defines
2854  // two virtual registers that get allocated to the same physical register.
2855  unsigned TiedUseIdx = 0;
2856  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2857  // OverlapReg is p in the example above.
2858  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2859  // NewBaseReg is p' in the example above.
2860  NewBaseReg = MI->getOperand(i).getReg();
2861  break;
2862  }
2863  }
2864  }
2865 }
2866 
2867 /// After the schedule has been formed, call this function to combine
2868 /// the instructions from the different stages/cycles. That is, this
2869 /// function creates a schedule that represents a single iteration.
2871  // Move all instructions to the first stage from later stages.
2872  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2873  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2874  ++stage) {
2875  std::deque<SUnit *> &cycleInstrs =
2876  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2877  for (SUnit *SU : llvm::reverse(cycleInstrs))
2878  ScheduledInstrs[cycle].push_front(SU);
2879  }
2880  }
2881 
2882  // Erase all the elements in the later stages. Only one iteration should
2883  // remain in the scheduled list, and it contains all the instructions.
2884  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2885  ScheduledInstrs.erase(cycle);
2886 
2887  // Change the registers in instruction as specified in the InstrChanges
2888  // map. We need to use the new registers to create the correct order.
2889  for (const SUnit &SU : SSD->SUnits)
2890  SSD->applyInstrChange(SU.getInstr(), *this);
2891 
2892  // Reorder the instructions in each cycle to fix and improve the
2893  // generated code.
2894  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2895  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2896  std::deque<SUnit *> newOrderPhi;
2897  for (SUnit *SU : cycleInstrs) {
2898  if (SU->getInstr()->isPHI())
2899  newOrderPhi.push_back(SU);
2900  }
2901  std::deque<SUnit *> newOrderI;
2902  for (SUnit *SU : cycleInstrs) {
2903  if (!SU->getInstr()->isPHI())
2904  orderDependence(SSD, SU, newOrderI);
2905  }
2906  // Replace the old order with the new order.
2907  cycleInstrs.swap(newOrderPhi);
2908  llvm::append_range(cycleInstrs, newOrderI);
2909  SSD->fixupRegisterOverlaps(cycleInstrs);
2910  }
2911 
2912  LLVM_DEBUG(dump(););
2913 }
2914 
2915 void NodeSet::print(raw_ostream &os) const {
2916  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2917  << " depth " << MaxDepth << " col " << Colocate << "\n";
2918  for (const auto &I : Nodes)
2919  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2920  os << "\n";
2921 }
2922 
2923 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2924 /// Print the schedule information to the given output.
2926  // Iterate over each cycle.
2927  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2928  // Iterate over each instruction in the cycle.
2929  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2930  for (SUnit *CI : cycleInstrs->second) {
2931  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2932  os << "(" << CI->NodeNum << ") ";
2933  CI->getInstr()->print(os);
2934  os << "\n";
2935  }
2936  }
2937 }
2938 
2939 /// Utility function used for debugging to print the schedule.
2942 
2943 #endif
2944 
2946  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2947  unsigned ProcResourceID = 0;
2948 
2949  // We currently limit the resource kinds to 64 and below so that we can use
2950  // uint64_t for Masks
2951  assert(SM.getNumProcResourceKinds() < 64 &&
2952  "Too many kinds of resources, unsupported");
2953  // Create a unique bitmask for every processor resource unit.
2954  // Skip resource at index 0, since it always references 'InvalidUnit'.
2955  Masks.resize(SM.getNumProcResourceKinds());
2956  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2957  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2958  if (Desc.SubUnitsIdxBegin)
2959  continue;
2960  Masks[I] = 1ULL << ProcResourceID;
2961  ProcResourceID++;
2962  }
2963  // Create a unique bitmask for every processor resource group.
2964  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2965  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2966  if (!Desc.SubUnitsIdxBegin)
2967  continue;
2968  Masks[I] = 1ULL << ProcResourceID;
2969  for (unsigned U = 0; U < Desc.NumUnits; ++U)
2970  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
2971  ProcResourceID++;
2972  }
2973  LLVM_DEBUG({
2974  if (SwpShowResMask) {
2975  dbgs() << "ProcResourceDesc:\n";
2976  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2977  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
2978  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
2979  ProcResource->Name, I, Masks[I],
2980  ProcResource->NumUnits);
2981  }
2982  dbgs() << " -----------------\n";
2983  }
2984  });
2985 }
2986 
2988 
2989  LLVM_DEBUG({
2990  if (SwpDebugResource)
2991  dbgs() << "canReserveResources:\n";
2992  });
2993  if (UseDFA)
2994  return DFAResources->canReserveResources(MID);
2995 
2996  unsigned InsnClass = MID->getSchedClass();
2997  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
2998  if (!SCDesc->isValid()) {
2999  LLVM_DEBUG({
3000  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3001  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3002  });
3003  return true;
3004  }
3005 
3006  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
3007  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
3008  for (; I != E; ++I) {
3009  if (!I->Cycles)
3010  continue;
3011  const MCProcResourceDesc *ProcResource =
3012  SM.getProcResource(I->ProcResourceIdx);
3013  unsigned NumUnits = ProcResource->NumUnits;
3014  LLVM_DEBUG({
3015  if (SwpDebugResource)
3016  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3017  ProcResource->Name, I->ProcResourceIdx,
3018  ProcResourceCount[I->ProcResourceIdx], NumUnits,
3019  I->Cycles);
3020  });
3021  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
3022  return false;
3023  }
3024  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
3025  return true;
3026 }
3027 
3029  LLVM_DEBUG({
3030  if (SwpDebugResource)
3031  dbgs() << "reserveResources:\n";
3032  });
3033  if (UseDFA)
3034  return DFAResources->reserveResources(MID);
3035 
3036  unsigned InsnClass = MID->getSchedClass();
3037  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3038  if (!SCDesc->isValid()) {
3039  LLVM_DEBUG({
3040  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3041  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3042  });
3043  return;
3044  }
3045  for (const MCWriteProcResEntry &PRE :
3046  make_range(STI->getWriteProcResBegin(SCDesc),
3047  STI->getWriteProcResEnd(SCDesc))) {
3048  if (!PRE.Cycles)
3049  continue;
3050  ++ProcResourceCount[PRE.ProcResourceIdx];
3051  LLVM_DEBUG({
3052  if (SwpDebugResource) {
3053  const MCProcResourceDesc *ProcResource =
3054  SM.getProcResource(PRE.ProcResourceIdx);
3055  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3056  ProcResource->Name, PRE.ProcResourceIdx,
3057  ProcResourceCount[PRE.ProcResourceIdx],
3058  ProcResource->NumUnits, PRE.Cycles);
3059  }
3060  });
3061  }
3062  LLVM_DEBUG({
3063  if (SwpDebugResource)
3064  dbgs() << "reserveResources: done!\n\n";
3065  });
3066 }
3067 
3069  return canReserveResources(&MI.getDesc());
3070 }
3071 
3073  return reserveResources(&MI.getDesc());
3074 }
3075 
3077  if (UseDFA)
3078  return DFAResources->clearResources();
3079  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3080 }
i
i
Definition: README.txt:29
SDAG
QP Compare Ordered outs ins xscmpudp No SDAG
Definition: README_P9.txt:301
llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:106
computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1591
llvm::cl::Option::getPosition
unsigned getPosition() const
Definition: CommandLine.h:305
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:170
EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:1803
llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
Definition: ScheduleDAG.cpp:566
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1070
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:507
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:22
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
Definition: SetOperations.h:71
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:79
llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:229
llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:650
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:206
llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:2915
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:175
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:2941
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:78
SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MCInstrDesc.h
SetOperations.h
llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:62
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:162
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:724
llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:708
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1702
llvm::MCInstrDesc::isPseudo
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:264
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:76
llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:184
llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:371
llvm::MapVector::clear
void clear()
Definition: MapVector.h:88
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1175
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:209
llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:723
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:612
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:227
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: MachinePipeliner.cpp:677
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:634
MapVector.h
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:561
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:409
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:471
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:269
ValueTracking.h
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:68
MachineBasicBlock.h
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:47
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:655
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1372
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::DenseMapIterator
Definition: DenseMap.h:56
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:486
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1382
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:357
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::RegisterMaskPair
Definition: RegisterPressure.h:39
llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2263
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2450
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1424
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1285
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:233
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:359
llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition: MachinePipeliner.cpp:2349
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:642
llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:279
llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:80
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:66
computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1564
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:258
SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1564
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:141
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:127
RegisterPressure.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:208
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1359
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1143
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:84
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1592
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:56
CommandLine.h
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
MCInstrItineraries.h
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:37
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1318
MachineLoopInfo.h
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:81
llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:2940
llvm::AAResults
Definition: AliasAnalysis.h:507
llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:250
llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:240
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:236
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:471
llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:217
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:537
DFAPacketizer.h
llvm::MachineFunction::deleteMachineInstr
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:399
llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:534
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:548
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:527
llvm::Instruction
Definition: Instruction.h:45
llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:2704
SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:108
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:607
llvm::SMSchedule::orderDependence
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2522
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1775
BitVector.h
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:708
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:518
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
DebugLoc.h
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:240
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:311
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:71
llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:365
llvm::SMSchedule::isLoopCarried
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:2650
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:96
llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:60
llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:535
llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:754
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ResourceManager::canReserveResources
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
Definition: MachinePipeliner.cpp:2987
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1354
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1137
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:641
llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2461
llvm::cl::opt< bool >
llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431
llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:528
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:208
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4325
llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:31
llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:445
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:147
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:48
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1675
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2160
uint64_t
LiveIntervals.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:530
llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:82
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:364
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:373
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:113
llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:63
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Definition: MachinePipeliner.h:213
llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:357
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
MCRegisterInfo.h
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:199
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:125
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:193
ArrayRef.h
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:946
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:82
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:50
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:661
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:243
llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:81
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:852
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:2678
isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:685
llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:363
iterator_range.h
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1249
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:115
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:258
llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:584
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1521
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:140
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1573
ScheduleDAGMutation.h
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:241
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:224
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:166
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:375
SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:68
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1599
llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:86
llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1961
llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:218
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1789
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1558
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:32
llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1166
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:353
llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:2870
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:158
llvm::ResourceManager::reserveResources
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change.
Definition: MachinePipeliner.cpp:3028
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:365
SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
llvm::MapVector::end
iterator end()
Definition: MapVector.h:71
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:58
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:219
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:239
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1277
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1727
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:975
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:607
llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1424
llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:2945
llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:220
swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1191
SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:387
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1018
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:627
llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:67
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1440
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::MachineLoop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: MachineLoopInfo.cpp:102
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1533
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:214
llvm::ResourceManager::clearResources
void clearResources()
Reset the state.
Definition: MachinePipeliner.cpp:3076
llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:438
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1791
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:579
pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1508
llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:2925
llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2208
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:69
llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2402
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:27
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:677
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:91
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:73
llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1158
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:272
llvm::rdf::NodeSet
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
MachineInstrBuilder.h
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:182
succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1537
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:269
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:556
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1335
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:62
llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:78
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
LaneBitmask.h
llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:361
MachineMemOperand.h
llvm::SmallVectorImpl< const Value * >
MachineOperand.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:306
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:345
pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:70
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:159
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:62
MachinePipeliner.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:135
llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:579
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
llvm::cl::desc
Definition: CommandLine.h:412
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
raw_ostream.h
llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2425
llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:572
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:46
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:39
MachineFunction.h
llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1038
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:2821
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::ResourceManager
Definition: MachinePipeliner.h:434
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:618
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:480
llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:210
TargetRegisterInfo.h
Debug.h
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:274
llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:563
SetVector.h
llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:488
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:287
MachineDominators.h
llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:264
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364