LLVM  14.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/InitializePasses.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "pipeliner"
22 using namespace llvm;
23 
25  for (MachineInstr *MI : ScheduledInstrs)
26  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
27 }
28 
29 //===----------------------------------------------------------------------===//
30 // ModuloScheduleExpander implementation
31 //===----------------------------------------------------------------------===//
32 
33 /// Return the register values for the operands of a Phi instruction.
34 /// This function assume the instruction is a Phi.
36  unsigned &InitVal, unsigned &LoopVal) {
37  assert(Phi.isPHI() && "Expecting a Phi.");
38 
39  InitVal = 0;
40  LoopVal = 0;
41  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
42  if (Phi.getOperand(i + 1).getMBB() != Loop)
43  InitVal = Phi.getOperand(i).getReg();
44  else
45  LoopVal = Phi.getOperand(i).getReg();
46 
47  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
48 }
49 
50 /// Return the Phi register value that comes from the incoming block.
51 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
52  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
53  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
54  return Phi.getOperand(i).getReg();
55  return 0;
56 }
57 
58 /// Return the Phi register value that comes the loop block.
59 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
60  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
61  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
62  return Phi.getOperand(i).getReg();
63  return 0;
64 }
65 
67  BB = Schedule.getLoop()->getTopBlock();
68  Preheader = *BB->pred_begin();
69  if (Preheader == BB)
70  Preheader = *std::next(BB->pred_begin());
71 
72  // Iterate over the definitions in each instruction, and compute the
73  // stage difference for each use. Keep the maximum value.
74  for (MachineInstr *MI : Schedule.getInstructions()) {
75  int DefStage = Schedule.getStage(MI);
76  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
77  MachineOperand &Op = MI->getOperand(i);
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
85  EI = MRI.use_end();
86  UI != EI; ++UI) {
87  MachineOperand &UseOp = *UI;
88  MachineInstr *UseMI = UseOp.getParent();
89  int UseStage = Schedule.getStage(UseMI);
90  unsigned Diff = 0;
91  if (UseStage != -1 && UseStage >= DefStage)
92  Diff = UseStage - DefStage;
93  if (MI->isPHI()) {
94  if (isLoopCarried(*MI))
95  ++Diff;
96  else
97  PhiIsSwapped = true;
98  }
99  MaxDiff = std::max(Diff, MaxDiff);
100  }
101  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
102  }
103  }
104 
105  generatePipelinedLoop();
106 }
107 
108 void ModuloScheduleExpander::generatePipelinedLoop() {
110  assert(LoopInfo && "Must be able to analyze loop!");
111 
112  // Create a new basic block for the kernel and add it to the CFG.
114 
115  unsigned MaxStageCount = Schedule.getNumStages() - 1;
116 
117  // Remember the registers that are used in different stages. The index is
118  // the iteration, or stage, that the instruction is scheduled in. This is
119  // a map between register names in the original block and the names created
120  // in each stage of the pipelined loop.
121  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
122  InstrMapTy InstrMap;
123 
125 
126  // Generate the prolog instructions that set up the pipeline.
127  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
128  MF.insert(BB->getIterator(), KernelBB);
129 
130  // Rearrange the instructions to generate the new, pipelined loop,
131  // and update register names as needed.
132  for (MachineInstr *CI : Schedule.getInstructions()) {
133  if (CI->isPHI())
134  continue;
135  unsigned StageNum = Schedule.getStage(CI);
136  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
137  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
138  KernelBB->push_back(NewMI);
139  InstrMap[NewMI] = CI;
140  }
141 
142  // Copy any terminator instructions to the new kernel, and update
143  // names as needed.
145  E = BB->instr_end();
146  I != E; ++I) {
147  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
148  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
149  KernelBB->push_back(NewMI);
150  InstrMap[NewMI] = &*I;
151  }
152 
153  NewKernel = KernelBB;
154  KernelBB->transferSuccessors(BB);
155  KernelBB->replaceSuccessor(BB, KernelBB);
156 
157  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
158  InstrMap, MaxStageCount, MaxStageCount, false);
159  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
160  MaxStageCount, MaxStageCount, false);
161 
162  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
163 
165  // Generate the epilog instructions to complete the pipeline.
166  generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
167 
168  // We need this step because the register allocation doesn't handle some
169  // situations well, so we insert copies to help out.
170  splitLifetimes(KernelBB, EpilogBBs);
171 
172  // Remove dead instructions due to loop induction variables.
173  removeDeadInstructions(KernelBB, EpilogBBs);
174 
175  // Add branches between prolog and epilog blocks.
176  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
177 
178  delete[] VRMap;
179 }
180 
182  // Remove the original loop since it's no longer referenced.
183  for (auto &I : *BB)
185  BB->clear();
186  BB->eraseFromParent();
187 }
188 
189 /// Generate the pipeline prolog code.
190 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
191  MachineBasicBlock *KernelBB,
192  ValueMapTy *VRMap,
193  MBBVectorTy &PrologBBs) {
194  MachineBasicBlock *PredBB = Preheader;
195  InstrMapTy InstrMap;
196 
197  // Generate a basic block for each stage, not including the last stage,
198  // which will be generated in the kernel. Each basic block may contain
199  // instructions from multiple stages/iterations.
200  for (unsigned i = 0; i < LastStage; ++i) {
201  // Create and insert the prolog basic block prior to the original loop
202  // basic block. The original loop is removed later.
204  PrologBBs.push_back(NewBB);
205  MF.insert(BB->getIterator(), NewBB);
206  NewBB->transferSuccessors(PredBB);
207  PredBB->addSuccessor(NewBB);
208  PredBB = NewBB;
209 
210  // Generate instructions for each appropriate stage. Process instructions
211  // in original program order.
212  for (int StageNum = i; StageNum >= 0; --StageNum) {
213  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
214  BBE = BB->getFirstTerminator();
215  BBI != BBE; ++BBI) {
216  if (Schedule.getStage(&*BBI) == StageNum) {
217  if (BBI->isPHI())
218  continue;
219  MachineInstr *NewMI =
220  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
221  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
222  NewBB->push_back(NewMI);
223  InstrMap[NewMI] = &*BBI;
224  }
225  }
226  }
227  rewritePhiValues(NewBB, i, VRMap, InstrMap);
228  LLVM_DEBUG({
229  dbgs() << "prolog:\n";
230  NewBB->dump();
231  });
232  }
233 
234  PredBB->replaceSuccessor(BB, KernelBB);
235 
236  // Check if we need to remove the branch from the preheader to the original
237  // loop, and replace it with a branch to the new loop.
238  unsigned numBranches = TII->removeBranch(*Preheader);
239  if (numBranches) {
241  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
242  }
243 }
244 
245 /// Generate the pipeline epilog code. The epilog code finishes the iterations
246 /// that were started in either the prolog or the kernel. We create a basic
247 /// block for each stage that needs to complete.
248 void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
249  MachineBasicBlock *KernelBB,
250  ValueMapTy *VRMap,
251  MBBVectorTy &EpilogBBs,
252  MBBVectorTy &PrologBBs) {
253  // We need to change the branch from the kernel to the first epilog block, so
254  // this call to analyze branch uses the kernel rather than the original BB.
255  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
257  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
258  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
259  if (checkBranch)
260  return;
261 
262  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
263  if (*LoopExitI == KernelBB)
264  ++LoopExitI;
265  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
266  MachineBasicBlock *LoopExitBB = *LoopExitI;
267 
268  MachineBasicBlock *PredBB = KernelBB;
269  MachineBasicBlock *EpilogStart = LoopExitBB;
270  InstrMapTy InstrMap;
271 
272  // Generate a basic block for each stage, not including the last stage,
273  // which was generated for the kernel. Each basic block may contain
274  // instructions from multiple stages/iterations.
275  int EpilogStage = LastStage + 1;
276  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
278  EpilogBBs.push_back(NewBB);
279  MF.insert(BB->getIterator(), NewBB);
280 
281  PredBB->replaceSuccessor(LoopExitBB, NewBB);
282  NewBB->addSuccessor(LoopExitBB);
283 
284  if (EpilogStart == LoopExitBB)
285  EpilogStart = NewBB;
286 
287  // Add instructions to the epilog depending on the current block.
288  // Process instructions in original program order.
289  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
290  for (auto &BBI : *BB) {
291  if (BBI.isPHI())
292  continue;
293  MachineInstr *In = &BBI;
294  if ((unsigned)Schedule.getStage(In) == StageNum) {
295  // Instructions with memoperands in the epilog are updated with
296  // conservative values.
297  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
298  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
299  NewBB->push_back(NewMI);
300  InstrMap[NewMI] = In;
301  }
302  }
303  }
304  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
305  InstrMap, LastStage, EpilogStage, i == 1);
306  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
307  LastStage, EpilogStage, i == 1);
308  PredBB = NewBB;
309 
310  LLVM_DEBUG({
311  dbgs() << "epilog:\n";
312  NewBB->dump();
313  });
314  }
315 
316  // Fix any Phi nodes in the loop exit block.
317  LoopExitBB->replacePhiUsesWith(BB, PredBB);
318 
319  // Create a branch to the new epilog from the kernel.
320  // Remove the original branch and add a new branch to the epilog.
321  TII->removeBranch(*KernelBB);
322  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
323  // Add a branch to the loop exit.
324  if (EpilogBBs.size() > 0) {
325  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
327  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
328  }
329 }
330 
331 /// Replace all uses of FromReg that appear outside the specified
332 /// basic block with ToReg.
333 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
336  LiveIntervals &LIS) {
338  E = MRI.use_end();
339  I != E;) {
340  MachineOperand &O = *I;
341  ++I;
342  if (O.getParent()->getParent() != MBB)
343  O.setReg(ToReg);
344  }
345  if (!LIS.hasInterval(ToReg))
346  LIS.createEmptyInterval(ToReg);
347 }
348 
349 /// Return true if the register has a use that occurs outside the
350 /// specified loop.
351 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
354  E = MRI.use_end();
355  I != E; ++I)
356  if (I->getParent()->getParent() != BB)
357  return true;
358  return false;
359 }
360 
361 /// Generate Phis for the specific block in the generated pipelined code.
362 /// This function looks at the Phis from the original code to guide the
363 /// creation of new Phis.
364 void ModuloScheduleExpander::generateExistingPhis(
366  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
367  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
368  // Compute the stage number for the initial value of the Phi, which
369  // comes from the prolog. The prolog to use depends on to which kernel/
370  // epilog that we're adding the Phi.
371  unsigned PrologStage = 0;
372  unsigned PrevStage = 0;
373  bool InKernel = (LastStageNum == CurStageNum);
374  if (InKernel) {
375  PrologStage = LastStageNum - 1;
376  PrevStage = CurStageNum;
377  } else {
378  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
379  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
380  }
381 
382  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
383  BBE = BB->getFirstNonPHI();
384  BBI != BBE; ++BBI) {
385  Register Def = BBI->getOperand(0).getReg();
386 
387  unsigned InitVal = 0;
388  unsigned LoopVal = 0;
389  getPhiRegs(*BBI, BB, InitVal, LoopVal);
390 
391  unsigned PhiOp1 = 0;
392  // The Phi value from the loop body typically is defined in the loop, but
393  // not always. So, we need to check if the value is defined in the loop.
394  unsigned PhiOp2 = LoopVal;
395  if (VRMap[LastStageNum].count(LoopVal))
396  PhiOp2 = VRMap[LastStageNum][LoopVal];
397 
398  int StageScheduled = Schedule.getStage(&*BBI);
399  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
400  unsigned NumStages = getStagesForReg(Def, CurStageNum);
401  if (NumStages == 0) {
402  // We don't need to generate a Phi anymore, but we need to rename any uses
403  // of the Phi value.
404  unsigned NewReg = VRMap[PrevStage][LoopVal];
405  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
406  InitVal, NewReg);
407  if (VRMap[CurStageNum].count(LoopVal))
408  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
409  }
410  // Adjust the number of Phis needed depending on the number of prologs left,
411  // and the distance from where the Phi is first scheduled. The number of
412  // Phis cannot exceed the number of prolog stages. Each stage can
413  // potentially define two values.
414  unsigned MaxPhis = PrologStage + 2;
415  if (!InKernel && (int)PrologStage <= LoopValStage)
416  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
417  unsigned NumPhis = std::min(NumStages, MaxPhis);
418 
419  unsigned NewReg = 0;
420  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
421  // In the epilog, we may need to look back one stage to get the correct
422  // Phi name, because the epilog and prolog blocks execute the same stage.
423  // The correct name is from the previous block only when the Phi has
424  // been completely scheduled prior to the epilog, and Phi value is not
425  // needed in multiple stages.
426  int StageDiff = 0;
427  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
428  NumPhis == 1)
429  StageDiff = 1;
430  // Adjust the computations below when the phi and the loop definition
431  // are scheduled in different stages.
432  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
433  StageDiff = StageScheduled - LoopValStage;
434  for (unsigned np = 0; np < NumPhis; ++np) {
435  // If the Phi hasn't been scheduled, then use the initial Phi operand
436  // value. Otherwise, use the scheduled version of the instruction. This
437  // is a little complicated when a Phi references another Phi.
438  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
439  PhiOp1 = InitVal;
440  // Check if the Phi has already been scheduled in a prolog stage.
441  else if (PrologStage >= AccessStage + StageDiff + np &&
442  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
443  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
444  // Check if the Phi has already been scheduled, but the loop instruction
445  // is either another Phi, or doesn't occur in the loop.
446  else if (PrologStage >= AccessStage + StageDiff + np) {
447  // If the Phi references another Phi, we need to examine the other
448  // Phi to get the correct value.
449  PhiOp1 = LoopVal;
450  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
451  int Indirects = 1;
452  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
453  int PhiStage = Schedule.getStage(InstOp1);
454  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
455  PhiOp1 = getInitPhiReg(*InstOp1, BB);
456  else
457  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
458  InstOp1 = MRI.getVRegDef(PhiOp1);
459  int PhiOpStage = Schedule.getStage(InstOp1);
460  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
461  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
462  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
463  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
464  break;
465  }
466  ++Indirects;
467  }
468  } else
469  PhiOp1 = InitVal;
470  // If this references a generated Phi in the kernel, get the Phi operand
471  // from the incoming block.
472  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
473  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
474  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
475 
476  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
477  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
478  // In the epilog, a map lookup is needed to get the value from the kernel,
479  // or previous epilog block. How is does this depends on if the
480  // instruction is scheduled in the previous block.
481  if (!InKernel) {
482  int StageDiffAdj = 0;
483  if (LoopValStage != -1 && StageScheduled > LoopValStage)
484  StageDiffAdj = StageScheduled - LoopValStage;
485  // Use the loop value defined in the kernel, unless the kernel
486  // contains the last definition of the Phi.
487  if (np == 0 && PrevStage == LastStageNum &&
488  (StageScheduled != 0 || LoopValStage != 0) &&
489  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
490  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
491  // Use the value defined by the Phi. We add one because we switch
492  // from looking at the loop value to the Phi definition.
493  else if (np > 0 && PrevStage == LastStageNum &&
494  VRMap[PrevStage - np + 1].count(Def))
495  PhiOp2 = VRMap[PrevStage - np + 1][Def];
496  // Use the loop value defined in the kernel.
497  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
498  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
499  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
500  // Use the value defined by the Phi, unless we're generating the first
501  // epilog and the Phi refers to a Phi in a different stage.
502  else if (VRMap[PrevStage - np].count(Def) &&
503  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
504  (LoopValStage == StageScheduled)))
505  PhiOp2 = VRMap[PrevStage - np][Def];
506  }
507 
508  // Check if we can reuse an existing Phi. This occurs when a Phi
509  // references another Phi, and the other Phi is scheduled in an
510  // earlier stage. We can try to reuse an existing Phi up until the last
511  // stage of the current Phi.
512  if (LoopDefIsPhi) {
513  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
514  int LVNumStages = getStagesForPhi(LoopVal);
515  int StageDiff = (StageScheduled - LoopValStage);
516  LVNumStages -= StageDiff;
517  // Make sure the loop value Phi has been processed already.
518  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
519  NewReg = PhiOp2;
520  unsigned ReuseStage = CurStageNum;
521  if (isLoopCarried(*PhiInst))
522  ReuseStage -= LVNumStages;
523  // Check if the Phi to reuse has been generated yet. If not, then
524  // there is nothing to reuse.
525  if (VRMap[ReuseStage - np].count(LoopVal)) {
526  NewReg = VRMap[ReuseStage - np][LoopVal];
527 
528  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
529  Def, NewReg);
530  // Update the map with the new Phi name.
531  VRMap[CurStageNum - np][Def] = NewReg;
532  PhiOp2 = NewReg;
533  if (VRMap[LastStageNum - np - 1].count(LoopVal))
534  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
535 
536  if (IsLast && np == NumPhis - 1)
537  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
538  continue;
539  }
540  }
541  }
542  if (InKernel && StageDiff > 0 &&
543  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
544  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
545  }
546 
547  const TargetRegisterClass *RC = MRI.getRegClass(Def);
548  NewReg = MRI.createVirtualRegister(RC);
549 
550  MachineInstrBuilder NewPhi =
551  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
552  TII->get(TargetOpcode::PHI), NewReg);
553  NewPhi.addReg(PhiOp1).addMBB(BB1);
554  NewPhi.addReg(PhiOp2).addMBB(BB2);
555  if (np == 0)
556  InstrMap[NewPhi] = &*BBI;
557 
558  // We define the Phis after creating the new pipelined code, so
559  // we need to rename the Phi values in scheduled instructions.
560 
561  unsigned PrevReg = 0;
562  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
563  PrevReg = VRMap[PrevStage - np][LoopVal];
564  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
565  NewReg, PrevReg);
566  // If the Phi has been scheduled, use the new name for rewriting.
567  if (VRMap[CurStageNum - np].count(Def)) {
568  unsigned R = VRMap[CurStageNum - np][Def];
569  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
570  NewReg);
571  }
572 
573  // Check if we need to rename any uses that occurs after the loop. The
574  // register to replace depends on whether the Phi is scheduled in the
575  // epilog.
576  if (IsLast && np == NumPhis - 1)
577  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
578 
579  // In the kernel, a dependent Phi uses the value from this Phi.
580  if (InKernel)
581  PhiOp2 = NewReg;
582 
583  // Update the map with the new Phi name.
584  VRMap[CurStageNum - np][Def] = NewReg;
585  }
586 
587  while (NumPhis++ < NumStages) {
588  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
589  NewReg, 0);
590  }
591 
592  // Check if we need to rename a Phi that has been eliminated due to
593  // scheduling.
594  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
595  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
596  }
597 }
598 
599 /// Generate Phis for the specified block in the generated pipelined code.
600 /// These are new Phis needed because the definition is scheduled after the
601 /// use in the pipelined sequence.
602 void ModuloScheduleExpander::generatePhis(
604  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
605  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
606  // Compute the stage number that contains the initial Phi value, and
607  // the Phi from the previous stage.
608  unsigned PrologStage = 0;
609  unsigned PrevStage = 0;
610  unsigned StageDiff = CurStageNum - LastStageNum;
611  bool InKernel = (StageDiff == 0);
612  if (InKernel) {
613  PrologStage = LastStageNum - 1;
614  PrevStage = CurStageNum;
615  } else {
616  PrologStage = LastStageNum - StageDiff;
617  PrevStage = LastStageNum + StageDiff - 1;
618  }
619 
620  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
621  BBE = BB->instr_end();
622  BBI != BBE; ++BBI) {
623  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
624  MachineOperand &MO = BBI->getOperand(i);
625  if (!MO.isReg() || !MO.isDef() ||
627  continue;
628 
629  int StageScheduled = Schedule.getStage(&*BBI);
630  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
631  Register Def = MO.getReg();
632  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
633  // An instruction scheduled in stage 0 and is used after the loop
634  // requires a phi in the epilog for the last definition from either
635  // the kernel or prolog.
636  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
637  hasUseAfterLoop(Def, BB, MRI))
638  NumPhis = 1;
639  if (!InKernel && (unsigned)StageScheduled > PrologStage)
640  continue;
641 
642  unsigned PhiOp2 = VRMap[PrevStage][Def];
643  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
644  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
645  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
646  // The number of Phis can't exceed the number of prolog stages. The
647  // prolog stage number is zero based.
648  if (NumPhis > PrologStage + 1 - StageScheduled)
649  NumPhis = PrologStage + 1 - StageScheduled;
650  for (unsigned np = 0; np < NumPhis; ++np) {
651  unsigned PhiOp1 = VRMap[PrologStage][Def];
652  if (np <= PrologStage)
653  PhiOp1 = VRMap[PrologStage - np][Def];
654  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
655  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
656  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
657  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
658  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
659  }
660  if (!InKernel)
661  PhiOp2 = VRMap[PrevStage - np][Def];
662 
663  const TargetRegisterClass *RC = MRI.getRegClass(Def);
664  Register NewReg = MRI.createVirtualRegister(RC);
665 
666  MachineInstrBuilder NewPhi =
667  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
668  TII->get(TargetOpcode::PHI), NewReg);
669  NewPhi.addReg(PhiOp1).addMBB(BB1);
670  NewPhi.addReg(PhiOp2).addMBB(BB2);
671  if (np == 0)
672  InstrMap[NewPhi] = &*BBI;
673 
674  // Rewrite uses and update the map. The actions depend upon whether
675  // we generating code for the kernel or epilog blocks.
676  if (InKernel) {
677  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
678  NewReg);
679  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
680  NewReg);
681 
682  PhiOp2 = NewReg;
683  VRMap[PrevStage - np - 1][Def] = NewReg;
684  } else {
685  VRMap[CurStageNum - np][Def] = NewReg;
686  if (np == NumPhis - 1)
687  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
688  NewReg);
689  }
690  if (IsLast && np == NumPhis - 1)
691  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
692  }
693  }
694  }
695 }
696 
697 /// Remove instructions that generate values with no uses.
698 /// Typically, these are induction variable operations that generate values
699 /// used in the loop itself. A dead instruction has a definition with
700 /// no uses, or uses that occur in the original loop only.
701 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
702  MBBVectorTy &EpilogBBs) {
703  // For each epilog block, check that the value defined by each instruction
704  // is used. If not, delete it.
705  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
706  MBE = EpilogBBs.rend();
707  MBB != MBE; ++MBB)
708  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
709  ME = (*MBB)->instr_rend();
710  MI != ME;) {
711  // From DeadMachineInstructionElem. Don't delete inline assembly.
712  if (MI->isInlineAsm()) {
713  ++MI;
714  continue;
715  }
716  bool SawStore = false;
717  // Check if it's safe to remove the instruction due to side effects.
718  // We can, and want to, remove Phis here.
719  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
720  ++MI;
721  continue;
722  }
723  bool used = true;
724  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
725  MOE = MI->operands_end();
726  MOI != MOE; ++MOI) {
727  if (!MOI->isReg() || !MOI->isDef())
728  continue;
729  Register reg = MOI->getReg();
730  // Assume physical registers are used, unless they are marked dead.
731  if (Register::isPhysicalRegister(reg)) {
732  used = !MOI->isDead();
733  if (used)
734  break;
735  continue;
736  }
737  unsigned realUses = 0;
738  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
739  EI = MRI.use_end();
740  UI != EI; ++UI) {
741  // Check if there are any uses that occur only in the original
742  // loop. If so, that's not a real use.
743  if (UI->getParent()->getParent() != BB) {
744  realUses++;
745  used = true;
746  break;
747  }
748  }
749  if (realUses > 0)
750  break;
751  used = false;
752  }
753  if (!used) {
755  MI++->eraseFromParent();
756  continue;
757  }
758  ++MI;
759  }
760  // In the kernel block, check if we can remove a Phi that generates a value
761  // used in an instruction removed in the epilog block.
762  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
763  Register reg = MI.getOperand(0).getReg();
764  if (MRI.use_begin(reg) == MRI.use_end()) {
766  MI.eraseFromParent();
767  }
768  }
769 }
770 
771 /// For loop carried definitions, we split the lifetime of a virtual register
772 /// that has uses past the definition in the next iteration. A copy with a new
773 /// virtual register is inserted before the definition, which helps with
774 /// generating a better register assignment.
775 ///
776 /// v1 = phi(a, v2) v1 = phi(a, v2)
777 /// v2 = phi(b, v3) v2 = phi(b, v3)
778 /// v3 = .. v4 = copy v1
779 /// .. = V1 v3 = ..
780 /// .. = v4
781 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
782  MBBVectorTy &EpilogBBs) {
784  for (auto &PHI : KernelBB->phis()) {
785  Register Def = PHI.getOperand(0).getReg();
786  // Check for any Phi definition that used as an operand of another Phi
787  // in the same block.
789  E = MRI.use_instr_end();
790  I != E; ++I) {
791  if (I->isPHI() && I->getParent() == KernelBB) {
792  // Get the loop carried definition.
793  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
794  if (!LCDef)
795  continue;
796  MachineInstr *MI = MRI.getVRegDef(LCDef);
797  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
798  continue;
799  // Search through the rest of the block looking for uses of the Phi
800  // definition. If one occurs, then split the lifetime.
801  unsigned SplitReg = 0;
803  KernelBB->instr_end()))
804  if (BBJ.readsRegister(Def)) {
805  // We split the lifetime when we find the first use.
806  if (SplitReg == 0) {
807  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
808  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
809  TII->get(TargetOpcode::COPY), SplitReg)
810  .addReg(Def);
811  }
812  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
813  }
814  if (!SplitReg)
815  continue;
816  // Search through each of the epilog blocks for any uses to be renamed.
817  for (auto &Epilog : EpilogBBs)
818  for (auto &I : *Epilog)
819  if (I.readsRegister(Def))
820  I.substituteRegister(Def, SplitReg, 0, *TRI);
821  break;
822  }
823  }
824  }
825 }
826 
827 /// Remove the incoming block from the Phis in a basic block.
829  for (MachineInstr &MI : *BB) {
830  if (!MI.isPHI())
831  break;
832  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
833  if (MI.getOperand(i + 1).getMBB() == Incoming) {
834  MI.RemoveOperand(i + 1);
835  MI.RemoveOperand(i);
836  break;
837  }
838  }
839 }
840 
841 /// Create branches from each prolog basic block to the appropriate epilog
842 /// block. These edges are needed if the loop ends before reaching the
843 /// kernel.
844 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
845  MBBVectorTy &PrologBBs,
846  MachineBasicBlock *KernelBB,
847  MBBVectorTy &EpilogBBs,
848  ValueMapTy *VRMap) {
849  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
850  MachineBasicBlock *LastPro = KernelBB;
851  MachineBasicBlock *LastEpi = KernelBB;
852 
853  // Start from the blocks connected to the kernel and work "out"
854  // to the first prolog and the last epilog blocks.
856  unsigned MaxIter = PrologBBs.size() - 1;
857  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
858  // Add branches to the prolog that go to the corresponding
859  // epilog, and the fall-thru prolog/kernel block.
860  MachineBasicBlock *Prolog = PrologBBs[j];
861  MachineBasicBlock *Epilog = EpilogBBs[i];
862 
864  Optional<bool> StaticallyGreater =
865  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
866  unsigned numAdded = 0;
867  if (!StaticallyGreater.hasValue()) {
868  Prolog->addSuccessor(Epilog);
869  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
870  } else if (*StaticallyGreater == false) {
871  Prolog->addSuccessor(Epilog);
872  Prolog->removeSuccessor(LastPro);
873  LastEpi->removeSuccessor(Epilog);
874  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
875  removePhis(Epilog, LastEpi);
876  // Remove the blocks that are no longer referenced.
877  if (LastPro != LastEpi) {
878  LastEpi->clear();
879  LastEpi->eraseFromParent();
880  }
881  if (LastPro == KernelBB) {
882  LoopInfo->disposed();
883  NewKernel = nullptr;
884  }
885  LastPro->clear();
886  LastPro->eraseFromParent();
887  } else {
888  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
890  }
891  LastPro = Prolog;
892  LastEpi = Epilog;
893  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
894  E = Prolog->instr_rend();
895  I != E && numAdded > 0; ++I, --numAdded)
896  updateInstruction(&*I, false, j, 0, VRMap);
897  }
898 
899  if (NewKernel) {
900  LoopInfo->setPreheader(PrologBBs[MaxIter]);
901  LoopInfo->adjustTripCount(-(MaxIter + 1));
902  }
903 }
904 
905 /// Return true if we can compute the amount the instruction changes
906 /// during each iteration. Set Delta to the amount of the change.
907 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
909  const MachineOperand *BaseOp;
910  int64_t Offset;
911  bool OffsetIsScalable;
912  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
913  return false;
914 
915  // FIXME: This algorithm assumes instructions have fixed-size offsets.
916  if (OffsetIsScalable)
917  return false;
918 
919  if (!BaseOp->isReg())
920  return false;
921 
922  Register BaseReg = BaseOp->getReg();
923 
924  MachineRegisterInfo &MRI = MF.getRegInfo();
925  // Check if there is a Phi. If so, get the definition in the loop.
926  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
927  if (BaseDef && BaseDef->isPHI()) {
928  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
929  BaseDef = MRI.getVRegDef(BaseReg);
930  }
931  if (!BaseDef)
932  return false;
933 
934  int D = 0;
935  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
936  return false;
937 
938  Delta = D;
939  return true;
940 }
941 
942 /// Update the memory operand with a new offset when the pipeliner
943 /// generates a new copy of the instruction that refers to a
944 /// different memory location.
945 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
946  MachineInstr &OldMI,
947  unsigned Num) {
948  if (Num == 0)
949  return;
950  // If the instruction has memory operands, then adjust the offset
951  // when the instruction appears in different stages.
952  if (NewMI.memoperands_empty())
953  return;
955  for (MachineMemOperand *MMO : NewMI.memoperands()) {
956  // TODO: Figure out whether isAtomic is really necessary (see D57601).
957  if (MMO->isVolatile() || MMO->isAtomic() ||
958  (MMO->isInvariant() && MMO->isDereferenceable()) ||
959  (!MMO->getValue())) {
960  NewMMOs.push_back(MMO);
961  continue;
962  }
963  unsigned Delta;
964  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
965  int64_t AdjOffset = Delta * Num;
966  NewMMOs.push_back(
967  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
968  } else {
969  NewMMOs.push_back(
971  }
972  }
973  NewMI.setMemRefs(MF, NewMMOs);
974 }
975 
976 /// Clone the instruction for the new pipelined loop and update the
977 /// memory operands, if needed.
978 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
979  unsigned CurStageNum,
980  unsigned InstStageNum) {
981  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
982  // Check for tied operands in inline asm instructions. This should be handled
983  // elsewhere, but I'm not sure of the best solution.
984  if (OldMI->isInlineAsm())
985  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
986  const auto &MO = OldMI->getOperand(i);
987  if (MO.isReg() && MO.isUse())
988  break;
989  unsigned UseIdx;
990  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
991  NewMI->tieOperands(i, UseIdx);
992  }
993  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
994  return NewMI;
995 }
996 
997 /// Clone the instruction for the new pipelined loop. If needed, this
998 /// function updates the instruction using the values saved in the
999 /// InstrChanges structure.
1000 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1001  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1002  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1003  auto It = InstrChanges.find(OldMI);
1004  if (It != InstrChanges.end()) {
1005  std::pair<unsigned, int64_t> RegAndOffset = It->second;
1006  unsigned BasePos, OffsetPos;
1007  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1008  return nullptr;
1009  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1010  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1011  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1012  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1013  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1014  }
1015  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1016  return NewMI;
1017 }
1018 
1019 /// Update the machine instruction with new virtual registers. This
1020 /// function may change the defintions and/or uses.
1021 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1022  bool LastDef,
1023  unsigned CurStageNum,
1024  unsigned InstrStageNum,
1025  ValueMapTy *VRMap) {
1026  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
1027  MachineOperand &MO = NewMI->getOperand(i);
1028  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1029  continue;
1030  Register reg = MO.getReg();
1031  if (MO.isDef()) {
1032  // Create a new virtual register for the definition.
1033  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1034  Register NewReg = MRI.createVirtualRegister(RC);
1035  MO.setReg(NewReg);
1036  VRMap[CurStageNum][reg] = NewReg;
1037  if (LastDef)
1038  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1039  } else if (MO.isUse()) {
1040  MachineInstr *Def = MRI.getVRegDef(reg);
1041  // Compute the stage that contains the last definition for instruction.
1042  int DefStageNum = Schedule.getStage(Def);
1043  unsigned StageNum = CurStageNum;
1044  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1045  // Compute the difference in stages between the defintion and the use.
1046  unsigned StageDiff = (InstrStageNum - DefStageNum);
1047  // Make an adjustment to get the last definition.
1048  StageNum -= StageDiff;
1049  }
1050  if (VRMap[StageNum].count(reg))
1051  MO.setReg(VRMap[StageNum][reg]);
1052  }
1053  }
1054 }
1055 
1056 /// Return the instruction in the loop that defines the register.
1057 /// If the definition is a Phi, then follow the Phi operand to
1058 /// the instruction in the loop.
1059 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1062  while (Def->isPHI()) {
1063  if (!Visited.insert(Def).second)
1064  break;
1065  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1066  if (Def->getOperand(i + 1).getMBB() == BB) {
1067  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1068  break;
1069  }
1070  }
1071  return Def;
1072 }
1073 
1074 /// Return the new name for the value from the previous stage.
1075 unsigned ModuloScheduleExpander::getPrevMapVal(
1076  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1077  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1078  unsigned PrevVal = 0;
1079  if (StageNum > PhiStage) {
1080  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1081  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1082  // The name is defined in the previous stage.
1083  PrevVal = VRMap[StageNum - 1][LoopVal];
1084  else if (VRMap[StageNum].count(LoopVal))
1085  // The previous name is defined in the current stage when the instruction
1086  // order is swapped.
1087  PrevVal = VRMap[StageNum][LoopVal];
1088  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1089  // The loop value hasn't yet been scheduled.
1090  PrevVal = LoopVal;
1091  else if (StageNum == PhiStage + 1)
1092  // The loop value is another phi, which has not been scheduled.
1093  PrevVal = getInitPhiReg(*LoopInst, BB);
1094  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1095  // The loop value is another phi, which has been scheduled.
1096  PrevVal =
1097  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1098  LoopStage, VRMap, BB);
1099  }
1100  return PrevVal;
1101 }
1102 
1103 /// Rewrite the Phi values in the specified block to use the mappings
1104 /// from the initial operand. Once the Phi is scheduled, we switch
1105 /// to using the loop value instead of the Phi value, so those names
1106 /// do not need to be rewritten.
1107 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1108  unsigned StageNum,
1109  ValueMapTy *VRMap,
1110  InstrMapTy &InstrMap) {
1111  for (auto &PHI : BB->phis()) {
1112  unsigned InitVal = 0;
1113  unsigned LoopVal = 0;
1114  getPhiRegs(PHI, BB, InitVal, LoopVal);
1115  Register PhiDef = PHI.getOperand(0).getReg();
1116 
1117  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1118  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1119  unsigned NumPhis = getStagesForPhi(PhiDef);
1120  if (NumPhis > StageNum)
1121  NumPhis = StageNum;
1122  for (unsigned np = 0; np <= NumPhis; ++np) {
1123  unsigned NewVal =
1124  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1125  if (!NewVal)
1126  NewVal = InitVal;
1127  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1128  NewVal);
1129  }
1130  }
1131 }
1132 
1133 /// Rewrite a previously scheduled instruction to use the register value
1134 /// from the new instruction. Make sure the instruction occurs in the
1135 /// basic block, and we don't change the uses in the new instruction.
1136 void ModuloScheduleExpander::rewriteScheduledInstr(
1137  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1138  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1139  unsigned PrevReg) {
1140  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1141  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1142  // Rewrite uses that have been scheduled already to use the new
1143  // Phi register.
1145  EI = MRI.use_end();
1146  UI != EI;) {
1147  MachineOperand &UseOp = *UI;
1148  MachineInstr *UseMI = UseOp.getParent();
1149  ++UI;
1150  if (UseMI->getParent() != BB)
1151  continue;
1152  if (UseMI->isPHI()) {
1153  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1154  continue;
1155  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1156  continue;
1157  }
1158  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1159  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1160  MachineInstr *OrigMI = OrigInstr->second;
1161  int StageSched = Schedule.getStage(OrigMI);
1162  int CycleSched = Schedule.getCycle(OrigMI);
1163  unsigned ReplaceReg = 0;
1164  // This is the stage for the scheduled instruction.
1165  if (StagePhi == StageSched && Phi->isPHI()) {
1166  int CyclePhi = Schedule.getCycle(Phi);
1167  if (PrevReg && InProlog)
1168  ReplaceReg = PrevReg;
1169  else if (PrevReg && !isLoopCarried(*Phi) &&
1170  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1171  ReplaceReg = PrevReg;
1172  else
1173  ReplaceReg = NewReg;
1174  }
1175  // The scheduled instruction occurs before the scheduled Phi, and the
1176  // Phi is not loop carried.
1177  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1178  ReplaceReg = NewReg;
1179  if (StagePhi > StageSched && Phi->isPHI())
1180  ReplaceReg = NewReg;
1181  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1182  ReplaceReg = NewReg;
1183  if (ReplaceReg) {
1184  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1185  UseOp.setReg(ReplaceReg);
1186  }
1187  }
1188 }
1189 
1190 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1191  if (!Phi.isPHI())
1192  return false;
1193  int DefCycle = Schedule.getCycle(&Phi);
1194  int DefStage = Schedule.getStage(&Phi);
1195 
1196  unsigned InitVal = 0;
1197  unsigned LoopVal = 0;
1198  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1199  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1200  if (!Use || Use->isPHI())
1201  return true;
1202  int LoopCycle = Schedule.getCycle(Use);
1203  int LoopStage = Schedule.getStage(Use);
1204  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1205 }
1206 
1207 //===----------------------------------------------------------------------===//
1208 // PeelingModuloScheduleExpander implementation
1209 //===----------------------------------------------------------------------===//
1210 // This is a reimplementation of ModuloScheduleExpander that works by creating
1211 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1212 //===----------------------------------------------------------------------===//
1213 
1214 namespace {
1215 // Remove any dead phis in MBB. Dead phis either have only one block as input
1216 // (in which case they are the identity) or have no uses.
1217 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1218  LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1219  bool Changed = true;
1220  while (Changed) {
1221  Changed = false;
1223  assert(MI.isPHI());
1224  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1225  if (LIS)
1227  MI.eraseFromParent();
1228  Changed = true;
1229  } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1230  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1231  MRI.getRegClass(MI.getOperand(0).getReg()));
1232  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1233  MI.getOperand(1).getReg());
1234  if (LIS)
1236  MI.eraseFromParent();
1237  Changed = true;
1238  }
1239  }
1240  }
1241 }
1242 
1243 /// Rewrites the kernel block in-place to adhere to the given schedule.
1244 /// KernelRewriter holds all of the state required to perform the rewriting.
1245 class KernelRewriter {
1246  ModuloSchedule &S;
1248  MachineBasicBlock *PreheaderBB, *ExitBB;
1250  const TargetInstrInfo *TII;
1251  LiveIntervals *LIS;
1252 
1253  // Map from register class to canonical undef register for that class.
1255  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1256  // this map is only used when InitReg is non-undef.
1258  // Map from LoopReg to phi register where the InitReg is undef.
1259  DenseMap<Register, Register> UndefPhis;
1260 
1261  // Reg is used by MI. Return the new register MI should use to adhere to the
1262  // schedule. Insert phis as necessary.
1263  Register remapUse(Register Reg, MachineInstr &MI);
1264  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1265  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1266  // or will be chosen so as to share another phi.
1267  Register phi(Register LoopReg, Optional<Register> InitReg = {},
1268  const TargetRegisterClass *RC = nullptr);
1269  // Create an undef register of the given register class.
1270  Register undef(const TargetRegisterClass *RC);
1271 
1272 public:
1273  KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1274  LiveIntervals *LIS = nullptr);
1275  void rewrite();
1276 };
1277 } // namespace
1278 
1279 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1280  MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1281  : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1282  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1283  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1284  PreheaderBB = *BB->pred_begin();
1285  if (PreheaderBB == BB)
1286  PreheaderBB = *std::next(BB->pred_begin());
1287 }
1288 
1289 void KernelRewriter::rewrite() {
1290  // Rearrange the loop to be in schedule order. Note that the schedule may
1291  // contain instructions that are not owned by the loop block (InstrChanges and
1292  // friends), so we gracefully handle unowned instructions and delete any
1293  // instructions that weren't in the schedule.
1294  auto InsertPt = BB->getFirstTerminator();
1295  MachineInstr *FirstMI = nullptr;
1296  for (MachineInstr *MI : S.getInstructions()) {
1297  if (MI->isPHI())
1298  continue;
1299  if (MI->getParent())
1300  MI->removeFromParent();
1301  BB->insert(InsertPt, MI);
1302  if (!FirstMI)
1303  FirstMI = MI;
1304  }
1305  assert(FirstMI && "Failed to find first MI in schedule");
1306 
1307  // At this point all of the scheduled instructions are between FirstMI
1308  // and the end of the block. Kill from the first non-phi to FirstMI.
1309  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1310  if (LIS)
1312  (I++)->eraseFromParent();
1313  }
1314 
1315  // Now remap every instruction in the loop.
1316  for (MachineInstr &MI : *BB) {
1317  if (MI.isPHI() || MI.isTerminator())
1318  continue;
1319  for (MachineOperand &MO : MI.uses()) {
1320  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1321  continue;
1322  Register Reg = remapUse(MO.getReg(), MI);
1323  MO.setReg(Reg);
1324  }
1325  }
1326  EliminateDeadPhis(BB, MRI, LIS);
1327 
1328  // Ensure a phi exists for all instructions that are either referenced by
1329  // an illegal phi or by an instruction outside the loop. This allows us to
1330  // treat remaps of these values the same as "normal" values that come from
1331  // loop-carried phis.
1332  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1333  if (MI->isPHI()) {
1334  Register R = MI->getOperand(0).getReg();
1335  phi(R);
1336  continue;
1337  }
1338 
1339  for (MachineOperand &Def : MI->defs()) {
1340  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1341  if (MI.getParent() != BB) {
1342  phi(Def.getReg());
1343  break;
1344  }
1345  }
1346  }
1347  }
1348 }
1349 
1350 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1351  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1352  if (!Producer)
1353  return Reg;
1354 
1355  int ConsumerStage = S.getStage(&MI);
1356  if (!Producer->isPHI()) {
1357  // Non-phi producers are simple to remap. Insert as many phis as the
1358  // difference between the consumer and producer stages.
1359  if (Producer->getParent() != BB)
1360  // Producer was not inside the loop. Use the register as-is.
1361  return Reg;
1362  int ProducerStage = S.getStage(Producer);
1363  assert(ConsumerStage != -1 &&
1364  "In-loop consumer should always be scheduled!");
1365  assert(ConsumerStage >= ProducerStage);
1366  unsigned StageDiff = ConsumerStage - ProducerStage;
1367 
1368  for (unsigned I = 0; I < StageDiff; ++I)
1369  Reg = phi(Reg);
1370  return Reg;
1371  }
1372 
1373  // First, dive through the phi chain to find the defaults for the generated
1374  // phis.
1375  SmallVector<Optional<Register>, 4> Defaults;
1376  Register LoopReg = Reg;
1377  auto LoopProducer = Producer;
1378  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1379  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1380  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1381  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1382  assert(LoopProducer);
1383  }
1384  int LoopProducerStage = S.getStage(LoopProducer);
1385 
1386  Optional<Register> IllegalPhiDefault;
1387 
1388  if (LoopProducerStage == -1) {
1389  // Do nothing.
1390  } else if (LoopProducerStage > ConsumerStage) {
1391  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1392  // In addition, Consumer's cycle must be scheduled after Producer in the
1393  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1394  // functions.
1395 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1396  int LoopProducerCycle = S.getCycle(LoopProducer);
1397  int ConsumerCycle = S.getCycle(&MI);
1398 #endif
1399  assert(LoopProducerCycle <= ConsumerCycle);
1400  assert(LoopProducerStage == ConsumerStage + 1);
1401  // Peel off the first phi from Defaults and insert a phi between producer
1402  // and consumer. This phi will not be at the front of the block so we
1403  // consider it illegal. It will only exist during the rewrite process; it
1404  // needs to exist while we peel off prologs because these could take the
1405  // default value. After that we can replace all uses with the loop producer
1406  // value.
1407  IllegalPhiDefault = Defaults.front();
1408  Defaults.erase(Defaults.begin());
1409  } else {
1410  assert(ConsumerStage >= LoopProducerStage);
1411  int StageDiff = ConsumerStage - LoopProducerStage;
1412  if (StageDiff > 0) {
1413  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1414  << " to " << (Defaults.size() + StageDiff) << "\n");
1415  // If we need more phis than we have defaults for, pad out with undefs for
1416  // the earliest phis, which are at the end of the defaults chain (the
1417  // chain is in reverse order).
1418  Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1419  ? Optional<Register>()
1420  : Defaults.back());
1421  }
1422  }
1423 
1424  // Now we know the number of stages to jump back, insert the phi chain.
1425  auto DefaultI = Defaults.rbegin();
1426  while (DefaultI != Defaults.rend())
1427  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1428 
1429  if (IllegalPhiDefault.hasValue()) {
1430  // The consumer optionally consumes LoopProducer in the same iteration
1431  // (because the producer is scheduled at an earlier cycle than the consumer)
1432  // or the initial value. To facilitate this we create an illegal block here
1433  // by embedding a phi in the middle of the block. We will fix this up
1434  // immediately prior to pruning.
1435  auto RC = MRI.getRegClass(Reg);
1437  MachineInstr *IllegalPhi =
1438  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1439  .addReg(IllegalPhiDefault.getValue())
1440  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1441  .addReg(LoopReg)
1442  .addMBB(BB); // Block choice is arbitrary and has no effect.
1443  // Illegal phi should belong to the producer stage so that it can be
1444  // filtered correctly during peeling.
1445  S.setStage(IllegalPhi, LoopProducerStage);
1446  return R;
1447  }
1448 
1449  return LoopReg;
1450 }
1451 
1453  const TargetRegisterClass *RC) {
1454  // If the init register is not undef, try and find an existing phi.
1455  if (InitReg.hasValue()) {
1456  auto I = Phis.find({LoopReg, InitReg.getValue()});
1457  if (I != Phis.end())
1458  return I->second;
1459  } else {
1460  for (auto &KV : Phis) {
1461  if (KV.first.first == LoopReg)
1462  return KV.second;
1463  }
1464  }
1465 
1466  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1467  // find a phi that takes undef as input.
1468  auto I = UndefPhis.find(LoopReg);
1469  if (I != UndefPhis.end()) {
1470  Register R = I->second;
1471  if (!InitReg.hasValue())
1472  // Found a phi taking undef as input, and this input is undef so return
1473  // without any more changes.
1474  return R;
1475  // Found a phi taking undef as input, so rewrite it to take InitReg.
1477  MI->getOperand(1).setReg(InitReg.getValue());
1478  Phis.insert({{LoopReg, InitReg.getValue()}, R});
1480  UndefPhis.erase(I);
1481  return R;
1482  }
1483 
1484  // Failed to find any existing phi to reuse, so create a new one.
1485  if (!RC)
1486  RC = MRI.getRegClass(LoopReg);
1488  if (InitReg.hasValue())
1489  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1490  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1491  .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1492  .addMBB(PreheaderBB)
1493  .addReg(LoopReg)
1494  .addMBB(BB);
1495  if (!InitReg.hasValue())
1496  UndefPhis[LoopReg] = R;
1497  else
1498  Phis[{LoopReg, *InitReg}] = R;
1499  return R;
1500 }
1501 
1502 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1503  Register &R = Undefs[RC];
1504  if (R == 0) {
1505  // Create an IMPLICIT_DEF that defines this register if we need it.
1506  // All uses of this should be removed by the time we have finished unrolling
1507  // prologs and epilogs.
1508  R = MRI.createVirtualRegister(RC);
1509  auto *InsertBB = &PreheaderBB->getParent()->front();
1510  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1511  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1512  }
1513  return R;
1514 }
1515 
1516 namespace {
1517 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1518 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1519 /// through and defaults for these phis.
1520 class KernelOperandInfo {
1523  SmallVector<Register, 4> PhiDefaults;
1526 
1527 public:
1528  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1529  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1530  : MRI(MRI) {
1531  Source = MO;
1532  BB = MO->getParent()->getParent();
1533  while (isRegInLoop(MO)) {
1534  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1535  if (MI->isFullCopy()) {
1536  MO = &MI->getOperand(1);
1537  continue;
1538  }
1539  if (!MI->isPHI())
1540  break;
1541  // If this is an illegal phi, don't count it in distance.
1542  if (IllegalPhis.count(MI)) {
1543  MO = &MI->getOperand(3);
1544  continue;
1545  }
1546 
1548  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1549  : &MI->getOperand(3);
1550  PhiDefaults.push_back(Default);
1551  }
1552  Target = MO;
1553  }
1554 
1555  bool operator==(const KernelOperandInfo &Other) const {
1556  return PhiDefaults.size() == Other.PhiDefaults.size();
1557  }
1558 
1559  void print(raw_ostream &OS) const {
1560  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1561  << *Source->getParent();
1562  }
1563 
1564 private:
1565  bool isRegInLoop(MachineOperand *MO) {
1566  return MO->isReg() && MO->getReg().isVirtual() &&
1567  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1568  }
1569 };
1570 } // namespace
1571 
1573 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1574  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1575  if (LPD == LPD_Front)
1576  PeeledFront.push_back(NewBB);
1577  else
1578  PeeledBack.push_front(NewBB);
1579  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1580  ++I, ++NI) {
1581  CanonicalMIs[&*I] = &*I;
1582  CanonicalMIs[&*NI] = &*I;
1583  BlockMIs[{NewBB, &*I}] = &*NI;
1584  BlockMIs[{BB, &*I}] = &*I;
1585  }
1586  return NewBB;
1587 }
1588 
1590  int MinStage) {
1591  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1592  I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1593  MachineInstr *MI = &*I++;
1594  int Stage = getStage(MI);
1595  if (Stage == -1 || Stage >= MinStage)
1596  continue;
1597 
1598  for (MachineOperand &DefMO : MI->defs()) {
1600  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1601  // Only PHIs can use values from this block by construction.
1602  // Match with the equivalent PHI in B.
1603  assert(UseMI.isPHI());
1604  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1605  MI->getParent());
1606  Subs.emplace_back(&UseMI, Reg);
1607  }
1608  for (auto &Sub : Subs)
1609  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1611  }
1612  if (LIS)
1614  MI->eraseFromParent();
1615  }
1616 }
1617 
1619  MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1620  auto InsertPt = DestBB->getFirstNonPHI();
1622  for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
1623  MachineInstr *MI = &*I++;
1624  if (MI->isPHI()) {
1625  // This is an illegal PHI. If we move any instructions using an illegal
1626  // PHI, we need to create a legal Phi.
1627  if (getStage(MI) != Stage) {
1628  // The legal Phi is not necessary if the illegal phi's stage
1629  // is being moved.
1630  Register PhiR = MI->getOperand(0).getReg();
1631  auto RC = MRI.getRegClass(PhiR);
1633  MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1634  DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1635  .addReg(PhiR)
1636  .addMBB(SourceBB);
1637  BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
1638  CanonicalMIs[NI] = CanonicalMIs[MI];
1639  Remaps[PhiR] = NR;
1640  }
1641  }
1642  if (getStage(MI) != Stage)
1643  continue;
1644  MI->removeFromParent();
1645  DestBB->insert(InsertPt, MI);
1646  auto *KernelMI = CanonicalMIs[MI];
1647  BlockMIs[{DestBB, KernelMI}] = MI;
1648  BlockMIs.erase({SourceBB, KernelMI});
1649  }
1650  SmallVector<MachineInstr *, 4> PhiToDelete;
1651  for (MachineInstr &MI : DestBB->phis()) {
1652  assert(MI.getNumOperands() == 3);
1653  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1654  // If the instruction referenced by the phi is moved inside the block
1655  // we don't need the phi anymore.
1656  if (getStage(Def) == Stage) {
1657  Register PhiReg = MI.getOperand(0).getReg();
1658  assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1659  MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1660  MI.getOperand(0).setReg(PhiReg);
1661  PhiToDelete.push_back(&MI);
1662  }
1663  }
1664  for (auto *P : PhiToDelete)
1665  P->eraseFromParent();
1666  InsertPt = DestBB->getFirstNonPHI();
1667  // Helper to clone Phi instructions into the destination block. We clone Phi
1668  // greedily to avoid combinatorial explosion of Phi instructions.
1669  auto clonePhi = [&](MachineInstr *Phi) {
1670  MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1671  DestBB->insert(InsertPt, NewMI);
1672  Register OrigR = Phi->getOperand(0).getReg();
1674  NewMI->getOperand(0).setReg(R);
1675  NewMI->getOperand(1).setReg(OrigR);
1676  NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1677  Remaps[OrigR] = R;
1678  CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1679  BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1681  return R;
1682  };
1683  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1684  for (MachineOperand &MO : I->uses()) {
1685  if (!MO.isReg())
1686  continue;
1687  if (Remaps.count(MO.getReg()))
1688  MO.setReg(Remaps[MO.getReg()]);
1689  else {
1690  // If we are using a phi from the source block we need to add a new phi
1691  // pointing to the old one.
1693  if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1694  Register R = clonePhi(Use);
1695  MO.setReg(R);
1696  }
1697  }
1698  }
1699  }
1700 }
1701 
1702 Register
1704  MachineInstr *Phi) {
1705  unsigned distance = PhiNodeLoopIteration[Phi];
1706  MachineInstr *CanonicalUse = CanonicalPhi;
1707  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1708  for (unsigned I = 0; I < distance; ++I) {
1709  assert(CanonicalUse->isPHI());
1710  assert(CanonicalUse->getNumOperands() == 5);
1711  unsigned LoopRegIdx = 3, InitRegIdx = 1;
1712  if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1713  std::swap(LoopRegIdx, InitRegIdx);
1714  CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1715  CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1716  }
1717  return CanonicalUseReg;
1718 }
1719 
1721  BitVector LS(Schedule.getNumStages(), true);
1722  BitVector AS(Schedule.getNumStages(), true);
1723  LiveStages[BB] = LS;
1724  AvailableStages[BB] = AS;
1725 
1726  // Peel out the prologs.
1727  LS.reset();
1728  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1729  LS[I] = 1;
1730  Prologs.push_back(peelKernel(LPD_Front));
1731  LiveStages[Prologs.back()] = LS;
1732  AvailableStages[Prologs.back()] = LS;
1733  }
1734 
1735  // Create a block that will end up as the new loop exiting block (dominated by
1736  // all prologs and epilogs). It will only contain PHIs, in the same order as
1737  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1738  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1739  // property that any value deffed in BB but used outside of BB is used by a
1740  // PHI in the exiting block.
1742  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1743  // Push out the epilogs, again in reverse order.
1744  // We can't assume anything about the minumum loop trip count at this point,
1745  // so emit a fairly complex epilog.
1746 
1747  // We first peel number of stages minus one epilogue. Then we remove dead
1748  // stages and reorder instructions based on their stage. If we have 3 stages
1749  // we generate first:
1750  // E0[3, 2, 1]
1751  // E1[3', 2']
1752  // E2[3'']
1753  // And then we move instructions based on their stages to have:
1754  // E0[3]
1755  // E1[2, 3']
1756  // E2[1, 2', 3'']
1757  // The transformation is legal because we only move instructions past
1758  // instructions of a previous loop iteration.
1759  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1760  Epilogs.push_back(peelKernel(LPD_Back));
1761  MachineBasicBlock *B = Epilogs.back();
1763  // Keep track at which iteration each phi belongs to. We need it to know
1764  // what version of the variable to use during prologue/epilogue stitching.
1765  EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1766  for (MachineInstr &Phi : B->phis())
1768  }
1769  for (size_t I = 0; I < Epilogs.size(); I++) {
1770  LS.reset();
1771  for (size_t J = I; J < Epilogs.size(); J++) {
1772  int Iteration = J;
1773  unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1774  // Move stage one block at a time so that Phi nodes are updated correctly.
1775  for (size_t K = Iteration; K > I; K--)
1776  moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1777  LS[Stage] = 1;
1778  }
1779  LiveStages[Epilogs[I]] = LS;
1780  AvailableStages[Epilogs[I]] = AS;
1781  }
1782 
1783  // Now we've defined all the prolog and epilog blocks as a fallthrough
1784  // sequence, add the edges that will be followed if the loop trip count is
1785  // lower than the number of stages (connecting prologs directly with epilogs).
1786  auto PI = Prologs.begin();
1787  auto EI = Epilogs.begin();
1788  assert(Prologs.size() == Epilogs.size());
1789  for (; PI != Prologs.end(); ++PI, ++EI) {
1790  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1791  (*PI)->addSuccessor(*EI);
1792  for (MachineInstr &MI : (*EI)->phis()) {
1793  Register Reg = MI.getOperand(1).getReg();
1795  if (Use && Use->getParent() == Pred) {
1796  MachineInstr *CanonicalUse = CanonicalMIs[Use];
1797  if (CanonicalUse->isPHI()) {
1798  // If the use comes from a phi we need to skip as many phi as the
1799  // distance between the epilogue and the kernel. Trace through the phi
1800  // chain to find the right value.
1801  Reg = getPhiCanonicalReg(CanonicalUse, Use);
1802  }
1804  }
1805  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1806  MI.addOperand(MachineOperand::CreateMBB(*PI));
1807  }
1808  }
1809 
1810  // Create a list of all blocks in order.
1812  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1813  Blocks.push_back(BB);
1814  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1815 
1816  // Iterate in reverse order over all instructions, remapping as we go.
1817  for (MachineBasicBlock *B : reverse(Blocks)) {
1818  for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1819  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1820  MachineInstr *MI = &*I++;
1821  rewriteUsesOf(MI);
1822  }
1823  }
1824  for (auto *MI : IllegalPhisToDelete) {
1825  if (LIS)
1827  MI->eraseFromParent();
1828  }
1830 
1831  // Now all remapping has been done, we're free to optimize the generated code.
1832  for (MachineBasicBlock *B : reverse(Blocks))
1833  EliminateDeadPhis(B, MRI, LIS);
1834  EliminateDeadPhis(ExitingBB, MRI, LIS);
1835 }
1836 
1838  MachineFunction &MF = *BB->getParent();
1839  MachineBasicBlock *Exit = *BB->succ_begin();
1840  if (Exit == BB)
1841  Exit = *std::next(BB->succ_begin());
1842 
1844  MF.insert(std::next(BB->getIterator()), NewBB);
1845 
1846  // Clone all phis in BB into NewBB and rewrite.
1847  for (MachineInstr &MI : BB->phis()) {
1848  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1849  Register OldR = MI.getOperand(3).getReg();
1852  for (MachineInstr &Use : MRI.use_instructions(OldR))
1853  if (Use.getParent() != BB)
1854  Uses.push_back(&Use);
1855  for (MachineInstr *Use : Uses)
1856  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1858  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1859  .addReg(OldR)
1860  .addMBB(BB);
1861  BlockMIs[{NewBB, &MI}] = NI;
1862  CanonicalMIs[NI] = &MI;
1863  }
1864  BB->replaceSuccessor(Exit, NewBB);
1865  Exit->replacePhiUsesWith(BB, NewBB);
1866  NewBB->addSuccessor(Exit);
1867 
1868  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1870  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1871  (void)CanAnalyzeBr;
1872  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1873  TII->removeBranch(*BB);
1874  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1875  Cond, DebugLoc());
1876  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1877  return NewBB;
1878 }
1879 
1880 Register
1882  MachineBasicBlock *BB) {
1884  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1885  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1886 }
1887 
1889  if (MI->isPHI()) {
1890  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1891  // and it is produced by this block.
1892  Register PhiR = MI->getOperand(0).getReg();
1893  Register R = MI->getOperand(3).getReg();
1894  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1895  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1896  R = MI->getOperand(1).getReg();
1897  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1898  MRI.replaceRegWith(PhiR, R);
1899  // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1900  // later to figure out how to remap registers.
1901  MI->getOperand(0).setReg(PhiR);
1902  IllegalPhisToDelete.push_back(MI);
1903  return;
1904  }
1905 
1906  int Stage = getStage(MI);
1907  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1908  LiveStages[MI->getParent()].test(Stage))
1909  // Instruction is live, no rewriting to do.
1910  return;
1911 
1912  for (MachineOperand &DefMO : MI->defs()) {
1914  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1915  // Only PHIs can use values from this block by construction.
1916  // Match with the equivalent PHI in B.
1917  assert(UseMI.isPHI());
1918  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1919  MI->getParent());
1920  Subs.emplace_back(&UseMI, Reg);
1921  }
1922  for (auto &Sub : Subs)
1923  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1925  }
1926  if (LIS)
1928  MI->eraseFromParent();
1929 }
1930 
1932  // Work outwards from the kernel.
1933  bool KernelDisposed = false;
1934  int TC = Schedule.getNumStages() - 1;
1935  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1936  ++PI, ++EI, --TC) {
1937  MachineBasicBlock *Prolog = *PI;
1938  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1939  MachineBasicBlock *Epilog = *EI;
1941  TII->removeBranch(*Prolog);
1942  Optional<bool> StaticallyGreater =
1943  LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1944  if (!StaticallyGreater.hasValue()) {
1945  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1946  // Dynamically branch based on Cond.
1947  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1948  } else if (*StaticallyGreater == false) {
1949  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1950  // Prolog never falls through; branch to epilog and orphan interior
1951  // blocks. Leave it to unreachable-block-elim to clean up.
1952  Prolog->removeSuccessor(Fallthrough);
1953  for (MachineInstr &P : Fallthrough->phis()) {
1954  P.RemoveOperand(2);
1955  P.RemoveOperand(1);
1956  }
1958  KernelDisposed = true;
1959  } else {
1960  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1961  // Prolog always falls through; remove incoming values in epilog.
1962  Prolog->removeSuccessor(Epilog);
1963  for (MachineInstr &P : Epilog->phis()) {
1964  P.RemoveOperand(4);
1965  P.RemoveOperand(3);
1966  }
1967  }
1968  }
1969 
1970  if (!KernelDisposed) {
1971  LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1972  LoopInfo->setPreheader(Prologs.back());
1973  } else {
1974  LoopInfo->disposed();
1975  }
1976 }
1977 
1979  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
1980  KR.rewrite();
1981 }
1982 
1984  BB = Schedule.getLoop()->getTopBlock();
1988  assert(LoopInfo);
1989 
1990  rewriteKernel();
1992  fixupBranches();
1993 }
1994 
1996  BB = Schedule.getLoop()->getTopBlock();
1998 
1999  // Dump the schedule before we invalidate and remap all its instructions.
2000  // Stash it in a string so we can print it if we found an error.
2001  std::string ScheduleDump;
2002  raw_string_ostream OS(ScheduleDump);
2003  Schedule.print(OS);
2004  OS.flush();
2005 
2006  // First, run the normal ModuleScheduleExpander. We don't support any
2007  // InstrChanges.
2008  assert(LIS && "Requires LiveIntervals!");
2011  MSE.expand();
2012  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2013  if (!ExpandedKernel) {
2014  // The expander optimized away the kernel. We can't do any useful checking.
2015  MSE.cleanup();
2016  return;
2017  }
2018  // Before running the KernelRewriter, re-add BB into the CFG.
2020 
2021  // Now run the new expansion algorithm.
2022  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2023  KR.rewrite();
2025 
2026  // Collect all illegal phis that the new algorithm created. We'll give these
2027  // to KernelOperandInfo.
2028  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2029  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2030  if (NI->isPHI())
2031  IllegalPhis.insert(&*NI);
2032  }
2033 
2034  // Co-iterate across both kernels. We expect them to be identical apart from
2035  // phis and full COPYs (we look through both).
2037  auto OI = ExpandedKernel->begin();
2038  auto NI = BB->begin();
2039  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2040  while (OI->isPHI() || OI->isFullCopy())
2041  ++OI;
2042  while (NI->isPHI() || NI->isFullCopy())
2043  ++NI;
2044  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2045  // Analyze every operand separately.
2046  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2047  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2048  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2049  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2050  }
2051 
2052  bool Failed = false;
2053  for (auto &OldAndNew : KOIs) {
2054  if (OldAndNew.first == OldAndNew.second)
2055  continue;
2056  Failed = true;
2057  errs() << "Modulo kernel validation error: [\n";
2058  errs() << " [golden] ";
2059  OldAndNew.first.print(errs());
2060  errs() << " ";
2061  OldAndNew.second.print(errs());
2062  errs() << "]\n";
2063  }
2064 
2065  if (Failed) {
2066  errs() << "Golden reference kernel:\n";
2067  ExpandedKernel->print(errs());
2068  errs() << "New kernel:\n";
2069  BB->print(errs());
2070  errs() << ScheduleDump;
2072  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2073  }
2074 
2075  // Cleanup by removing BB from the CFG again as the original
2076  // ModuloScheduleExpander intended.
2078  MSE.cleanup();
2079 }
2080 
2081 //===----------------------------------------------------------------------===//
2082 // ModuloScheduleTestPass implementation
2083 //===----------------------------------------------------------------------===//
2084 // This pass constructs a ModuloSchedule from its module and runs
2085 // ModuloScheduleExpander.
2086 //
2087 // The module is expected to contain a single-block analyzable loop.
2088 // The total order of instructions is taken from the loop as-is.
2089 // Instructions are expected to be annotated with a PostInstrSymbol.
2090 // This PostInstrSymbol must have the following format:
2091 // "Stage=%d Cycle=%d".
2092 //===----------------------------------------------------------------------===//
2093 
2094 namespace {
2095 class ModuloScheduleTest : public MachineFunctionPass {
2096 public:
2097  static char ID;
2098 
2099  ModuloScheduleTest() : MachineFunctionPass(ID) {
2101  }
2102 
2103  bool runOnMachineFunction(MachineFunction &MF) override;
2104  void runOnLoop(MachineFunction &MF, MachineLoop &L);
2105 
2106  void getAnalysisUsage(AnalysisUsage &AU) const override {
2108  AU.addRequired<LiveIntervals>();
2110  }
2111 };
2112 } // namespace
2113 
2114 char ModuloScheduleTest::ID = 0;
2115 
2116 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2117  "Modulo Schedule test pass", false, false)
2120 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2121  "Modulo Schedule test pass", false, false)
2122 
2123 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2124  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2125  for (auto *L : MLI) {
2126  if (L->getTopBlock() != L->getBottomBlock())
2127  continue;
2128  runOnLoop(MF, *L);
2129  return false;
2130  }
2131  return false;
2132 }
2133 
2134 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2135  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2136  std::pair<StringRef, StringRef> StageTokenAndValue =
2137  getToken(StageAndCycle.first, "-");
2138  std::pair<StringRef, StringRef> CycleTokenAndValue =
2139  getToken(StageAndCycle.second, "-");
2140  if (StageTokenAndValue.first != "Stage" ||
2141  CycleTokenAndValue.first != "_Cycle") {
2143  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2144  return;
2145  }
2146 
2147  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2148  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2149 
2150  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2151 }
2152 
2153 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2154  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2156  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2157 
2158  DenseMap<MachineInstr *, int> Cycle, Stage;
2159  std::vector<MachineInstr *> Instrs;
2160  for (MachineInstr &MI : *BB) {
2161  if (MI.isTerminator())
2162  continue;
2163  Instrs.push_back(&MI);
2164  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2165  dbgs() << "Parsing post-instr symbol for " << MI;
2166  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2167  }
2168  }
2169 
2170  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2171  std::move(Stage));
2173  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2174  MSE.expand();
2175  MSE.cleanup();
2176 }
2177 
2178 //===----------------------------------------------------------------------===//
2179 // ModuloScheduleTestAnnotater implementation
2180 //===----------------------------------------------------------------------===//
2181 
2183  for (MachineInstr *MI : S.getInstructions()) {
2185  raw_svector_ostream OS(SV);
2186  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2187  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2188  MI->setPostInstrSymbol(MF, Sym);
2189  }
2190 }
i
i
Definition: README.txt:29
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1071
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:102
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:791
llvm::PeelingModuloScheduleExpander::rewriteKernel
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition: ModuloSchedule.cpp:1978
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:181
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:78
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:586
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::MachineBasicBlock::clear
void clear()
Definition: MachineBasicBlock.h:942
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:431
ErrorHandling.h
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:269
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:655
llvm::LPD_Front
@ LPD_Front
Peel the first iteration of the loop.
Definition: MachineLoopUtils.h:19
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1381
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:835
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1602
llvm::PeelingModuloScheduleExpander::Epilogs
SmallVector< MachineBasicBlock *, 4 > Epilogs
Definition: ModuloSchedule.h:305
llvm::PeelingModuloScheduleExpander::getStage
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
Definition: ModuloSchedule.h:357
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:742
llvm::Optional< bool >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
llvm::PeelingModuloScheduleExpander::TII
const TargetInstrInfo * TII
Definition: ModuloSchedule.h:297
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:66
llvm::MachineLoop::getBottomBlock
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout,...
Definition: MachineLoopInfo.cpp:77
llvm::PeelingModuloScheduleExpander::PeeledFront
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
Definition: ModuloSchedule.h:323
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::PeelingModuloScheduleExpander::moveStageBetweenBlocks
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition: ModuloSchedule.cpp:1618
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:589
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:295
llvm::TargetInstrInfo::insertBranch
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
Definition: TargetInstrInfo.h:695
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:751
llvm::ModuloSchedule::dump
void dump()
Definition: ModuloSchedule.h:153
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:843
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::PeelingModuloScheduleExpander::AvailableStages
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
Definition: ModuloSchedule.h:311
llvm::MachineBasicBlock::getFirstInstrTerminator
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Definition: MachineBasicBlock.cpp:251
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:828
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:644
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:191
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:380
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::TargetInstrInfo::removeBranch
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
Definition: TargetInstrInfo.h:677
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PeelingModuloScheduleExpander::Schedule
ModuloSchedule & Schedule
Definition: ModuloSchedule.h:293
llvm::PeelingModuloScheduleExpander::PeeledBack
std::deque< MachineBasicBlock * > PeeledBack
Definition: ModuloSchedule.h:323
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::MachineInstr::setMemRefs
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
Definition: MachineInstr.cpp:374
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:334
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::PeelingModuloScheduleExpander::PhiNodeLoopIteration
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
Definition: ModuloSchedule.h:314
MCContext.h
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::MachineBasicBlock::eraseFromParent
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Definition: MachineBasicBlock.cpp:1337
test
Definition: README.txt:37
llvm::PeelingModuloScheduleExpander::BlockMIs
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Definition: ModuloSchedule.h:320
llvm::PeelingModuloScheduleExpander::BB
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
Definition: ModuloSchedule.h:301
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::PeelingModuloScheduleExpander::Prologs
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
Definition: ModuloSchedule.h:305
pass
modulo schedule Modulo Schedule test pass
Definition: ModuloSchedule.cpp:2121
llvm::PeelingModuloScheduleExpander::filterInstructions
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition: ModuloSchedule.cpp:1589
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::ModuloSchedule::getInstructions
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
Definition: ModuloSchedule.h:151
llvm::PeelSingleBlockLoop
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Definition: MachineLoopUtils.cpp:27
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
llvm::BitVector
Definition: BitVector.h:74
CombinerObjective::Default
@ Default
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:238
llvm::PeelingModuloScheduleExpander::peelKernel
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition: ModuloSchedule.cpp:1573
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
parseSymbolString
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition: ModuloSchedule.cpp:2134
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:751
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1353
llvm::PeelingModuloScheduleExpander::Preheader
MachineBasicBlock * Preheader
The original loop preheader.
Definition: ModuloSchedule.h:303
llvm::PeelingModuloScheduleExpander::fixupBranches
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition: ModuloSchedule.cpp:1931
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:306
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
llvm::ModuloScheduleExpander::getRewrittenKernel
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Definition: ModuloSchedule.h:274
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::PeelingModuloScheduleExpander::rewriteUsesOf
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition: ModuloSchedule.cpp:1888
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1634
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2182
LiveIntervals.h
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:816
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:359
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
StringExtras.h
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:200
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:415
replaceRegUsesAfterLoop
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition: ModuloSchedule.cpp:333
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetInstrInfo::insertUnconditionalBranch
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
Definition: TargetInstrInfo.h:703
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:332
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1255
llvm::PeelingModuloScheduleExpander::CreateLCSSAExitingBlock
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition: ModuloSchedule.cpp:1837
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::LoopPeelDirection
LoopPeelDirection
Definition: MachineLoopUtils.h:18
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:252
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::MachineInstr::isRegTiedToUseOperand
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
Definition: MachineInstr.h:1545
llvm::PeelingModuloScheduleExpander::getEquivalentRegisterIn
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition: ModuloSchedule.cpp:1881
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:242
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::MachineBasicBlock::phis
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
Definition: MachineBasicBlock.h:296
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
llvm::MachineInstr::isInlineAsm
bool isInlineAsm() const
Definition: MachineInstr.h:1261
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:683
hasUseAfterLoop
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition: ModuloSchedule.cpp:351
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1983
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:464
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: ModuloSchedule.cpp:35
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:881
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:375
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LiveIntervals::createEmptyInterval
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Definition: LiveIntervals.h:131
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: ModuloSchedule.cpp:59
llvm::MachineBasicBlock::transferSuccessors
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
Definition: MachineBasicBlock.cpp:870
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:261
llvm::LPD_Back
@ LPD_Back
Peel the last iteration of the loop.
Definition: MachineLoopUtils.h:20
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
operator==
bool operator==(const StringView &LHS, const StringView &RHS)
Definition: StringView.h:112
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:812
llvm::PeelingModuloScheduleExpander::LIS
LiveIntervals * LIS
Definition: ModuloSchedule.h:298
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: MachineBasicBlock.cpp:1365
llvm::PeelingModuloScheduleExpander::MRI
MachineRegisterInfo & MRI
Definition: ModuloSchedule.h:296
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:789
Epilog
@ Epilog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1317
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:624
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ModuloSchedule::getCycle
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
Definition: ModuloSchedule.h:139
llvm::ModuloSchedule::getNumStages
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
Definition: ModuloSchedule.h:122
llvm::PeelingModuloScheduleExpander::IllegalPhisToDelete
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
Definition: ModuloSchedule.h:325
llvm::ModuloSchedule::getLoop
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
Definition: ModuloSchedule.h:118
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:720
llvm::PeelingModuloScheduleExpander::peelPrologAndEpilogs
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition: ModuloSchedule.cpp:1720
llvm::PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition: ModuloSchedule.cpp:1995
getInitPhiReg
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition: ModuloSchedule.cpp:51
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:467
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:71
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator
DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
Definition: DenseMap.h:70
llvm::ModuloSchedule::print
void print(raw_ostream &OS)
Definition: ModuloSchedule.cpp:24
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::PeelingModuloScheduleExpander::LiveStages
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
Definition: ModuloSchedule.h:307
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:276
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:62
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:689
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
llvm::MachineRegisterInfo::defusechain_iterator
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Definition: MachineRegisterInfo.h:266
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:690
llvm::initializeModuloScheduleTestPass
void initializeModuloScheduleTestPass(PassRegistry &)
llvm::PeelingModuloScheduleExpander::MF
MachineFunction & MF
Definition: ModuloSchedule.h:294
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:159
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:62
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:125
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:333
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
llvm::ModuloSchedule::getStage
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
Definition: ModuloSchedule.h:133
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1102
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:480
llvm::PeelingModuloScheduleExpander::getPhiCanonicalReg
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition: ModuloSchedule.cpp:1703
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:58
removePhis
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Definition: ModuloSchedule.cpp:828
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1184
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::PeelingModuloScheduleExpander::CanonicalMIs
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
Definition: ModuloSchedule.h:318