LLVM  13.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/InitializePasses.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/Support/Debug.h"
20 
21 #define DEBUG_TYPE "pipeliner"
22 using namespace llvm;
23 
25  for (MachineInstr *MI : ScheduledInstrs)
26  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
27 }
28 
29 //===----------------------------------------------------------------------===//
30 // ModuloScheduleExpander implementation
31 //===----------------------------------------------------------------------===//
32 
33 /// Return the register values for the operands of a Phi instruction.
34 /// This function assume the instruction is a Phi.
36  unsigned &InitVal, unsigned &LoopVal) {
37  assert(Phi.isPHI() && "Expecting a Phi.");
38 
39  InitVal = 0;
40  LoopVal = 0;
41  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
42  if (Phi.getOperand(i + 1).getMBB() != Loop)
43  InitVal = Phi.getOperand(i).getReg();
44  else
45  LoopVal = Phi.getOperand(i).getReg();
46 
47  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
48 }
49 
50 /// Return the Phi register value that comes from the incoming block.
51 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
52  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
53  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
54  return Phi.getOperand(i).getReg();
55  return 0;
56 }
57 
58 /// Return the Phi register value that comes the loop block.
59 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
60  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
61  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
62  return Phi.getOperand(i).getReg();
63  return 0;
64 }
65 
67  BB = Schedule.getLoop()->getTopBlock();
68  Preheader = *BB->pred_begin();
69  if (Preheader == BB)
70  Preheader = *std::next(BB->pred_begin());
71 
72  // Iterate over the definitions in each instruction, and compute the
73  // stage difference for each use. Keep the maximum value.
74  for (MachineInstr *MI : Schedule.getInstructions()) {
75  int DefStage = Schedule.getStage(MI);
76  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
77  MachineOperand &Op = MI->getOperand(i);
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
85  EI = MRI.use_end();
86  UI != EI; ++UI) {
87  MachineOperand &UseOp = *UI;
88  MachineInstr *UseMI = UseOp.getParent();
89  int UseStage = Schedule.getStage(UseMI);
90  unsigned Diff = 0;
91  if (UseStage != -1 && UseStage >= DefStage)
92  Diff = UseStage - DefStage;
93  if (MI->isPHI()) {
94  if (isLoopCarried(*MI))
95  ++Diff;
96  else
97  PhiIsSwapped = true;
98  }
99  MaxDiff = std::max(Diff, MaxDiff);
100  }
101  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
102  }
103  }
104 
105  generatePipelinedLoop();
106 }
107 
108 void ModuloScheduleExpander::generatePipelinedLoop() {
110  assert(LoopInfo && "Must be able to analyze loop!");
111 
112  // Create a new basic block for the kernel and add it to the CFG.
114 
115  unsigned MaxStageCount = Schedule.getNumStages() - 1;
116 
117  // Remember the registers that are used in different stages. The index is
118  // the iteration, or stage, that the instruction is scheduled in. This is
119  // a map between register names in the original block and the names created
120  // in each stage of the pipelined loop.
121  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
122  InstrMapTy InstrMap;
123 
125 
126  // Generate the prolog instructions that set up the pipeline.
127  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
128  MF.insert(BB->getIterator(), KernelBB);
129 
130  // Rearrange the instructions to generate the new, pipelined loop,
131  // and update register names as needed.
132  for (MachineInstr *CI : Schedule.getInstructions()) {
133  if (CI->isPHI())
134  continue;
135  unsigned StageNum = Schedule.getStage(CI);
136  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
137  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
138  KernelBB->push_back(NewMI);
139  InstrMap[NewMI] = CI;
140  }
141 
142  // Copy any terminator instructions to the new kernel, and update
143  // names as needed.
145  E = BB->instr_end();
146  I != E; ++I) {
147  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
148  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
149  KernelBB->push_back(NewMI);
150  InstrMap[NewMI] = &*I;
151  }
152 
153  NewKernel = KernelBB;
154  KernelBB->transferSuccessors(BB);
155  KernelBB->replaceSuccessor(BB, KernelBB);
156 
157  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
158  InstrMap, MaxStageCount, MaxStageCount, false);
159  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
160  MaxStageCount, MaxStageCount, false);
161 
162  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
163 
165  // Generate the epilog instructions to complete the pipeline.
166  generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
167 
168  // We need this step because the register allocation doesn't handle some
169  // situations well, so we insert copies to help out.
170  splitLifetimes(KernelBB, EpilogBBs);
171 
172  // Remove dead instructions due to loop induction variables.
173  removeDeadInstructions(KernelBB, EpilogBBs);
174 
175  // Add branches between prolog and epilog blocks.
176  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
177 
178  delete[] VRMap;
179 }
180 
182  // Remove the original loop since it's no longer referenced.
183  for (auto &I : *BB)
185  BB->clear();
186  BB->eraseFromParent();
187 }
188 
189 /// Generate the pipeline prolog code.
190 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
191  MachineBasicBlock *KernelBB,
192  ValueMapTy *VRMap,
193  MBBVectorTy &PrologBBs) {
194  MachineBasicBlock *PredBB = Preheader;
195  InstrMapTy InstrMap;
196 
197  // Generate a basic block for each stage, not including the last stage,
198  // which will be generated in the kernel. Each basic block may contain
199  // instructions from multiple stages/iterations.
200  for (unsigned i = 0; i < LastStage; ++i) {
201  // Create and insert the prolog basic block prior to the original loop
202  // basic block. The original loop is removed later.
204  PrologBBs.push_back(NewBB);
205  MF.insert(BB->getIterator(), NewBB);
206  NewBB->transferSuccessors(PredBB);
207  PredBB->addSuccessor(NewBB);
208  PredBB = NewBB;
209 
210  // Generate instructions for each appropriate stage. Process instructions
211  // in original program order.
212  for (int StageNum = i; StageNum >= 0; --StageNum) {
213  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
214  BBE = BB->getFirstTerminator();
215  BBI != BBE; ++BBI) {
216  if (Schedule.getStage(&*BBI) == StageNum) {
217  if (BBI->isPHI())
218  continue;
219  MachineInstr *NewMI =
220  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
221  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
222  NewBB->push_back(NewMI);
223  InstrMap[NewMI] = &*BBI;
224  }
225  }
226  }
227  rewritePhiValues(NewBB, i, VRMap, InstrMap);
228  LLVM_DEBUG({
229  dbgs() << "prolog:\n";
230  NewBB->dump();
231  });
232  }
233 
234  PredBB->replaceSuccessor(BB, KernelBB);
235 
236  // Check if we need to remove the branch from the preheader to the original
237  // loop, and replace it with a branch to the new loop.
238  unsigned numBranches = TII->removeBranch(*Preheader);
239  if (numBranches) {
241  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
242  }
243 }
244 
245 /// Generate the pipeline epilog code. The epilog code finishes the iterations
246 /// that were started in either the prolog or the kernel. We create a basic
247 /// block for each stage that needs to complete.
248 void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
249  MachineBasicBlock *KernelBB,
250  ValueMapTy *VRMap,
251  MBBVectorTy &EpilogBBs,
252  MBBVectorTy &PrologBBs) {
253  // We need to change the branch from the kernel to the first epilog block, so
254  // this call to analyze branch uses the kernel rather than the original BB.
255  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
257  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
258  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
259  if (checkBranch)
260  return;
261 
262  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
263  if (*LoopExitI == KernelBB)
264  ++LoopExitI;
265  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
266  MachineBasicBlock *LoopExitBB = *LoopExitI;
267 
268  MachineBasicBlock *PredBB = KernelBB;
269  MachineBasicBlock *EpilogStart = LoopExitBB;
270  InstrMapTy InstrMap;
271 
272  // Generate a basic block for each stage, not including the last stage,
273  // which was generated for the kernel. Each basic block may contain
274  // instructions from multiple stages/iterations.
275  int EpilogStage = LastStage + 1;
276  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
278  EpilogBBs.push_back(NewBB);
279  MF.insert(BB->getIterator(), NewBB);
280 
281  PredBB->replaceSuccessor(LoopExitBB, NewBB);
282  NewBB->addSuccessor(LoopExitBB);
283 
284  if (EpilogStart == LoopExitBB)
285  EpilogStart = NewBB;
286 
287  // Add instructions to the epilog depending on the current block.
288  // Process instructions in original program order.
289  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
290  for (auto &BBI : *BB) {
291  if (BBI.isPHI())
292  continue;
293  MachineInstr *In = &BBI;
294  if ((unsigned)Schedule.getStage(In) == StageNum) {
295  // Instructions with memoperands in the epilog are updated with
296  // conservative values.
297  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
298  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
299  NewBB->push_back(NewMI);
300  InstrMap[NewMI] = In;
301  }
302  }
303  }
304  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
305  InstrMap, LastStage, EpilogStage, i == 1);
306  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
307  LastStage, EpilogStage, i == 1);
308  PredBB = NewBB;
309 
310  LLVM_DEBUG({
311  dbgs() << "epilog:\n";
312  NewBB->dump();
313  });
314  }
315 
316  // Fix any Phi nodes in the loop exit block.
317  LoopExitBB->replacePhiUsesWith(BB, PredBB);
318 
319  // Create a branch to the new epilog from the kernel.
320  // Remove the original branch and add a new branch to the epilog.
321  TII->removeBranch(*KernelBB);
322  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
323  // Add a branch to the loop exit.
324  if (EpilogBBs.size() > 0) {
325  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
327  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
328  }
329 }
330 
331 /// Replace all uses of FromReg that appear outside the specified
332 /// basic block with ToReg.
333 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
336  LiveIntervals &LIS) {
338  E = MRI.use_end();
339  I != E;) {
340  MachineOperand &O = *I;
341  ++I;
342  if (O.getParent()->getParent() != MBB)
343  O.setReg(ToReg);
344  }
345  if (!LIS.hasInterval(ToReg))
346  LIS.createEmptyInterval(ToReg);
347 }
348 
349 /// Return true if the register has a use that occurs outside the
350 /// specified loop.
351 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
354  E = MRI.use_end();
355  I != E; ++I)
356  if (I->getParent()->getParent() != BB)
357  return true;
358  return false;
359 }
360 
361 /// Generate Phis for the specific block in the generated pipelined code.
362 /// This function looks at the Phis from the original code to guide the
363 /// creation of new Phis.
364 void ModuloScheduleExpander::generateExistingPhis(
366  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
367  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
368  // Compute the stage number for the initial value of the Phi, which
369  // comes from the prolog. The prolog to use depends on to which kernel/
370  // epilog that we're adding the Phi.
371  unsigned PrologStage = 0;
372  unsigned PrevStage = 0;
373  bool InKernel = (LastStageNum == CurStageNum);
374  if (InKernel) {
375  PrologStage = LastStageNum - 1;
376  PrevStage = CurStageNum;
377  } else {
378  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
379  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
380  }
381 
382  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
383  BBE = BB->getFirstNonPHI();
384  BBI != BBE; ++BBI) {
385  Register Def = BBI->getOperand(0).getReg();
386 
387  unsigned InitVal = 0;
388  unsigned LoopVal = 0;
389  getPhiRegs(*BBI, BB, InitVal, LoopVal);
390 
391  unsigned PhiOp1 = 0;
392  // The Phi value from the loop body typically is defined in the loop, but
393  // not always. So, we need to check if the value is defined in the loop.
394  unsigned PhiOp2 = LoopVal;
395  if (VRMap[LastStageNum].count(LoopVal))
396  PhiOp2 = VRMap[LastStageNum][LoopVal];
397 
398  int StageScheduled = Schedule.getStage(&*BBI);
399  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
400  unsigned NumStages = getStagesForReg(Def, CurStageNum);
401  if (NumStages == 0) {
402  // We don't need to generate a Phi anymore, but we need to rename any uses
403  // of the Phi value.
404  unsigned NewReg = VRMap[PrevStage][LoopVal];
405  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
406  InitVal, NewReg);
407  if (VRMap[CurStageNum].count(LoopVal))
408  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
409  }
410  // Adjust the number of Phis needed depending on the number of prologs left,
411  // and the distance from where the Phi is first scheduled. The number of
412  // Phis cannot exceed the number of prolog stages. Each stage can
413  // potentially define two values.
414  unsigned MaxPhis = PrologStage + 2;
415  if (!InKernel && (int)PrologStage <= LoopValStage)
416  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
417  unsigned NumPhis = std::min(NumStages, MaxPhis);
418 
419  unsigned NewReg = 0;
420  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
421  // In the epilog, we may need to look back one stage to get the correct
422  // Phi name, because the epilog and prolog blocks execute the same stage.
423  // The correct name is from the previous block only when the Phi has
424  // been completely scheduled prior to the epilog, and Phi value is not
425  // needed in multiple stages.
426  int StageDiff = 0;
427  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
428  NumPhis == 1)
429  StageDiff = 1;
430  // Adjust the computations below when the phi and the loop definition
431  // are scheduled in different stages.
432  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
433  StageDiff = StageScheduled - LoopValStage;
434  for (unsigned np = 0; np < NumPhis; ++np) {
435  // If the Phi hasn't been scheduled, then use the initial Phi operand
436  // value. Otherwise, use the scheduled version of the instruction. This
437  // is a little complicated when a Phi references another Phi.
438  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
439  PhiOp1 = InitVal;
440  // Check if the Phi has already been scheduled in a prolog stage.
441  else if (PrologStage >= AccessStage + StageDiff + np &&
442  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
443  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
444  // Check if the Phi has already been scheduled, but the loop instruction
445  // is either another Phi, or doesn't occur in the loop.
446  else if (PrologStage >= AccessStage + StageDiff + np) {
447  // If the Phi references another Phi, we need to examine the other
448  // Phi to get the correct value.
449  PhiOp1 = LoopVal;
450  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
451  int Indirects = 1;
452  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
453  int PhiStage = Schedule.getStage(InstOp1);
454  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
455  PhiOp1 = getInitPhiReg(*InstOp1, BB);
456  else
457  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
458  InstOp1 = MRI.getVRegDef(PhiOp1);
459  int PhiOpStage = Schedule.getStage(InstOp1);
460  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
461  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
462  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
463  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
464  break;
465  }
466  ++Indirects;
467  }
468  } else
469  PhiOp1 = InitVal;
470  // If this references a generated Phi in the kernel, get the Phi operand
471  // from the incoming block.
472  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
473  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
474  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
475 
476  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
477  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
478  // In the epilog, a map lookup is needed to get the value from the kernel,
479  // or previous epilog block. How is does this depends on if the
480  // instruction is scheduled in the previous block.
481  if (!InKernel) {
482  int StageDiffAdj = 0;
483  if (LoopValStage != -1 && StageScheduled > LoopValStage)
484  StageDiffAdj = StageScheduled - LoopValStage;
485  // Use the loop value defined in the kernel, unless the kernel
486  // contains the last definition of the Phi.
487  if (np == 0 && PrevStage == LastStageNum &&
488  (StageScheduled != 0 || LoopValStage != 0) &&
489  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
490  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
491  // Use the value defined by the Phi. We add one because we switch
492  // from looking at the loop value to the Phi definition.
493  else if (np > 0 && PrevStage == LastStageNum &&
494  VRMap[PrevStage - np + 1].count(Def))
495  PhiOp2 = VRMap[PrevStage - np + 1][Def];
496  // Use the loop value defined in the kernel.
497  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
498  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
499  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
500  // Use the value defined by the Phi, unless we're generating the first
501  // epilog and the Phi refers to a Phi in a different stage.
502  else if (VRMap[PrevStage - np].count(Def) &&
503  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
504  (LoopValStage == StageScheduled)))
505  PhiOp2 = VRMap[PrevStage - np][Def];
506  }
507 
508  // Check if we can reuse an existing Phi. This occurs when a Phi
509  // references another Phi, and the other Phi is scheduled in an
510  // earlier stage. We can try to reuse an existing Phi up until the last
511  // stage of the current Phi.
512  if (LoopDefIsPhi) {
513  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
514  int LVNumStages = getStagesForPhi(LoopVal);
515  int StageDiff = (StageScheduled - LoopValStage);
516  LVNumStages -= StageDiff;
517  // Make sure the loop value Phi has been processed already.
518  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
519  NewReg = PhiOp2;
520  unsigned ReuseStage = CurStageNum;
521  if (isLoopCarried(*PhiInst))
522  ReuseStage -= LVNumStages;
523  // Check if the Phi to reuse has been generated yet. If not, then
524  // there is nothing to reuse.
525  if (VRMap[ReuseStage - np].count(LoopVal)) {
526  NewReg = VRMap[ReuseStage - np][LoopVal];
527 
528  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
529  Def, NewReg);
530  // Update the map with the new Phi name.
531  VRMap[CurStageNum - np][Def] = NewReg;
532  PhiOp2 = NewReg;
533  if (VRMap[LastStageNum - np - 1].count(LoopVal))
534  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
535 
536  if (IsLast && np == NumPhis - 1)
537  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
538  continue;
539  }
540  }
541  }
542  if (InKernel && StageDiff > 0 &&
543  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
544  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
545  }
546 
547  const TargetRegisterClass *RC = MRI.getRegClass(Def);
548  NewReg = MRI.createVirtualRegister(RC);
549 
550  MachineInstrBuilder NewPhi =
551  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
552  TII->get(TargetOpcode::PHI), NewReg);
553  NewPhi.addReg(PhiOp1).addMBB(BB1);
554  NewPhi.addReg(PhiOp2).addMBB(BB2);
555  if (np == 0)
556  InstrMap[NewPhi] = &*BBI;
557 
558  // We define the Phis after creating the new pipelined code, so
559  // we need to rename the Phi values in scheduled instructions.
560 
561  unsigned PrevReg = 0;
562  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
563  PrevReg = VRMap[PrevStage - np][LoopVal];
564  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
565  NewReg, PrevReg);
566  // If the Phi has been scheduled, use the new name for rewriting.
567  if (VRMap[CurStageNum - np].count(Def)) {
568  unsigned R = VRMap[CurStageNum - np][Def];
569  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
570  NewReg);
571  }
572 
573  // Check if we need to rename any uses that occurs after the loop. The
574  // register to replace depends on whether the Phi is scheduled in the
575  // epilog.
576  if (IsLast && np == NumPhis - 1)
577  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
578 
579  // In the kernel, a dependent Phi uses the value from this Phi.
580  if (InKernel)
581  PhiOp2 = NewReg;
582 
583  // Update the map with the new Phi name.
584  VRMap[CurStageNum - np][Def] = NewReg;
585  }
586 
587  while (NumPhis++ < NumStages) {
588  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
589  NewReg, 0);
590  }
591 
592  // Check if we need to rename a Phi that has been eliminated due to
593  // scheduling.
594  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
595  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
596  }
597 }
598 
599 /// Generate Phis for the specified block in the generated pipelined code.
600 /// These are new Phis needed because the definition is scheduled after the
601 /// use in the pipelined sequence.
602 void ModuloScheduleExpander::generatePhis(
604  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
605  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
606  // Compute the stage number that contains the initial Phi value, and
607  // the Phi from the previous stage.
608  unsigned PrologStage = 0;
609  unsigned PrevStage = 0;
610  unsigned StageDiff = CurStageNum - LastStageNum;
611  bool InKernel = (StageDiff == 0);
612  if (InKernel) {
613  PrologStage = LastStageNum - 1;
614  PrevStage = CurStageNum;
615  } else {
616  PrologStage = LastStageNum - StageDiff;
617  PrevStage = LastStageNum + StageDiff - 1;
618  }
619 
620  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
621  BBE = BB->instr_end();
622  BBI != BBE; ++BBI) {
623  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
624  MachineOperand &MO = BBI->getOperand(i);
625  if (!MO.isReg() || !MO.isDef() ||
627  continue;
628 
629  int StageScheduled = Schedule.getStage(&*BBI);
630  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
631  Register Def = MO.getReg();
632  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
633  // An instruction scheduled in stage 0 and is used after the loop
634  // requires a phi in the epilog for the last definition from either
635  // the kernel or prolog.
636  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
637  hasUseAfterLoop(Def, BB, MRI))
638  NumPhis = 1;
639  if (!InKernel && (unsigned)StageScheduled > PrologStage)
640  continue;
641 
642  unsigned PhiOp2 = VRMap[PrevStage][Def];
643  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
644  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
645  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
646  // The number of Phis can't exceed the number of prolog stages. The
647  // prolog stage number is zero based.
648  if (NumPhis > PrologStage + 1 - StageScheduled)
649  NumPhis = PrologStage + 1 - StageScheduled;
650  for (unsigned np = 0; np < NumPhis; ++np) {
651  unsigned PhiOp1 = VRMap[PrologStage][Def];
652  if (np <= PrologStage)
653  PhiOp1 = VRMap[PrologStage - np][Def];
654  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
655  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
656  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
657  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
658  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
659  }
660  if (!InKernel)
661  PhiOp2 = VRMap[PrevStage - np][Def];
662 
663  const TargetRegisterClass *RC = MRI.getRegClass(Def);
664  Register NewReg = MRI.createVirtualRegister(RC);
665 
666  MachineInstrBuilder NewPhi =
667  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
668  TII->get(TargetOpcode::PHI), NewReg);
669  NewPhi.addReg(PhiOp1).addMBB(BB1);
670  NewPhi.addReg(PhiOp2).addMBB(BB2);
671  if (np == 0)
672  InstrMap[NewPhi] = &*BBI;
673 
674  // Rewrite uses and update the map. The actions depend upon whether
675  // we generating code for the kernel or epilog blocks.
676  if (InKernel) {
677  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
678  NewReg);
679  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
680  NewReg);
681 
682  PhiOp2 = NewReg;
683  VRMap[PrevStage - np - 1][Def] = NewReg;
684  } else {
685  VRMap[CurStageNum - np][Def] = NewReg;
686  if (np == NumPhis - 1)
687  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
688  NewReg);
689  }
690  if (IsLast && np == NumPhis - 1)
691  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
692  }
693  }
694  }
695 }
696 
697 /// Remove instructions that generate values with no uses.
698 /// Typically, these are induction variable operations that generate values
699 /// used in the loop itself. A dead instruction has a definition with
700 /// no uses, or uses that occur in the original loop only.
701 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
702  MBBVectorTy &EpilogBBs) {
703  // For each epilog block, check that the value defined by each instruction
704  // is used. If not, delete it.
705  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
706  MBE = EpilogBBs.rend();
707  MBB != MBE; ++MBB)
708  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
709  ME = (*MBB)->instr_rend();
710  MI != ME;) {
711  // From DeadMachineInstructionElem. Don't delete inline assembly.
712  if (MI->isInlineAsm()) {
713  ++MI;
714  continue;
715  }
716  bool SawStore = false;
717  // Check if it's safe to remove the instruction due to side effects.
718  // We can, and want to, remove Phis here.
719  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
720  ++MI;
721  continue;
722  }
723  bool used = true;
724  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
725  MOE = MI->operands_end();
726  MOI != MOE; ++MOI) {
727  if (!MOI->isReg() || !MOI->isDef())
728  continue;
729  Register reg = MOI->getReg();
730  // Assume physical registers are used, unless they are marked dead.
731  if (Register::isPhysicalRegister(reg)) {
732  used = !MOI->isDead();
733  if (used)
734  break;
735  continue;
736  }
737  unsigned realUses = 0;
738  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
739  EI = MRI.use_end();
740  UI != EI; ++UI) {
741  // Check if there are any uses that occur only in the original
742  // loop. If so, that's not a real use.
743  if (UI->getParent()->getParent() != BB) {
744  realUses++;
745  used = true;
746  break;
747  }
748  }
749  if (realUses > 0)
750  break;
751  used = false;
752  }
753  if (!used) {
755  MI++->eraseFromParent();
756  continue;
757  }
758  ++MI;
759  }
760  // In the kernel block, check if we can remove a Phi that generates a value
761  // used in an instruction removed in the epilog block.
762  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
763  BBE = KernelBB->getFirstNonPHI();
764  BBI != BBE;) {
765  MachineInstr *MI = &*BBI;
766  ++BBI;
767  Register reg = MI->getOperand(0).getReg();
768  if (MRI.use_begin(reg) == MRI.use_end()) {
770  MI->eraseFromParent();
771  }
772  }
773 }
774 
775 /// For loop carried definitions, we split the lifetime of a virtual register
776 /// that has uses past the definition in the next iteration. A copy with a new
777 /// virtual register is inserted before the definition, which helps with
778 /// generating a better register assignment.
779 ///
780 /// v1 = phi(a, v2) v1 = phi(a, v2)
781 /// v2 = phi(b, v3) v2 = phi(b, v3)
782 /// v3 = .. v4 = copy v1
783 /// .. = V1 v3 = ..
784 /// .. = v4
785 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
786  MBBVectorTy &EpilogBBs) {
788  for (auto &PHI : KernelBB->phis()) {
789  Register Def = PHI.getOperand(0).getReg();
790  // Check for any Phi definition that used as an operand of another Phi
791  // in the same block.
793  E = MRI.use_instr_end();
794  I != E; ++I) {
795  if (I->isPHI() && I->getParent() == KernelBB) {
796  // Get the loop carried definition.
797  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
798  if (!LCDef)
799  continue;
800  MachineInstr *MI = MRI.getVRegDef(LCDef);
801  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
802  continue;
803  // Search through the rest of the block looking for uses of the Phi
804  // definition. If one occurs, then split the lifetime.
805  unsigned SplitReg = 0;
807  KernelBB->instr_end()))
808  if (BBJ.readsRegister(Def)) {
809  // We split the lifetime when we find the first use.
810  if (SplitReg == 0) {
811  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
812  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
813  TII->get(TargetOpcode::COPY), SplitReg)
814  .addReg(Def);
815  }
816  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
817  }
818  if (!SplitReg)
819  continue;
820  // Search through each of the epilog blocks for any uses to be renamed.
821  for (auto &Epilog : EpilogBBs)
822  for (auto &I : *Epilog)
823  if (I.readsRegister(Def))
824  I.substituteRegister(Def, SplitReg, 0, *TRI);
825  break;
826  }
827  }
828  }
829 }
830 
831 /// Remove the incoming block from the Phis in a basic block.
833  for (MachineInstr &MI : *BB) {
834  if (!MI.isPHI())
835  break;
836  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
837  if (MI.getOperand(i + 1).getMBB() == Incoming) {
838  MI.RemoveOperand(i + 1);
839  MI.RemoveOperand(i);
840  break;
841  }
842  }
843 }
844 
845 /// Create branches from each prolog basic block to the appropriate epilog
846 /// block. These edges are needed if the loop ends before reaching the
847 /// kernel.
848 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
849  MBBVectorTy &PrologBBs,
850  MachineBasicBlock *KernelBB,
851  MBBVectorTy &EpilogBBs,
852  ValueMapTy *VRMap) {
853  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
854  MachineBasicBlock *LastPro = KernelBB;
855  MachineBasicBlock *LastEpi = KernelBB;
856 
857  // Start from the blocks connected to the kernel and work "out"
858  // to the first prolog and the last epilog blocks.
860  unsigned MaxIter = PrologBBs.size() - 1;
861  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
862  // Add branches to the prolog that go to the corresponding
863  // epilog, and the fall-thru prolog/kernel block.
864  MachineBasicBlock *Prolog = PrologBBs[j];
865  MachineBasicBlock *Epilog = EpilogBBs[i];
866 
868  Optional<bool> StaticallyGreater =
869  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
870  unsigned numAdded = 0;
871  if (!StaticallyGreater.hasValue()) {
872  Prolog->addSuccessor(Epilog);
873  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
874  } else if (*StaticallyGreater == false) {
875  Prolog->addSuccessor(Epilog);
876  Prolog->removeSuccessor(LastPro);
877  LastEpi->removeSuccessor(Epilog);
878  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
879  removePhis(Epilog, LastEpi);
880  // Remove the blocks that are no longer referenced.
881  if (LastPro != LastEpi) {
882  LastEpi->clear();
883  LastEpi->eraseFromParent();
884  }
885  if (LastPro == KernelBB) {
886  LoopInfo->disposed();
887  NewKernel = nullptr;
888  }
889  LastPro->clear();
890  LastPro->eraseFromParent();
891  } else {
892  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
894  }
895  LastPro = Prolog;
896  LastEpi = Epilog;
897  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
898  E = Prolog->instr_rend();
899  I != E && numAdded > 0; ++I, --numAdded)
900  updateInstruction(&*I, false, j, 0, VRMap);
901  }
902 
903  if (NewKernel) {
904  LoopInfo->setPreheader(PrologBBs[MaxIter]);
905  LoopInfo->adjustTripCount(-(MaxIter + 1));
906  }
907 }
908 
909 /// Return true if we can compute the amount the instruction changes
910 /// during each iteration. Set Delta to the amount of the change.
911 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
913  const MachineOperand *BaseOp;
914  int64_t Offset;
915  bool OffsetIsScalable;
916  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
917  return false;
918 
919  // FIXME: This algorithm assumes instructions have fixed-size offsets.
920  if (OffsetIsScalable)
921  return false;
922 
923  if (!BaseOp->isReg())
924  return false;
925 
926  Register BaseReg = BaseOp->getReg();
927 
928  MachineRegisterInfo &MRI = MF.getRegInfo();
929  // Check if there is a Phi. If so, get the definition in the loop.
930  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
931  if (BaseDef && BaseDef->isPHI()) {
932  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
933  BaseDef = MRI.getVRegDef(BaseReg);
934  }
935  if (!BaseDef)
936  return false;
937 
938  int D = 0;
939  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
940  return false;
941 
942  Delta = D;
943  return true;
944 }
945 
946 /// Update the memory operand with a new offset when the pipeliner
947 /// generates a new copy of the instruction that refers to a
948 /// different memory location.
949 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
950  MachineInstr &OldMI,
951  unsigned Num) {
952  if (Num == 0)
953  return;
954  // If the instruction has memory operands, then adjust the offset
955  // when the instruction appears in different stages.
956  if (NewMI.memoperands_empty())
957  return;
959  for (MachineMemOperand *MMO : NewMI.memoperands()) {
960  // TODO: Figure out whether isAtomic is really necessary (see D57601).
961  if (MMO->isVolatile() || MMO->isAtomic() ||
962  (MMO->isInvariant() && MMO->isDereferenceable()) ||
963  (!MMO->getValue())) {
964  NewMMOs.push_back(MMO);
965  continue;
966  }
967  unsigned Delta;
968  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
969  int64_t AdjOffset = Delta * Num;
970  NewMMOs.push_back(
971  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
972  } else {
973  NewMMOs.push_back(
975  }
976  }
977  NewMI.setMemRefs(MF, NewMMOs);
978 }
979 
980 /// Clone the instruction for the new pipelined loop and update the
981 /// memory operands, if needed.
982 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
983  unsigned CurStageNum,
984  unsigned InstStageNum) {
985  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
986  // Check for tied operands in inline asm instructions. This should be handled
987  // elsewhere, but I'm not sure of the best solution.
988  if (OldMI->isInlineAsm())
989  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
990  const auto &MO = OldMI->getOperand(i);
991  if (MO.isReg() && MO.isUse())
992  break;
993  unsigned UseIdx;
994  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
995  NewMI->tieOperands(i, UseIdx);
996  }
997  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
998  return NewMI;
999 }
1000 
1001 /// Clone the instruction for the new pipelined loop. If needed, this
1002 /// function updates the instruction using the values saved in the
1003 /// InstrChanges structure.
1004 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1005  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1006  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1007  auto It = InstrChanges.find(OldMI);
1008  if (It != InstrChanges.end()) {
1009  std::pair<unsigned, int64_t> RegAndOffset = It->second;
1010  unsigned BasePos, OffsetPos;
1011  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1012  return nullptr;
1013  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1014  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1015  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1016  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1017  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1018  }
1019  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1020  return NewMI;
1021 }
1022 
1023 /// Update the machine instruction with new virtual registers. This
1024 /// function may change the defintions and/or uses.
1025 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1026  bool LastDef,
1027  unsigned CurStageNum,
1028  unsigned InstrStageNum,
1029  ValueMapTy *VRMap) {
1030  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
1031  MachineOperand &MO = NewMI->getOperand(i);
1032  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1033  continue;
1034  Register reg = MO.getReg();
1035  if (MO.isDef()) {
1036  // Create a new virtual register for the definition.
1037  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1038  Register NewReg = MRI.createVirtualRegister(RC);
1039  MO.setReg(NewReg);
1040  VRMap[CurStageNum][reg] = NewReg;
1041  if (LastDef)
1042  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1043  } else if (MO.isUse()) {
1044  MachineInstr *Def = MRI.getVRegDef(reg);
1045  // Compute the stage that contains the last definition for instruction.
1046  int DefStageNum = Schedule.getStage(Def);
1047  unsigned StageNum = CurStageNum;
1048  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1049  // Compute the difference in stages between the defintion and the use.
1050  unsigned StageDiff = (InstrStageNum - DefStageNum);
1051  // Make an adjustment to get the last definition.
1052  StageNum -= StageDiff;
1053  }
1054  if (VRMap[StageNum].count(reg))
1055  MO.setReg(VRMap[StageNum][reg]);
1056  }
1057  }
1058 }
1059 
1060 /// Return the instruction in the loop that defines the register.
1061 /// If the definition is a Phi, then follow the Phi operand to
1062 /// the instruction in the loop.
1063 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1066  while (Def->isPHI()) {
1067  if (!Visited.insert(Def).second)
1068  break;
1069  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1070  if (Def->getOperand(i + 1).getMBB() == BB) {
1071  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1072  break;
1073  }
1074  }
1075  return Def;
1076 }
1077 
1078 /// Return the new name for the value from the previous stage.
1079 unsigned ModuloScheduleExpander::getPrevMapVal(
1080  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1081  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1082  unsigned PrevVal = 0;
1083  if (StageNum > PhiStage) {
1084  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1085  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1086  // The name is defined in the previous stage.
1087  PrevVal = VRMap[StageNum - 1][LoopVal];
1088  else if (VRMap[StageNum].count(LoopVal))
1089  // The previous name is defined in the current stage when the instruction
1090  // order is swapped.
1091  PrevVal = VRMap[StageNum][LoopVal];
1092  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1093  // The loop value hasn't yet been scheduled.
1094  PrevVal = LoopVal;
1095  else if (StageNum == PhiStage + 1)
1096  // The loop value is another phi, which has not been scheduled.
1097  PrevVal = getInitPhiReg(*LoopInst, BB);
1098  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1099  // The loop value is another phi, which has been scheduled.
1100  PrevVal =
1101  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1102  LoopStage, VRMap, BB);
1103  }
1104  return PrevVal;
1105 }
1106 
1107 /// Rewrite the Phi values in the specified block to use the mappings
1108 /// from the initial operand. Once the Phi is scheduled, we switch
1109 /// to using the loop value instead of the Phi value, so those names
1110 /// do not need to be rewritten.
1111 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1112  unsigned StageNum,
1113  ValueMapTy *VRMap,
1114  InstrMapTy &InstrMap) {
1115  for (auto &PHI : BB->phis()) {
1116  unsigned InitVal = 0;
1117  unsigned LoopVal = 0;
1118  getPhiRegs(PHI, BB, InitVal, LoopVal);
1119  Register PhiDef = PHI.getOperand(0).getReg();
1120 
1121  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1122  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1123  unsigned NumPhis = getStagesForPhi(PhiDef);
1124  if (NumPhis > StageNum)
1125  NumPhis = StageNum;
1126  for (unsigned np = 0; np <= NumPhis; ++np) {
1127  unsigned NewVal =
1128  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1129  if (!NewVal)
1130  NewVal = InitVal;
1131  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1132  NewVal);
1133  }
1134  }
1135 }
1136 
1137 /// Rewrite a previously scheduled instruction to use the register value
1138 /// from the new instruction. Make sure the instruction occurs in the
1139 /// basic block, and we don't change the uses in the new instruction.
1140 void ModuloScheduleExpander::rewriteScheduledInstr(
1141  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1142  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1143  unsigned PrevReg) {
1144  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1145  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1146  // Rewrite uses that have been scheduled already to use the new
1147  // Phi register.
1149  EI = MRI.use_end();
1150  UI != EI;) {
1151  MachineOperand &UseOp = *UI;
1152  MachineInstr *UseMI = UseOp.getParent();
1153  ++UI;
1154  if (UseMI->getParent() != BB)
1155  continue;
1156  if (UseMI->isPHI()) {
1157  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1158  continue;
1159  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1160  continue;
1161  }
1162  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1163  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1164  MachineInstr *OrigMI = OrigInstr->second;
1165  int StageSched = Schedule.getStage(OrigMI);
1166  int CycleSched = Schedule.getCycle(OrigMI);
1167  unsigned ReplaceReg = 0;
1168  // This is the stage for the scheduled instruction.
1169  if (StagePhi == StageSched && Phi->isPHI()) {
1170  int CyclePhi = Schedule.getCycle(Phi);
1171  if (PrevReg && InProlog)
1172  ReplaceReg = PrevReg;
1173  else if (PrevReg && !isLoopCarried(*Phi) &&
1174  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1175  ReplaceReg = PrevReg;
1176  else
1177  ReplaceReg = NewReg;
1178  }
1179  // The scheduled instruction occurs before the scheduled Phi, and the
1180  // Phi is not loop carried.
1181  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1182  ReplaceReg = NewReg;
1183  if (StagePhi > StageSched && Phi->isPHI())
1184  ReplaceReg = NewReg;
1185  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1186  ReplaceReg = NewReg;
1187  if (ReplaceReg) {
1188  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1189  UseOp.setReg(ReplaceReg);
1190  }
1191  }
1192 }
1193 
1194 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1195  if (!Phi.isPHI())
1196  return false;
1197  int DefCycle = Schedule.getCycle(&Phi);
1198  int DefStage = Schedule.getStage(&Phi);
1199 
1200  unsigned InitVal = 0;
1201  unsigned LoopVal = 0;
1202  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1203  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1204  if (!Use || Use->isPHI())
1205  return true;
1206  int LoopCycle = Schedule.getCycle(Use);
1207  int LoopStage = Schedule.getStage(Use);
1208  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1209 }
1210 
1211 //===----------------------------------------------------------------------===//
1212 // PeelingModuloScheduleExpander implementation
1213 //===----------------------------------------------------------------------===//
1214 // This is a reimplementation of ModuloScheduleExpander that works by creating
1215 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1216 //===----------------------------------------------------------------------===//
1217 
1218 namespace {
1219 // Remove any dead phis in MBB. Dead phis either have only one block as input
1220 // (in which case they are the identity) or have no uses.
1221 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1222  LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1223  bool Changed = true;
1224  while (Changed) {
1225  Changed = false;
1226  for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
1227  MachineInstr &MI = *I++;
1228  assert(MI.isPHI());
1229  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1230  if (LIS)
1232  MI.eraseFromParent();
1233  Changed = true;
1234  } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1235  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1236  MRI.getRegClass(MI.getOperand(0).getReg()));
1237  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1238  MI.getOperand(1).getReg());
1239  if (LIS)
1241  MI.eraseFromParent();
1242  Changed = true;
1243  }
1244  }
1245  }
1246 }
1247 
1248 /// Rewrites the kernel block in-place to adhere to the given schedule.
1249 /// KernelRewriter holds all of the state required to perform the rewriting.
1250 class KernelRewriter {
1251  ModuloSchedule &S;
1253  MachineBasicBlock *PreheaderBB, *ExitBB;
1255  const TargetInstrInfo *TII;
1256  LiveIntervals *LIS;
1257 
1258  // Map from register class to canonical undef register for that class.
1260  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1261  // this map is only used when InitReg is non-undef.
1263  // Map from LoopReg to phi register where the InitReg is undef.
1264  DenseMap<Register, Register> UndefPhis;
1265 
1266  // Reg is used by MI. Return the new register MI should use to adhere to the
1267  // schedule. Insert phis as necessary.
1268  Register remapUse(Register Reg, MachineInstr &MI);
1269  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1270  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1271  // or will be chosen so as to share another phi.
1272  Register phi(Register LoopReg, Optional<Register> InitReg = {},
1273  const TargetRegisterClass *RC = nullptr);
1274  // Create an undef register of the given register class.
1275  Register undef(const TargetRegisterClass *RC);
1276 
1277 public:
1278  KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1279  LiveIntervals *LIS = nullptr);
1280  void rewrite();
1281 };
1282 } // namespace
1283 
1284 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1285  LiveIntervals *LIS)
1286  : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
1287  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1288  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1289  PreheaderBB = *BB->pred_begin();
1290  if (PreheaderBB == BB)
1291  PreheaderBB = *std::next(BB->pred_begin());
1292 }
1293 
1294 void KernelRewriter::rewrite() {
1295  // Rearrange the loop to be in schedule order. Note that the schedule may
1296  // contain instructions that are not owned by the loop block (InstrChanges and
1297  // friends), so we gracefully handle unowned instructions and delete any
1298  // instructions that weren't in the schedule.
1299  auto InsertPt = BB->getFirstTerminator();
1300  MachineInstr *FirstMI = nullptr;
1301  for (MachineInstr *MI : S.getInstructions()) {
1302  if (MI->isPHI())
1303  continue;
1304  if (MI->getParent())
1305  MI->removeFromParent();
1306  BB->insert(InsertPt, MI);
1307  if (!FirstMI)
1308  FirstMI = MI;
1309  }
1310  assert(FirstMI && "Failed to find first MI in schedule");
1311 
1312  // At this point all of the scheduled instructions are between FirstMI
1313  // and the end of the block. Kill from the first non-phi to FirstMI.
1314  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1315  if (LIS)
1317  (I++)->eraseFromParent();
1318  }
1319 
1320  // Now remap every instruction in the loop.
1321  for (MachineInstr &MI : *BB) {
1322  if (MI.isPHI() || MI.isTerminator())
1323  continue;
1324  for (MachineOperand &MO : MI.uses()) {
1325  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1326  continue;
1327  Register Reg = remapUse(MO.getReg(), MI);
1328  MO.setReg(Reg);
1329  }
1330  }
1331  EliminateDeadPhis(BB, MRI, LIS);
1332 
1333  // Ensure a phi exists for all instructions that are either referenced by
1334  // an illegal phi or by an instruction outside the loop. This allows us to
1335  // treat remaps of these values the same as "normal" values that come from
1336  // loop-carried phis.
1337  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1338  if (MI->isPHI()) {
1339  Register R = MI->getOperand(0).getReg();
1340  phi(R);
1341  continue;
1342  }
1343 
1344  for (MachineOperand &Def : MI->defs()) {
1345  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1346  if (MI.getParent() != BB) {
1347  phi(Def.getReg());
1348  break;
1349  }
1350  }
1351  }
1352  }
1353 }
1354 
1355 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1356  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1357  if (!Producer)
1358  return Reg;
1359 
1360  int ConsumerStage = S.getStage(&MI);
1361  if (!Producer->isPHI()) {
1362  // Non-phi producers are simple to remap. Insert as many phis as the
1363  // difference between the consumer and producer stages.
1364  if (Producer->getParent() != BB)
1365  // Producer was not inside the loop. Use the register as-is.
1366  return Reg;
1367  int ProducerStage = S.getStage(Producer);
1368  assert(ConsumerStage != -1 &&
1369  "In-loop consumer should always be scheduled!");
1370  assert(ConsumerStage >= ProducerStage);
1371  unsigned StageDiff = ConsumerStage - ProducerStage;
1372 
1373  for (unsigned I = 0; I < StageDiff; ++I)
1374  Reg = phi(Reg);
1375  return Reg;
1376  }
1377 
1378  // First, dive through the phi chain to find the defaults for the generated
1379  // phis.
1380  SmallVector<Optional<Register>, 4> Defaults;
1381  Register LoopReg = Reg;
1382  auto LoopProducer = Producer;
1383  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1384  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1385  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1386  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1387  assert(LoopProducer);
1388  }
1389  int LoopProducerStage = S.getStage(LoopProducer);
1390 
1391  Optional<Register> IllegalPhiDefault;
1392 
1393  if (LoopProducerStage == -1) {
1394  // Do nothing.
1395  } else if (LoopProducerStage > ConsumerStage) {
1396  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1397  // In addition, Consumer's cycle must be scheduled after Producer in the
1398  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1399  // functions.
1400 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1401  int LoopProducerCycle = S.getCycle(LoopProducer);
1402  int ConsumerCycle = S.getCycle(&MI);
1403 #endif
1404  assert(LoopProducerCycle <= ConsumerCycle);
1405  assert(LoopProducerStage == ConsumerStage + 1);
1406  // Peel off the first phi from Defaults and insert a phi between producer
1407  // and consumer. This phi will not be at the front of the block so we
1408  // consider it illegal. It will only exist during the rewrite process; it
1409  // needs to exist while we peel off prologs because these could take the
1410  // default value. After that we can replace all uses with the loop producer
1411  // value.
1412  IllegalPhiDefault = Defaults.front();
1413  Defaults.erase(Defaults.begin());
1414  } else {
1415  assert(ConsumerStage >= LoopProducerStage);
1416  int StageDiff = ConsumerStage - LoopProducerStage;
1417  if (StageDiff > 0) {
1418  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1419  << " to " << (Defaults.size() + StageDiff) << "\n");
1420  // If we need more phis than we have defaults for, pad out with undefs for
1421  // the earliest phis, which are at the end of the defaults chain (the
1422  // chain is in reverse order).
1423  Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1424  ? Optional<Register>()
1425  : Defaults.back());
1426  }
1427  }
1428 
1429  // Now we know the number of stages to jump back, insert the phi chain.
1430  auto DefaultI = Defaults.rbegin();
1431  while (DefaultI != Defaults.rend())
1432  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1433 
1434  if (IllegalPhiDefault.hasValue()) {
1435  // The consumer optionally consumes LoopProducer in the same iteration
1436  // (because the producer is scheduled at an earlier cycle than the consumer)
1437  // or the initial value. To facilitate this we create an illegal block here
1438  // by embedding a phi in the middle of the block. We will fix this up
1439  // immediately prior to pruning.
1440  auto RC = MRI.getRegClass(Reg);
1442  MachineInstr *IllegalPhi =
1443  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1444  .addReg(IllegalPhiDefault.getValue())
1445  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1446  .addReg(LoopReg)
1447  .addMBB(BB); // Block choice is arbitrary and has no effect.
1448  // Illegal phi should belong to the producer stage so that it can be
1449  // filtered correctly during peeling.
1450  S.setStage(IllegalPhi, LoopProducerStage);
1451  return R;
1452  }
1453 
1454  return LoopReg;
1455 }
1456 
1458  const TargetRegisterClass *RC) {
1459  // If the init register is not undef, try and find an existing phi.
1460  if (InitReg.hasValue()) {
1461  auto I = Phis.find({LoopReg, InitReg.getValue()});
1462  if (I != Phis.end())
1463  return I->second;
1464  } else {
1465  for (auto &KV : Phis) {
1466  if (KV.first.first == LoopReg)
1467  return KV.second;
1468  }
1469  }
1470 
1471  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1472  // find a phi that takes undef as input.
1473  auto I = UndefPhis.find(LoopReg);
1474  if (I != UndefPhis.end()) {
1475  Register R = I->second;
1476  if (!InitReg.hasValue())
1477  // Found a phi taking undef as input, and this input is undef so return
1478  // without any more changes.
1479  return R;
1480  // Found a phi taking undef as input, so rewrite it to take InitReg.
1482  MI->getOperand(1).setReg(InitReg.getValue());
1483  Phis.insert({{LoopReg, InitReg.getValue()}, R});
1485  UndefPhis.erase(I);
1486  return R;
1487  }
1488 
1489  // Failed to find any existing phi to reuse, so create a new one.
1490  if (!RC)
1491  RC = MRI.getRegClass(LoopReg);
1493  if (InitReg.hasValue())
1494  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1495  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1496  .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1497  .addMBB(PreheaderBB)
1498  .addReg(LoopReg)
1499  .addMBB(BB);
1500  if (!InitReg.hasValue())
1501  UndefPhis[LoopReg] = R;
1502  else
1503  Phis[{LoopReg, *InitReg}] = R;
1504  return R;
1505 }
1506 
1507 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1508  Register &R = Undefs[RC];
1509  if (R == 0) {
1510  // Create an IMPLICIT_DEF that defines this register if we need it.
1511  // All uses of this should be removed by the time we have finished unrolling
1512  // prologs and epilogs.
1513  R = MRI.createVirtualRegister(RC);
1514  auto *InsertBB = &PreheaderBB->getParent()->front();
1515  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1516  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1517  }
1518  return R;
1519 }
1520 
1521 namespace {
1522 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1523 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1524 /// through and defaults for these phis.
1525 class KernelOperandInfo {
1528  SmallVector<Register, 4> PhiDefaults;
1531 
1532 public:
1533  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1534  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1535  : MRI(MRI) {
1536  Source = MO;
1537  BB = MO->getParent()->getParent();
1538  while (isRegInLoop(MO)) {
1539  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1540  if (MI->isFullCopy()) {
1541  MO = &MI->getOperand(1);
1542  continue;
1543  }
1544  if (!MI->isPHI())
1545  break;
1546  // If this is an illegal phi, don't count it in distance.
1547  if (IllegalPhis.count(MI)) {
1548  MO = &MI->getOperand(3);
1549  continue;
1550  }
1551 
1553  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1554  : &MI->getOperand(3);
1555  PhiDefaults.push_back(Default);
1556  }
1557  Target = MO;
1558  }
1559 
1560  bool operator==(const KernelOperandInfo &Other) const {
1561  return PhiDefaults.size() == Other.PhiDefaults.size();
1562  }
1563 
1564  void print(raw_ostream &OS) const {
1565  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1566  << *Source->getParent();
1567  }
1568 
1569 private:
1570  bool isRegInLoop(MachineOperand *MO) {
1571  return MO->isReg() && MO->getReg().isVirtual() &&
1572  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1573  }
1574 };
1575 } // namespace
1576 
1578 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1579  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1580  if (LPD == LPD_Front)
1581  PeeledFront.push_back(NewBB);
1582  else
1583  PeeledBack.push_front(NewBB);
1584  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1585  ++I, ++NI) {
1586  CanonicalMIs[&*I] = &*I;
1587  CanonicalMIs[&*NI] = &*I;
1588  BlockMIs[{NewBB, &*I}] = &*NI;
1589  BlockMIs[{BB, &*I}] = &*I;
1590  }
1591  return NewBB;
1592 }
1593 
1595  int MinStage) {
1596  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1597  I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1598  MachineInstr *MI = &*I++;
1599  int Stage = getStage(MI);
1600  if (Stage == -1 || Stage >= MinStage)
1601  continue;
1602 
1603  for (MachineOperand &DefMO : MI->defs()) {
1605  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1606  // Only PHIs can use values from this block by construction.
1607  // Match with the equivalent PHI in B.
1608  assert(UseMI.isPHI());
1609  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1610  MI->getParent());
1611  Subs.emplace_back(&UseMI, Reg);
1612  }
1613  for (auto &Sub : Subs)
1614  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1616  }
1617  if (LIS)
1619  MI->eraseFromParent();
1620  }
1621 }
1622 
1624  MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1625  auto InsertPt = DestBB->getFirstNonPHI();
1627  for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
1628  MachineInstr *MI = &*I++;
1629  if (MI->isPHI()) {
1630  // This is an illegal PHI. If we move any instructions using an illegal
1631  // PHI, we need to create a legal Phi.
1632  if (getStage(MI) != Stage) {
1633  // The legal Phi is not necessary if the illegal phi's stage
1634  // is being moved.
1635  Register PhiR = MI->getOperand(0).getReg();
1636  auto RC = MRI.getRegClass(PhiR);
1638  MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1639  DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1640  .addReg(PhiR)
1641  .addMBB(SourceBB);
1642  BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
1643  CanonicalMIs[NI] = CanonicalMIs[MI];
1644  Remaps[PhiR] = NR;
1645  }
1646  }
1647  if (getStage(MI) != Stage)
1648  continue;
1649  MI->removeFromParent();
1650  DestBB->insert(InsertPt, MI);
1651  auto *KernelMI = CanonicalMIs[MI];
1652  BlockMIs[{DestBB, KernelMI}] = MI;
1653  BlockMIs.erase({SourceBB, KernelMI});
1654  }
1655  SmallVector<MachineInstr *, 4> PhiToDelete;
1656  for (MachineInstr &MI : DestBB->phis()) {
1657  assert(MI.getNumOperands() == 3);
1658  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1659  // If the instruction referenced by the phi is moved inside the block
1660  // we don't need the phi anymore.
1661  if (getStage(Def) == Stage) {
1662  Register PhiReg = MI.getOperand(0).getReg();
1663  assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1664  MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1665  MI.getOperand(0).setReg(PhiReg);
1666  PhiToDelete.push_back(&MI);
1667  }
1668  }
1669  for (auto *P : PhiToDelete)
1670  P->eraseFromParent();
1671  InsertPt = DestBB->getFirstNonPHI();
1672  // Helper to clone Phi instructions into the destination block. We clone Phi
1673  // greedily to avoid combinatorial explosion of Phi instructions.
1674  auto clonePhi = [&](MachineInstr *Phi) {
1675  MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1676  DestBB->insert(InsertPt, NewMI);
1677  Register OrigR = Phi->getOperand(0).getReg();
1679  NewMI->getOperand(0).setReg(R);
1680  NewMI->getOperand(1).setReg(OrigR);
1681  NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1682  Remaps[OrigR] = R;
1683  CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1684  BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1686  return R;
1687  };
1688  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1689  for (MachineOperand &MO : I->uses()) {
1690  if (!MO.isReg())
1691  continue;
1692  if (Remaps.count(MO.getReg()))
1693  MO.setReg(Remaps[MO.getReg()]);
1694  else {
1695  // If we are using a phi from the source block we need to add a new phi
1696  // pointing to the old one.
1698  if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1699  Register R = clonePhi(Use);
1700  MO.setReg(R);
1701  }
1702  }
1703  }
1704  }
1705 }
1706 
1707 Register
1709  MachineInstr *Phi) {
1710  unsigned distance = PhiNodeLoopIteration[Phi];
1711  MachineInstr *CanonicalUse = CanonicalPhi;
1712  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1713  for (unsigned I = 0; I < distance; ++I) {
1714  assert(CanonicalUse->isPHI());
1715  assert(CanonicalUse->getNumOperands() == 5);
1716  unsigned LoopRegIdx = 3, InitRegIdx = 1;
1717  if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1718  std::swap(LoopRegIdx, InitRegIdx);
1719  CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1720  CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1721  }
1722  return CanonicalUseReg;
1723 }
1724 
1726  BitVector LS(Schedule.getNumStages(), true);
1727  BitVector AS(Schedule.getNumStages(), true);
1728  LiveStages[BB] = LS;
1729  AvailableStages[BB] = AS;
1730 
1731  // Peel out the prologs.
1732  LS.reset();
1733  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1734  LS[I] = 1;
1735  Prologs.push_back(peelKernel(LPD_Front));
1736  LiveStages[Prologs.back()] = LS;
1737  AvailableStages[Prologs.back()] = LS;
1738  }
1739 
1740  // Create a block that will end up as the new loop exiting block (dominated by
1741  // all prologs and epilogs). It will only contain PHIs, in the same order as
1742  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1743  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1744  // property that any value deffed in BB but used outside of BB is used by a
1745  // PHI in the exiting block.
1747  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1748  // Push out the epilogs, again in reverse order.
1749  // We can't assume anything about the minumum loop trip count at this point,
1750  // so emit a fairly complex epilog.
1751 
1752  // We first peel number of stages minus one epilogue. Then we remove dead
1753  // stages and reorder instructions based on their stage. If we have 3 stages
1754  // we generate first:
1755  // E0[3, 2, 1]
1756  // E1[3', 2']
1757  // E2[3'']
1758  // And then we move instructions based on their stages to have:
1759  // E0[3]
1760  // E1[2, 3']
1761  // E2[1, 2', 3'']
1762  // The transformation is legal because we only move instructions past
1763  // instructions of a previous loop iteration.
1764  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1765  Epilogs.push_back(peelKernel(LPD_Back));
1766  MachineBasicBlock *B = Epilogs.back();
1768  // Keep track at which iteration each phi belongs to. We need it to know
1769  // what version of the variable to use during prologue/epilogue stitching.
1770  EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1771  for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
1773  }
1774  for (size_t I = 0; I < Epilogs.size(); I++) {
1775  LS.reset();
1776  for (size_t J = I; J < Epilogs.size(); J++) {
1777  int Iteration = J;
1778  unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1779  // Move stage one block at a time so that Phi nodes are updated correctly.
1780  for (size_t K = Iteration; K > I; K--)
1781  moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1782  LS[Stage] = 1;
1783  }
1784  LiveStages[Epilogs[I]] = LS;
1785  AvailableStages[Epilogs[I]] = AS;
1786  }
1787 
1788  // Now we've defined all the prolog and epilog blocks as a fallthrough
1789  // sequence, add the edges that will be followed if the loop trip count is
1790  // lower than the number of stages (connecting prologs directly with epilogs).
1791  auto PI = Prologs.begin();
1792  auto EI = Epilogs.begin();
1793  assert(Prologs.size() == Epilogs.size());
1794  for (; PI != Prologs.end(); ++PI, ++EI) {
1795  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1796  (*PI)->addSuccessor(*EI);
1797  for (MachineInstr &MI : (*EI)->phis()) {
1798  Register Reg = MI.getOperand(1).getReg();
1800  if (Use && Use->getParent() == Pred) {
1801  MachineInstr *CanonicalUse = CanonicalMIs[Use];
1802  if (CanonicalUse->isPHI()) {
1803  // If the use comes from a phi we need to skip as many phi as the
1804  // distance between the epilogue and the kernel. Trace through the phi
1805  // chain to find the right value.
1806  Reg = getPhiCanonicalReg(CanonicalUse, Use);
1807  }
1809  }
1810  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1811  MI.addOperand(MachineOperand::CreateMBB(*PI));
1812  }
1813  }
1814 
1815  // Create a list of all blocks in order.
1817  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1818  Blocks.push_back(BB);
1819  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1820 
1821  // Iterate in reverse order over all instructions, remapping as we go.
1822  for (MachineBasicBlock *B : reverse(Blocks)) {
1823  for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1824  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1825  MachineInstr *MI = &*I++;
1826  rewriteUsesOf(MI);
1827  }
1828  }
1829  for (auto *MI : IllegalPhisToDelete) {
1830  if (LIS)
1832  MI->eraseFromParent();
1833  }
1835 
1836  // Now all remapping has been done, we're free to optimize the generated code.
1837  for (MachineBasicBlock *B : reverse(Blocks))
1838  EliminateDeadPhis(B, MRI, LIS);
1839  EliminateDeadPhis(ExitingBB, MRI, LIS);
1840 }
1841 
1843  MachineFunction &MF = *BB->getParent();
1844  MachineBasicBlock *Exit = *BB->succ_begin();
1845  if (Exit == BB)
1846  Exit = *std::next(BB->succ_begin());
1847 
1849  MF.insert(std::next(BB->getIterator()), NewBB);
1850 
1851  // Clone all phis in BB into NewBB and rewrite.
1852  for (MachineInstr &MI : BB->phis()) {
1853  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1854  Register OldR = MI.getOperand(3).getReg();
1857  for (MachineInstr &Use : MRI.use_instructions(OldR))
1858  if (Use.getParent() != BB)
1859  Uses.push_back(&Use);
1860  for (MachineInstr *Use : Uses)
1861  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1863  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1864  .addReg(OldR)
1865  .addMBB(BB);
1866  BlockMIs[{NewBB, &MI}] = NI;
1867  CanonicalMIs[NI] = &MI;
1868  }
1869  BB->replaceSuccessor(Exit, NewBB);
1870  Exit->replacePhiUsesWith(BB, NewBB);
1871  NewBB->addSuccessor(Exit);
1872 
1873  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1875  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1876  (void)CanAnalyzeBr;
1877  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1878  TII->removeBranch(*BB);
1879  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1880  Cond, DebugLoc());
1881  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1882  return NewBB;
1883 }
1884 
1885 Register
1887  MachineBasicBlock *BB) {
1889  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1890  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1891 }
1892 
1894  if (MI->isPHI()) {
1895  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1896  // and it is produced by this block.
1897  Register PhiR = MI->getOperand(0).getReg();
1898  Register R = MI->getOperand(3).getReg();
1899  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1900  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1901  R = MI->getOperand(1).getReg();
1902  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1903  MRI.replaceRegWith(PhiR, R);
1904  // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1905  // later to figure out how to remap registers.
1906  MI->getOperand(0).setReg(PhiR);
1907  IllegalPhisToDelete.push_back(MI);
1908  return;
1909  }
1910 
1911  int Stage = getStage(MI);
1912  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1913  LiveStages[MI->getParent()].test(Stage))
1914  // Instruction is live, no rewriting to do.
1915  return;
1916 
1917  for (MachineOperand &DefMO : MI->defs()) {
1919  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1920  // Only PHIs can use values from this block by construction.
1921  // Match with the equivalent PHI in B.
1922  assert(UseMI.isPHI());
1923  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1924  MI->getParent());
1925  Subs.emplace_back(&UseMI, Reg);
1926  }
1927  for (auto &Sub : Subs)
1928  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1930  }
1931  if (LIS)
1933  MI->eraseFromParent();
1934 }
1935 
1937  // Work outwards from the kernel.
1938  bool KernelDisposed = false;
1939  int TC = Schedule.getNumStages() - 1;
1940  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1941  ++PI, ++EI, --TC) {
1942  MachineBasicBlock *Prolog = *PI;
1943  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1944  MachineBasicBlock *Epilog = *EI;
1946  TII->removeBranch(*Prolog);
1947  Optional<bool> StaticallyGreater =
1948  LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1949  if (!StaticallyGreater.hasValue()) {
1950  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1951  // Dynamically branch based on Cond.
1952  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1953  } else if (*StaticallyGreater == false) {
1954  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1955  // Prolog never falls through; branch to epilog and orphan interior
1956  // blocks. Leave it to unreachable-block-elim to clean up.
1957  Prolog->removeSuccessor(Fallthrough);
1958  for (MachineInstr &P : Fallthrough->phis()) {
1959  P.RemoveOperand(2);
1960  P.RemoveOperand(1);
1961  }
1963  KernelDisposed = true;
1964  } else {
1965  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1966  // Prolog always falls through; remove incoming values in epilog.
1967  Prolog->removeSuccessor(Epilog);
1968  for (MachineInstr &P : Epilog->phis()) {
1969  P.RemoveOperand(4);
1970  P.RemoveOperand(3);
1971  }
1972  }
1973  }
1974 
1975  if (!KernelDisposed) {
1976  LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1977  LoopInfo->setPreheader(Prologs.back());
1978  } else {
1979  LoopInfo->disposed();
1980  }
1981 }
1982 
1984  KernelRewriter KR(*Schedule.getLoop(), Schedule);
1985  KR.rewrite();
1986 }
1987 
1989  BB = Schedule.getLoop()->getTopBlock();
1993  assert(LoopInfo);
1994 
1995  rewriteKernel();
1997  fixupBranches();
1998 }
1999 
2001  BB = Schedule.getLoop()->getTopBlock();
2003 
2004  // Dump the schedule before we invalidate and remap all its instructions.
2005  // Stash it in a string so we can print it if we found an error.
2006  std::string ScheduleDump;
2007  raw_string_ostream OS(ScheduleDump);
2008  Schedule.print(OS);
2009  OS.flush();
2010 
2011  // First, run the normal ModuleScheduleExpander. We don't support any
2012  // InstrChanges.
2013  assert(LIS && "Requires LiveIntervals!");
2016  MSE.expand();
2017  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2018  if (!ExpandedKernel) {
2019  // The expander optimized away the kernel. We can't do any useful checking.
2020  MSE.cleanup();
2021  return;
2022  }
2023  // Before running the KernelRewriter, re-add BB into the CFG.
2025 
2026  // Now run the new expansion algorithm.
2027  KernelRewriter KR(*Schedule.getLoop(), Schedule);
2028  KR.rewrite();
2030 
2031  // Collect all illegal phis that the new algorithm created. We'll give these
2032  // to KernelOperandInfo.
2033  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2034  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2035  if (NI->isPHI())
2036  IllegalPhis.insert(&*NI);
2037  }
2038 
2039  // Co-iterate across both kernels. We expect them to be identical apart from
2040  // phis and full COPYs (we look through both).
2042  auto OI = ExpandedKernel->begin();
2043  auto NI = BB->begin();
2044  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2045  while (OI->isPHI() || OI->isFullCopy())
2046  ++OI;
2047  while (NI->isPHI() || NI->isFullCopy())
2048  ++NI;
2049  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2050  // Analyze every operand separately.
2051  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2052  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2053  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2054  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2055  }
2056 
2057  bool Failed = false;
2058  for (auto &OldAndNew : KOIs) {
2059  if (OldAndNew.first == OldAndNew.second)
2060  continue;
2061  Failed = true;
2062  errs() << "Modulo kernel validation error: [\n";
2063  errs() << " [golden] ";
2064  OldAndNew.first.print(errs());
2065  errs() << " ";
2066  OldAndNew.second.print(errs());
2067  errs() << "]\n";
2068  }
2069 
2070  if (Failed) {
2071  errs() << "Golden reference kernel:\n";
2072  ExpandedKernel->print(errs());
2073  errs() << "New kernel:\n";
2074  BB->print(errs());
2075  errs() << ScheduleDump;
2077  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2078  }
2079 
2080  // Cleanup by removing BB from the CFG again as the original
2081  // ModuloScheduleExpander intended.
2083  MSE.cleanup();
2084 }
2085 
2086 //===----------------------------------------------------------------------===//
2087 // ModuloScheduleTestPass implementation
2088 //===----------------------------------------------------------------------===//
2089 // This pass constructs a ModuloSchedule from its module and runs
2090 // ModuloScheduleExpander.
2091 //
2092 // The module is expected to contain a single-block analyzable loop.
2093 // The total order of instructions is taken from the loop as-is.
2094 // Instructions are expected to be annotated with a PostInstrSymbol.
2095 // This PostInstrSymbol must have the following format:
2096 // "Stage=%d Cycle=%d".
2097 //===----------------------------------------------------------------------===//
2098 
2099 namespace {
2100 class ModuloScheduleTest : public MachineFunctionPass {
2101 public:
2102  static char ID;
2103 
2104  ModuloScheduleTest() : MachineFunctionPass(ID) {
2106  }
2107 
2108  bool runOnMachineFunction(MachineFunction &MF) override;
2109  void runOnLoop(MachineFunction &MF, MachineLoop &L);
2110 
2111  void getAnalysisUsage(AnalysisUsage &AU) const override {
2113  AU.addRequired<LiveIntervals>();
2115  }
2116 };
2117 } // namespace
2118 
2119 char ModuloScheduleTest::ID = 0;
2120 
2121 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2122  "Modulo Schedule test pass", false, false)
2125 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2126  "Modulo Schedule test pass", false, false)
2127 
2128 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2129  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2130  for (auto *L : MLI) {
2131  if (L->getTopBlock() != L->getBottomBlock())
2132  continue;
2133  runOnLoop(MF, *L);
2134  return false;
2135  }
2136  return false;
2137 }
2138 
2139 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2140  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2141  std::pair<StringRef, StringRef> StageTokenAndValue =
2142  getToken(StageAndCycle.first, "-");
2143  std::pair<StringRef, StringRef> CycleTokenAndValue =
2144  getToken(StageAndCycle.second, "-");
2145  if (StageTokenAndValue.first != "Stage" ||
2146  CycleTokenAndValue.first != "_Cycle") {
2148  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2149  return;
2150  }
2151 
2152  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2153  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2154 
2155  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2156 }
2157 
2158 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2159  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2161  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2162 
2163  DenseMap<MachineInstr *, int> Cycle, Stage;
2164  std::vector<MachineInstr *> Instrs;
2165  for (MachineInstr &MI : *BB) {
2166  if (MI.isTerminator())
2167  continue;
2168  Instrs.push_back(&MI);
2169  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2170  dbgs() << "Parsing post-instr symbol for " << MI;
2171  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2172  }
2173  }
2174 
2175  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2176  std::move(Stage));
2178  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2179  MSE.expand();
2180  MSE.cleanup();
2181 }
2182 
2183 //===----------------------------------------------------------------------===//
2184 // ModuloScheduleTestAnnotater implementation
2185 //===----------------------------------------------------------------------===//
2186 
2188  for (MachineInstr *MI : S.getInstructions()) {
2190  raw_svector_ostream OS(SV);
2191  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2192  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2193  MI->setPostInstrSymbol(MF, Sym);
2194  }
2195 }
i
i
Definition: README.txt:29
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1071
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:100
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:788
llvm::PeelingModuloScheduleExpander::rewriteKernel
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition: ModuloSchedule.cpp:1983
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:181
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:78
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:507
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::MachineBasicBlock::clear
void clear()
Definition: MachineBasicBlock.h:851
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:430
ErrorHandling.h
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:153
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:269
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:652
llvm::LPD_Front
@ LPD_Front
Peel the first iteration of the loop.
Definition: MachineLoopUtils.h:19
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1369
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:756
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1556
llvm::PeelingModuloScheduleExpander::Epilogs
SmallVector< MachineBasicBlock *, 4 > Epilogs
Definition: ModuloSchedule.h:305
llvm::PeelingModuloScheduleExpander::getStage
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
Definition: ModuloSchedule.h:357
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:732
llvm::Optional< bool >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:894
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
llvm::PeelingModuloScheduleExpander::TII
const TargetInstrInfo * TII
Definition: ModuloSchedule.h:297
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:66
llvm::MachineLoop::getBottomBlock
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout,...
Definition: MachineLoopInfo.cpp:76
llvm::PeelingModuloScheduleExpander::PeeledFront
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
Definition: ModuloSchedule.h:323
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::PeelingModuloScheduleExpander::moveStageBetweenBlocks
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition: ModuloSchedule.cpp:1623
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:292
llvm::TargetInstrInfo::insertBranch
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
Definition: TargetInstrInfo.h:683
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:286
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:743
llvm::ModuloSchedule::dump
void dump()
Definition: ModuloSchedule.h:153
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:752
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::PeelingModuloScheduleExpander::AvailableStages
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
Definition: ModuloSchedule.h:311
llvm::MachineBasicBlock::getFirstInstrTerminator
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Definition: MachineBasicBlock.cpp:248
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:157
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:377
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::TargetInstrInfo::removeBranch
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
Definition: TargetInstrInfo.h:665
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:147
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:177
llvm::PeelingModuloScheduleExpander::Schedule
ModuloSchedule & Schedule
Definition: ModuloSchedule.h:293
llvm::PeelingModuloScheduleExpander::PeeledBack
std::deque< MachineBasicBlock * > PeeledBack
Definition: ModuloSchedule.h:323
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::MachineInstr::setMemRefs
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
Definition: MachineInstr.cpp:371
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:334
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:367
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::PeelingModuloScheduleExpander::PhiNodeLoopIteration
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
Definition: ModuloSchedule.h:314
MCContext.h
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::MachineBasicBlock::eraseFromParent
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Definition: MachineBasicBlock.cpp:1355
test
Definition: README.txt:37
llvm::PeelingModuloScheduleExpander::BlockMIs
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Definition: ModuloSchedule.h:320
llvm::PeelingModuloScheduleExpander::BB
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
Definition: ModuloSchedule.h:301
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::PeelingModuloScheduleExpander::Prologs
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
Definition: ModuloSchedule.h:305
pass
modulo schedule Modulo Schedule test pass
Definition: ModuloSchedule.cpp:2126
llvm::PeelingModuloScheduleExpander::filterInstructions
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition: ModuloSchedule.cpp:1594
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:183
llvm::ModuloSchedule::getInstructions
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
Definition: ModuloSchedule.h:151
llvm::PeelSingleBlockLoop
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Definition: MachineLoopUtils.cpp:27
llvm::BitVector
Definition: BitVector.h:74
CombinerObjective::Default
@ Default
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:235
llvm::PeelingModuloScheduleExpander::peelKernel
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition: ModuloSchedule.cpp:1578
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
parseSymbolString
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition: ModuloSchedule.cpp:2139
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:95
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:739
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1341
llvm::PeelingModuloScheduleExpander::Preheader
MachineBasicBlock * Preheader
The original loop preheader.
Definition: ModuloSchedule.h:303
llvm::PeelingModuloScheduleExpander::fixupBranches
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition: ModuloSchedule.cpp:1936
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:306
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:128
llvm::ModuloScheduleExpander::getRewrittenKernel
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Definition: ModuloSchedule.h:274
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::PeelingModuloScheduleExpander::rewriteUsesOf
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition: ModuloSchedule.cpp:1893
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1588
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:70
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2187
LiveIntervals.h
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:813
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:358
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
StringExtras.h
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:199
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:414
replaceRegUsesAfterLoop
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition: ModuloSchedule.cpp:333
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetInstrInfo::insertUnconditionalBranch
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
Definition: TargetInstrInfo.h:691
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:332
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:958
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1243
llvm::PeelingModuloScheduleExpander::CreateLCSSAExitingBlock
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition: ModuloSchedule.cpp:1842
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::LoopPeelDirection
LoopPeelDirection
Definition: MachineLoopUtils.h:18
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:252
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::MachineInstr::isRegTiedToUseOperand
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
Definition: MachineInstr.h:1534
llvm::PeelingModuloScheduleExpander::getEquivalentRegisterIn
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition: ModuloSchedule.cpp:1886
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::MachineBasicBlock::phis
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
Definition: MachineBasicBlock.h:296
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:549
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
llvm::MachineInstr::isInlineAsm
bool isInlineAsm() const
Definition: MachineInstr.h:1249
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:672
hasUseAfterLoop
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition: ModuloSchedule.cpp:351
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1988
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:464
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: ModuloSchedule.cpp:35
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:759
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:372
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LiveIntervals::createEmptyInterval
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Definition: LiveIntervals.h:131
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: ModuloSchedule.cpp:59
llvm::MachineBasicBlock::transferSuccessors
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
Definition: MachineBasicBlock.cpp:862
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:124
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:261
llvm::LPD_Back
@ LPD_Back
Peel the last iteration of the loop.
Definition: MachineLoopUtils.h:20
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
operator==
bool operator==(const StringView &LHS, const StringView &RHS)
Definition: StringView.h:119
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:804
llvm::PeelingModuloScheduleExpander::LIS
LiveIntervals * LIS
Definition: ModuloSchedule.h:298
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: MachineBasicBlock.cpp:1383
llvm::PeelingModuloScheduleExpander::MRI
MachineRegisterInfo & MRI
Definition: ModuloSchedule.h:296
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:781
Epilog
@ Epilog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:124
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1335
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:612
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:245
llvm::ModuloSchedule::getCycle
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
Definition: ModuloSchedule.h:139
llvm::ModuloSchedule::getNumStages
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
Definition: ModuloSchedule.h:122
llvm::PeelingModuloScheduleExpander::IllegalPhisToDelete
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
Definition: ModuloSchedule.h:325
llvm::ModuloSchedule::getLoop
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
Definition: ModuloSchedule.h:118
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:709
llvm::PeelingModuloScheduleExpander::peelPrologAndEpilogs
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition: ModuloSchedule.cpp:1725
llvm::PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition: ModuloSchedule.cpp:2000
getInitPhiReg
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition: ModuloSchedule.cpp:51
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:467
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:72
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator
DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
Definition: DenseMap.h:70
llvm::ModuloSchedule::print
void print(raw_ostream &OS)
Definition: ModuloSchedule.cpp:24
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::PeelingModuloScheduleExpander::LiveStages
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
Definition: ModuloSchedule.h:307
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:276
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:481
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:686
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:647
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
llvm::MachineRegisterInfo::defusechain_iterator
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
Definition: MachineRegisterInfo.h:266
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:679
llvm::initializeModuloScheduleTestPass
void initializeModuloScheduleTestPass(PassRegistry &)
llvm::PeelingModuloScheduleExpander::MF
MachineFunction & MF
Definition: ModuloSchedule.h:294
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:159
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:62
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:125
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:330
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
llvm::ModuloSchedule::getStage
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
Definition: ModuloSchedule.h:133
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1099
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:480
llvm::PeelingModuloScheduleExpander::getPhiCanonicalReg
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition: ModuloSchedule.cpp:1708
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:58
removePhis
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Definition: ModuloSchedule.cpp:832
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1161
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::PeelingModuloScheduleExpander::CanonicalMIs
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
Definition: ModuloSchedule.h:318