LLVM  16.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/InitializePasses.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "pipeliner"
23 using namespace llvm;
24 
26  for (MachineInstr *MI : ScheduledInstrs)
27  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
28 }
29 
30 //===----------------------------------------------------------------------===//
31 // ModuloScheduleExpander implementation
32 //===----------------------------------------------------------------------===//
33 
34 /// Return the register values for the operands of a Phi instruction.
35 /// This function assume the instruction is a Phi.
37  unsigned &InitVal, unsigned &LoopVal) {
38  assert(Phi.isPHI() && "Expecting a Phi.");
39 
40  InitVal = 0;
41  LoopVal = 0;
42  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
43  if (Phi.getOperand(i + 1).getMBB() != Loop)
44  InitVal = Phi.getOperand(i).getReg();
45  else
46  LoopVal = Phi.getOperand(i).getReg();
47 
48  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
49 }
50 
51 /// Return the Phi register value that comes from the incoming block.
52 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
53  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
54  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
55  return Phi.getOperand(i).getReg();
56  return 0;
57 }
58 
59 /// Return the Phi register value that comes the loop block.
60 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
61  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
62  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
63  return Phi.getOperand(i).getReg();
64  return 0;
65 }
66 
68  BB = Schedule.getLoop()->getTopBlock();
69  Preheader = *BB->pred_begin();
70  if (Preheader == BB)
71  Preheader = *std::next(BB->pred_begin());
72 
73  // Iterate over the definitions in each instruction, and compute the
74  // stage difference for each use. Keep the maximum value.
75  for (MachineInstr *MI : Schedule.getInstructions()) {
76  int DefStage = Schedule.getStage(MI);
77  for (const MachineOperand &Op : MI->operands()) {
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
84  for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
85  MachineInstr *UseMI = UseOp.getParent();
86  int UseStage = Schedule.getStage(UseMI);
87  unsigned Diff = 0;
88  if (UseStage != -1 && UseStage >= DefStage)
89  Diff = UseStage - DefStage;
90  if (MI->isPHI()) {
91  if (isLoopCarried(*MI))
92  ++Diff;
93  else
94  PhiIsSwapped = true;
95  }
96  MaxDiff = std::max(Diff, MaxDiff);
97  }
98  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
99  }
100  }
101 
102  generatePipelinedLoop();
103 }
104 
105 void ModuloScheduleExpander::generatePipelinedLoop() {
107  assert(LoopInfo && "Must be able to analyze loop!");
108 
109  // Create a new basic block for the kernel and add it to the CFG.
111 
112  unsigned MaxStageCount = Schedule.getNumStages() - 1;
113 
114  // Remember the registers that are used in different stages. The index is
115  // the iteration, or stage, that the instruction is scheduled in. This is
116  // a map between register names in the original block and the names created
117  // in each stage of the pipelined loop.
118  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
119 
120  // The renaming destination by Phis for the registers across stages.
121  // This map is updated during Phis generation to point to the most recent
122  // renaming destination.
123  ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
124 
125  InstrMapTy InstrMap;
126 
128 
129  // Generate the prolog instructions that set up the pipeline.
130  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
131  MF.insert(BB->getIterator(), KernelBB);
132 
133  // Rearrange the instructions to generate the new, pipelined loop,
134  // and update register names as needed.
135  for (MachineInstr *CI : Schedule.getInstructions()) {
136  if (CI->isPHI())
137  continue;
138  unsigned StageNum = Schedule.getStage(CI);
139  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
140  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
141  KernelBB->push_back(NewMI);
142  InstrMap[NewMI] = CI;
143  }
144 
145  // Copy any terminator instructions to the new kernel, and update
146  // names as needed.
147  for (MachineInstr &MI : BB->terminators()) {
148  MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
149  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
150  KernelBB->push_back(NewMI);
151  InstrMap[NewMI] = &MI;
152  }
153 
154  NewKernel = KernelBB;
155  KernelBB->transferSuccessors(BB);
156  KernelBB->replaceSuccessor(BB, KernelBB);
157 
158  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
159  InstrMap, MaxStageCount, MaxStageCount, false);
160  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
161  InstrMap, MaxStageCount, MaxStageCount, false);
162 
163  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
164 
166  // Generate the epilog instructions to complete the pipeline.
167  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
168  PrologBBs);
169 
170  // We need this step because the register allocation doesn't handle some
171  // situations well, so we insert copies to help out.
172  splitLifetimes(KernelBB, EpilogBBs);
173 
174  // Remove dead instructions due to loop induction variables.
175  removeDeadInstructions(KernelBB, EpilogBBs);
176 
177  // Add branches between prolog and epilog blocks.
178  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
179 
180  delete[] VRMap;
181  delete[] VRMapPhi;
182 }
183 
185  // Remove the original loop since it's no longer referenced.
186  for (auto &I : *BB)
188  BB->clear();
189  BB->eraseFromParent();
190 }
191 
192 /// Generate the pipeline prolog code.
193 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
194  MachineBasicBlock *KernelBB,
195  ValueMapTy *VRMap,
196  MBBVectorTy &PrologBBs) {
197  MachineBasicBlock *PredBB = Preheader;
198  InstrMapTy InstrMap;
199 
200  // Generate a basic block for each stage, not including the last stage,
201  // which will be generated in the kernel. Each basic block may contain
202  // instructions from multiple stages/iterations.
203  for (unsigned i = 0; i < LastStage; ++i) {
204  // Create and insert the prolog basic block prior to the original loop
205  // basic block. The original loop is removed later.
207  PrologBBs.push_back(NewBB);
208  MF.insert(BB->getIterator(), NewBB);
209  NewBB->transferSuccessors(PredBB);
210  PredBB->addSuccessor(NewBB);
211  PredBB = NewBB;
212 
213  // Generate instructions for each appropriate stage. Process instructions
214  // in original program order.
215  for (int StageNum = i; StageNum >= 0; --StageNum) {
216  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
217  BBE = BB->getFirstTerminator();
218  BBI != BBE; ++BBI) {
219  if (Schedule.getStage(&*BBI) == StageNum) {
220  if (BBI->isPHI())
221  continue;
222  MachineInstr *NewMI =
223  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
224  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
225  NewBB->push_back(NewMI);
226  InstrMap[NewMI] = &*BBI;
227  }
228  }
229  }
230  rewritePhiValues(NewBB, i, VRMap, InstrMap);
231  LLVM_DEBUG({
232  dbgs() << "prolog:\n";
233  NewBB->dump();
234  });
235  }
236 
237  PredBB->replaceSuccessor(BB, KernelBB);
238 
239  // Check if we need to remove the branch from the preheader to the original
240  // loop, and replace it with a branch to the new loop.
241  unsigned numBranches = TII->removeBranch(*Preheader);
242  if (numBranches) {
244  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
245  }
246 }
247 
248 /// Generate the pipeline epilog code. The epilog code finishes the iterations
249 /// that were started in either the prolog or the kernel. We create a basic
250 /// block for each stage that needs to complete.
251 void ModuloScheduleExpander::generateEpilog(
252  unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
253  ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
254  MBBVectorTy &PrologBBs) {
255  // We need to change the branch from the kernel to the first epilog block, so
256  // this call to analyze branch uses the kernel rather than the original BB.
257  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
259  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
260  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
261  if (checkBranch)
262  return;
263 
264  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
265  if (*LoopExitI == KernelBB)
266  ++LoopExitI;
267  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
268  MachineBasicBlock *LoopExitBB = *LoopExitI;
269 
270  MachineBasicBlock *PredBB = KernelBB;
271  MachineBasicBlock *EpilogStart = LoopExitBB;
272  InstrMapTy InstrMap;
273 
274  // Generate a basic block for each stage, not including the last stage,
275  // which was generated for the kernel. Each basic block may contain
276  // instructions from multiple stages/iterations.
277  int EpilogStage = LastStage + 1;
278  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
280  EpilogBBs.push_back(NewBB);
281  MF.insert(BB->getIterator(), NewBB);
282 
283  PredBB->replaceSuccessor(LoopExitBB, NewBB);
284  NewBB->addSuccessor(LoopExitBB);
285 
286  if (EpilogStart == LoopExitBB)
287  EpilogStart = NewBB;
288 
289  // Add instructions to the epilog depending on the current block.
290  // Process instructions in original program order.
291  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
292  for (auto &BBI : *BB) {
293  if (BBI.isPHI())
294  continue;
295  MachineInstr *In = &BBI;
296  if ((unsigned)Schedule.getStage(In) == StageNum) {
297  // Instructions with memoperands in the epilog are updated with
298  // conservative values.
299  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
300  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
301  NewBB->push_back(NewMI);
302  InstrMap[NewMI] = In;
303  }
304  }
305  }
306  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
307  InstrMap, LastStage, EpilogStage, i == 1);
308  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
309  InstrMap, LastStage, EpilogStage, i == 1);
310  PredBB = NewBB;
311 
312  LLVM_DEBUG({
313  dbgs() << "epilog:\n";
314  NewBB->dump();
315  });
316  }
317 
318  // Fix any Phi nodes in the loop exit block.
319  LoopExitBB->replacePhiUsesWith(BB, PredBB);
320 
321  // Create a branch to the new epilog from the kernel.
322  // Remove the original branch and add a new branch to the epilog.
323  TII->removeBranch(*KernelBB);
324  assert((OrigBB == TBB || OrigBB == FBB) &&
325  "Unable to determine looping branch direction");
326  if (OrigBB != TBB)
327  TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
328  else
329  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
330  // Add a branch to the loop exit.
331  if (EpilogBBs.size() > 0) {
332  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
334  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
335  }
336 }
337 
338 /// Replace all uses of FromReg that appear outside the specified
339 /// basic block with ToReg.
340 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
343  LiveIntervals &LIS) {
344  for (MachineOperand &O :
346  if (O.getParent()->getParent() != MBB)
347  O.setReg(ToReg);
348  if (!LIS.hasInterval(ToReg))
349  LIS.createEmptyInterval(ToReg);
350 }
351 
352 /// Return true if the register has a use that occurs outside the
353 /// specified loop.
354 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
356  for (const MachineOperand &MO : MRI.use_operands(Reg))
357  if (MO.getParent()->getParent() != BB)
358  return true;
359  return false;
360 }
361 
362 /// Generate Phis for the specific block in the generated pipelined code.
363 /// This function looks at the Phis from the original code to guide the
364 /// creation of new Phis.
365 void ModuloScheduleExpander::generateExistingPhis(
367  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
368  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
369  // Compute the stage number for the initial value of the Phi, which
370  // comes from the prolog. The prolog to use depends on to which kernel/
371  // epilog that we're adding the Phi.
372  unsigned PrologStage = 0;
373  unsigned PrevStage = 0;
374  bool InKernel = (LastStageNum == CurStageNum);
375  if (InKernel) {
376  PrologStage = LastStageNum - 1;
377  PrevStage = CurStageNum;
378  } else {
379  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
380  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
381  }
382 
383  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
384  BBE = BB->getFirstNonPHI();
385  BBI != BBE; ++BBI) {
386  Register Def = BBI->getOperand(0).getReg();
387 
388  unsigned InitVal = 0;
389  unsigned LoopVal = 0;
390  getPhiRegs(*BBI, BB, InitVal, LoopVal);
391 
392  unsigned PhiOp1 = 0;
393  // The Phi value from the loop body typically is defined in the loop, but
394  // not always. So, we need to check if the value is defined in the loop.
395  unsigned PhiOp2 = LoopVal;
396  if (VRMap[LastStageNum].count(LoopVal))
397  PhiOp2 = VRMap[LastStageNum][LoopVal];
398 
399  int StageScheduled = Schedule.getStage(&*BBI);
400  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
401  unsigned NumStages = getStagesForReg(Def, CurStageNum);
402  if (NumStages == 0) {
403  // We don't need to generate a Phi anymore, but we need to rename any uses
404  // of the Phi value.
405  unsigned NewReg = VRMap[PrevStage][LoopVal];
406  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
407  InitVal, NewReg);
408  if (VRMap[CurStageNum].count(LoopVal))
409  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
410  }
411  // Adjust the number of Phis needed depending on the number of prologs left,
412  // and the distance from where the Phi is first scheduled. The number of
413  // Phis cannot exceed the number of prolog stages. Each stage can
414  // potentially define two values.
415  unsigned MaxPhis = PrologStage + 2;
416  if (!InKernel && (int)PrologStage <= LoopValStage)
417  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
418  unsigned NumPhis = std::min(NumStages, MaxPhis);
419 
420  unsigned NewReg = 0;
421  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
422  // In the epilog, we may need to look back one stage to get the correct
423  // Phi name, because the epilog and prolog blocks execute the same stage.
424  // The correct name is from the previous block only when the Phi has
425  // been completely scheduled prior to the epilog, and Phi value is not
426  // needed in multiple stages.
427  int StageDiff = 0;
428  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
429  NumPhis == 1)
430  StageDiff = 1;
431  // Adjust the computations below when the phi and the loop definition
432  // are scheduled in different stages.
433  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
434  StageDiff = StageScheduled - LoopValStage;
435  for (unsigned np = 0; np < NumPhis; ++np) {
436  // If the Phi hasn't been scheduled, then use the initial Phi operand
437  // value. Otherwise, use the scheduled version of the instruction. This
438  // is a little complicated when a Phi references another Phi.
439  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
440  PhiOp1 = InitVal;
441  // Check if the Phi has already been scheduled in a prolog stage.
442  else if (PrologStage >= AccessStage + StageDiff + np &&
443  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
444  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
445  // Check if the Phi has already been scheduled, but the loop instruction
446  // is either another Phi, or doesn't occur in the loop.
447  else if (PrologStage >= AccessStage + StageDiff + np) {
448  // If the Phi references another Phi, we need to examine the other
449  // Phi to get the correct value.
450  PhiOp1 = LoopVal;
451  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
452  int Indirects = 1;
453  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
454  int PhiStage = Schedule.getStage(InstOp1);
455  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
456  PhiOp1 = getInitPhiReg(*InstOp1, BB);
457  else
458  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
459  InstOp1 = MRI.getVRegDef(PhiOp1);
460  int PhiOpStage = Schedule.getStage(InstOp1);
461  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
462  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
463  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
464  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
465  break;
466  }
467  ++Indirects;
468  }
469  } else
470  PhiOp1 = InitVal;
471  // If this references a generated Phi in the kernel, get the Phi operand
472  // from the incoming block.
473  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
474  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
475  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
476 
477  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
478  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
479  // In the epilog, a map lookup is needed to get the value from the kernel,
480  // or previous epilog block. How is does this depends on if the
481  // instruction is scheduled in the previous block.
482  if (!InKernel) {
483  int StageDiffAdj = 0;
484  if (LoopValStage != -1 && StageScheduled > LoopValStage)
485  StageDiffAdj = StageScheduled - LoopValStage;
486  // Use the loop value defined in the kernel, unless the kernel
487  // contains the last definition of the Phi.
488  if (np == 0 && PrevStage == LastStageNum &&
489  (StageScheduled != 0 || LoopValStage != 0) &&
490  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
491  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
492  // Use the value defined by the Phi. We add one because we switch
493  // from looking at the loop value to the Phi definition.
494  else if (np > 0 && PrevStage == LastStageNum &&
495  VRMap[PrevStage - np + 1].count(Def))
496  PhiOp2 = VRMap[PrevStage - np + 1][Def];
497  // Use the loop value defined in the kernel.
498  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
499  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
500  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
501  // Use the value defined by the Phi, unless we're generating the first
502  // epilog and the Phi refers to a Phi in a different stage.
503  else if (VRMap[PrevStage - np].count(Def) &&
504  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
505  (LoopValStage == StageScheduled)))
506  PhiOp2 = VRMap[PrevStage - np][Def];
507  }
508 
509  // Check if we can reuse an existing Phi. This occurs when a Phi
510  // references another Phi, and the other Phi is scheduled in an
511  // earlier stage. We can try to reuse an existing Phi up until the last
512  // stage of the current Phi.
513  if (LoopDefIsPhi) {
514  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
515  int LVNumStages = getStagesForPhi(LoopVal);
516  int StageDiff = (StageScheduled - LoopValStage);
517  LVNumStages -= StageDiff;
518  // Make sure the loop value Phi has been processed already.
519  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
520  NewReg = PhiOp2;
521  unsigned ReuseStage = CurStageNum;
522  if (isLoopCarried(*PhiInst))
523  ReuseStage -= LVNumStages;
524  // Check if the Phi to reuse has been generated yet. If not, then
525  // there is nothing to reuse.
526  if (VRMap[ReuseStage - np].count(LoopVal)) {
527  NewReg = VRMap[ReuseStage - np][LoopVal];
528 
529  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
530  Def, NewReg);
531  // Update the map with the new Phi name.
532  VRMap[CurStageNum - np][Def] = NewReg;
533  PhiOp2 = NewReg;
534  if (VRMap[LastStageNum - np - 1].count(LoopVal))
535  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
536 
537  if (IsLast && np == NumPhis - 1)
538  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
539  continue;
540  }
541  }
542  }
543  if (InKernel && StageDiff > 0 &&
544  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
545  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
546  }
547 
548  const TargetRegisterClass *RC = MRI.getRegClass(Def);
549  NewReg = MRI.createVirtualRegister(RC);
550 
551  MachineInstrBuilder NewPhi =
552  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
553  TII->get(TargetOpcode::PHI), NewReg);
554  NewPhi.addReg(PhiOp1).addMBB(BB1);
555  NewPhi.addReg(PhiOp2).addMBB(BB2);
556  if (np == 0)
557  InstrMap[NewPhi] = &*BBI;
558 
559  // We define the Phis after creating the new pipelined code, so
560  // we need to rename the Phi values in scheduled instructions.
561 
562  unsigned PrevReg = 0;
563  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
564  PrevReg = VRMap[PrevStage - np][LoopVal];
565  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
566  NewReg, PrevReg);
567  // If the Phi has been scheduled, use the new name for rewriting.
568  if (VRMap[CurStageNum - np].count(Def)) {
569  unsigned R = VRMap[CurStageNum - np][Def];
570  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
571  NewReg);
572  }
573 
574  // Check if we need to rename any uses that occurs after the loop. The
575  // register to replace depends on whether the Phi is scheduled in the
576  // epilog.
577  if (IsLast && np == NumPhis - 1)
578  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
579 
580  // In the kernel, a dependent Phi uses the value from this Phi.
581  if (InKernel)
582  PhiOp2 = NewReg;
583 
584  // Update the map with the new Phi name.
585  VRMap[CurStageNum - np][Def] = NewReg;
586  }
587 
588  while (NumPhis++ < NumStages) {
589  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
590  NewReg, 0);
591  }
592 
593  // Check if we need to rename a Phi that has been eliminated due to
594  // scheduling.
595  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
596  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
597  }
598 }
599 
600 /// Generate Phis for the specified block in the generated pipelined code.
601 /// These are new Phis needed because the definition is scheduled after the
602 /// use in the pipelined sequence.
603 void ModuloScheduleExpander::generatePhis(
605  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
606  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
607  bool IsLast) {
608  // Compute the stage number that contains the initial Phi value, and
609  // the Phi from the previous stage.
610  unsigned PrologStage = 0;
611  unsigned PrevStage = 0;
612  unsigned StageDiff = CurStageNum - LastStageNum;
613  bool InKernel = (StageDiff == 0);
614  if (InKernel) {
615  PrologStage = LastStageNum - 1;
616  PrevStage = CurStageNum;
617  } else {
618  PrologStage = LastStageNum - StageDiff;
619  PrevStage = LastStageNum + StageDiff - 1;
620  }
621 
622  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
623  BBE = BB->instr_end();
624  BBI != BBE; ++BBI) {
625  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
626  MachineOperand &MO = BBI->getOperand(i);
627  if (!MO.isReg() || !MO.isDef() ||
629  continue;
630 
631  int StageScheduled = Schedule.getStage(&*BBI);
632  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
633  Register Def = MO.getReg();
634  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
635  // An instruction scheduled in stage 0 and is used after the loop
636  // requires a phi in the epilog for the last definition from either
637  // the kernel or prolog.
638  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
639  hasUseAfterLoop(Def, BB, MRI))
640  NumPhis = 1;
641  if (!InKernel && (unsigned)StageScheduled > PrologStage)
642  continue;
643 
644  unsigned PhiOp2;
645  if (InKernel) {
646  PhiOp2 = VRMap[PrevStage][Def];
647  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
648  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
649  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
650  }
651  // The number of Phis can't exceed the number of prolog stages. The
652  // prolog stage number is zero based.
653  if (NumPhis > PrologStage + 1 - StageScheduled)
654  NumPhis = PrologStage + 1 - StageScheduled;
655  for (unsigned np = 0; np < NumPhis; ++np) {
656  // Example for
657  // Org:
658  // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
659  //
660  // Prolog0 (Stage0):
661  // %Clone0 = ...
662  // Prolog1 (Stage1):
663  // %Clone1 = ...
664  // Kernel (Stage2):
665  // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
666  // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
667  // %Clone2 = ...
668  // Epilog0 (Stage3):
669  // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
670  // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
671  // Epilog1 (Stage4):
672  // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
673  //
674  // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
675  // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
676  // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
677 
678  unsigned PhiOp1 = VRMap[PrologStage][Def];
679  if (np <= PrologStage)
680  PhiOp1 = VRMap[PrologStage - np][Def];
681  if (!InKernel) {
682  if (PrevStage == LastStageNum && np == 0)
683  PhiOp2 = VRMap[LastStageNum][Def];
684  else
685  PhiOp2 = VRMapPhi[PrevStage - np][Def];
686  }
687 
688  const TargetRegisterClass *RC = MRI.getRegClass(Def);
689  Register NewReg = MRI.createVirtualRegister(RC);
690 
691  MachineInstrBuilder NewPhi =
692  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
693  TII->get(TargetOpcode::PHI), NewReg);
694  NewPhi.addReg(PhiOp1).addMBB(BB1);
695  NewPhi.addReg(PhiOp2).addMBB(BB2);
696  if (np == 0)
697  InstrMap[NewPhi] = &*BBI;
698 
699  // Rewrite uses and update the map. The actions depend upon whether
700  // we generating code for the kernel or epilog blocks.
701  if (InKernel) {
702  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
703  NewReg);
704  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
705  NewReg);
706 
707  PhiOp2 = NewReg;
708  VRMapPhi[PrevStage - np - 1][Def] = NewReg;
709  } else {
710  VRMapPhi[CurStageNum - np][Def] = NewReg;
711  if (np == NumPhis - 1)
712  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
713  NewReg);
714  }
715  if (IsLast && np == NumPhis - 1)
716  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
717  }
718  }
719  }
720 }
721 
722 /// Remove instructions that generate values with no uses.
723 /// Typically, these are induction variable operations that generate values
724 /// used in the loop itself. A dead instruction has a definition with
725 /// no uses, or uses that occur in the original loop only.
726 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
727  MBBVectorTy &EpilogBBs) {
728  // For each epilog block, check that the value defined by each instruction
729  // is used. If not, delete it.
730  for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
732  ME = MBB->instr_rend();
733  MI != ME;) {
734  // From DeadMachineInstructionElem. Don't delete inline assembly.
735  if (MI->isInlineAsm()) {
736  ++MI;
737  continue;
738  }
739  bool SawStore = false;
740  // Check if it's safe to remove the instruction due to side effects.
741  // We can, and want to, remove Phis here.
742  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
743  ++MI;
744  continue;
745  }
746  bool used = true;
747  for (const MachineOperand &MO : MI->operands()) {
748  if (!MO.isReg() || !MO.isDef())
749  continue;
750  Register reg = MO.getReg();
751  // Assume physical registers are used, unless they are marked dead.
752  if (Register::isPhysicalRegister(reg)) {
753  used = !MO.isDead();
754  if (used)
755  break;
756  continue;
757  }
758  unsigned realUses = 0;
759  for (const MachineOperand &U : MRI.use_operands(reg)) {
760  // Check if there are any uses that occur only in the original
761  // loop. If so, that's not a real use.
762  if (U.getParent()->getParent() != BB) {
763  realUses++;
764  used = true;
765  break;
766  }
767  }
768  if (realUses > 0)
769  break;
770  used = false;
771  }
772  if (!used) {
774  MI++->eraseFromParent();
775  continue;
776  }
777  ++MI;
778  }
779  // In the kernel block, check if we can remove a Phi that generates a value
780  // used in an instruction removed in the epilog block.
781  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
782  Register reg = MI.getOperand(0).getReg();
783  if (MRI.use_begin(reg) == MRI.use_end()) {
785  MI.eraseFromParent();
786  }
787  }
788 }
789 
790 /// For loop carried definitions, we split the lifetime of a virtual register
791 /// that has uses past the definition in the next iteration. A copy with a new
792 /// virtual register is inserted before the definition, which helps with
793 /// generating a better register assignment.
794 ///
795 /// v1 = phi(a, v2) v1 = phi(a, v2)
796 /// v2 = phi(b, v3) v2 = phi(b, v3)
797 /// v3 = .. v4 = copy v1
798 /// .. = V1 v3 = ..
799 /// .. = v4
800 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
801  MBBVectorTy &EpilogBBs) {
803  for (auto &PHI : KernelBB->phis()) {
804  Register Def = PHI.getOperand(0).getReg();
805  // Check for any Phi definition that used as an operand of another Phi
806  // in the same block.
808  E = MRI.use_instr_end();
809  I != E; ++I) {
810  if (I->isPHI() && I->getParent() == KernelBB) {
811  // Get the loop carried definition.
812  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
813  if (!LCDef)
814  continue;
815  MachineInstr *MI = MRI.getVRegDef(LCDef);
816  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
817  continue;
818  // Search through the rest of the block looking for uses of the Phi
819  // definition. If one occurs, then split the lifetime.
820  unsigned SplitReg = 0;
822  KernelBB->instr_end()))
823  if (BBJ.readsRegister(Def)) {
824  // We split the lifetime when we find the first use.
825  if (SplitReg == 0) {
826  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
827  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
828  TII->get(TargetOpcode::COPY), SplitReg)
829  .addReg(Def);
830  }
831  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
832  }
833  if (!SplitReg)
834  continue;
835  // Search through each of the epilog blocks for any uses to be renamed.
836  for (auto &Epilog : EpilogBBs)
837  for (auto &I : *Epilog)
838  if (I.readsRegister(Def))
839  I.substituteRegister(Def, SplitReg, 0, *TRI);
840  break;
841  }
842  }
843  }
844 }
845 
846 /// Remove the incoming block from the Phis in a basic block.
848  for (MachineInstr &MI : *BB) {
849  if (!MI.isPHI())
850  break;
851  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
852  if (MI.getOperand(i + 1).getMBB() == Incoming) {
853  MI.removeOperand(i + 1);
854  MI.removeOperand(i);
855  break;
856  }
857  }
858 }
859 
860 /// Create branches from each prolog basic block to the appropriate epilog
861 /// block. These edges are needed if the loop ends before reaching the
862 /// kernel.
863 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
864  MBBVectorTy &PrologBBs,
865  MachineBasicBlock *KernelBB,
866  MBBVectorTy &EpilogBBs,
867  ValueMapTy *VRMap) {
868  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
869  MachineBasicBlock *LastPro = KernelBB;
870  MachineBasicBlock *LastEpi = KernelBB;
871 
872  // Start from the blocks connected to the kernel and work "out"
873  // to the first prolog and the last epilog blocks.
875  unsigned MaxIter = PrologBBs.size() - 1;
876  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
877  // Add branches to the prolog that go to the corresponding
878  // epilog, and the fall-thru prolog/kernel block.
879  MachineBasicBlock *Prolog = PrologBBs[j];
880  MachineBasicBlock *Epilog = EpilogBBs[i];
881 
883  Optional<bool> StaticallyGreater =
884  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
885  unsigned numAdded = 0;
886  if (!StaticallyGreater) {
887  Prolog->addSuccessor(Epilog);
888  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
889  } else if (*StaticallyGreater == false) {
890  Prolog->addSuccessor(Epilog);
891  Prolog->removeSuccessor(LastPro);
892  LastEpi->removeSuccessor(Epilog);
893  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
894  removePhis(Epilog, LastEpi);
895  // Remove the blocks that are no longer referenced.
896  if (LastPro != LastEpi) {
897  LastEpi->clear();
898  LastEpi->eraseFromParent();
899  }
900  if (LastPro == KernelBB) {
901  LoopInfo->disposed();
902  NewKernel = nullptr;
903  }
904  LastPro->clear();
905  LastPro->eraseFromParent();
906  } else {
907  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
909  }
910  LastPro = Prolog;
911  LastEpi = Epilog;
912  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
913  E = Prolog->instr_rend();
914  I != E && numAdded > 0; ++I, --numAdded)
915  updateInstruction(&*I, false, j, 0, VRMap);
916  }
917 
918  if (NewKernel) {
919  LoopInfo->setPreheader(PrologBBs[MaxIter]);
920  LoopInfo->adjustTripCount(-(MaxIter + 1));
921  }
922 }
923 
924 /// Return true if we can compute the amount the instruction changes
925 /// during each iteration. Set Delta to the amount of the change.
926 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
928  const MachineOperand *BaseOp;
929  int64_t Offset;
930  bool OffsetIsScalable;
931  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
932  return false;
933 
934  // FIXME: This algorithm assumes instructions have fixed-size offsets.
935  if (OffsetIsScalable)
936  return false;
937 
938  if (!BaseOp->isReg())
939  return false;
940 
941  Register BaseReg = BaseOp->getReg();
942 
943  MachineRegisterInfo &MRI = MF.getRegInfo();
944  // Check if there is a Phi. If so, get the definition in the loop.
945  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
946  if (BaseDef && BaseDef->isPHI()) {
947  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
948  BaseDef = MRI.getVRegDef(BaseReg);
949  }
950  if (!BaseDef)
951  return false;
952 
953  int D = 0;
954  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
955  return false;
956 
957  Delta = D;
958  return true;
959 }
960 
961 /// Update the memory operand with a new offset when the pipeliner
962 /// generates a new copy of the instruction that refers to a
963 /// different memory location.
964 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
965  MachineInstr &OldMI,
966  unsigned Num) {
967  if (Num == 0)
968  return;
969  // If the instruction has memory operands, then adjust the offset
970  // when the instruction appears in different stages.
971  if (NewMI.memoperands_empty())
972  return;
974  for (MachineMemOperand *MMO : NewMI.memoperands()) {
975  // TODO: Figure out whether isAtomic is really necessary (see D57601).
976  if (MMO->isVolatile() || MMO->isAtomic() ||
977  (MMO->isInvariant() && MMO->isDereferenceable()) ||
978  (!MMO->getValue())) {
979  NewMMOs.push_back(MMO);
980  continue;
981  }
982  unsigned Delta;
983  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
984  int64_t AdjOffset = Delta * Num;
985  NewMMOs.push_back(
986  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
987  } else {
988  NewMMOs.push_back(
990  }
991  }
992  NewMI.setMemRefs(MF, NewMMOs);
993 }
994 
995 /// Clone the instruction for the new pipelined loop and update the
996 /// memory operands, if needed.
997 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
998  unsigned CurStageNum,
999  unsigned InstStageNum) {
1000  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1001  // Check for tied operands in inline asm instructions. This should be handled
1002  // elsewhere, but I'm not sure of the best solution.
1003  if (OldMI->isInlineAsm())
1004  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
1005  const auto &MO = OldMI->getOperand(i);
1006  if (MO.isReg() && MO.isUse())
1007  break;
1008  unsigned UseIdx;
1009  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
1010  NewMI->tieOperands(i, UseIdx);
1011  }
1012  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1013  return NewMI;
1014 }
1015 
1016 /// Clone the instruction for the new pipelined loop. If needed, this
1017 /// function updates the instruction using the values saved in the
1018 /// InstrChanges structure.
1019 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1020  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1021  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1022  auto It = InstrChanges.find(OldMI);
1023  if (It != InstrChanges.end()) {
1024  std::pair<unsigned, int64_t> RegAndOffset = It->second;
1025  unsigned BasePos, OffsetPos;
1026  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1027  return nullptr;
1028  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1029  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1030  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1031  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1032  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1033  }
1034  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1035  return NewMI;
1036 }
1037 
1038 /// Update the machine instruction with new virtual registers. This
1039 /// function may change the definitions and/or uses.
1040 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1041  bool LastDef,
1042  unsigned CurStageNum,
1043  unsigned InstrStageNum,
1044  ValueMapTy *VRMap) {
1045  for (MachineOperand &MO : NewMI->operands()) {
1046  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1047  continue;
1048  Register reg = MO.getReg();
1049  if (MO.isDef()) {
1050  // Create a new virtual register for the definition.
1051  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1052  Register NewReg = MRI.createVirtualRegister(RC);
1053  MO.setReg(NewReg);
1054  VRMap[CurStageNum][reg] = NewReg;
1055  if (LastDef)
1056  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1057  } else if (MO.isUse()) {
1058  MachineInstr *Def = MRI.getVRegDef(reg);
1059  // Compute the stage that contains the last definition for instruction.
1060  int DefStageNum = Schedule.getStage(Def);
1061  unsigned StageNum = CurStageNum;
1062  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1063  // Compute the difference in stages between the defintion and the use.
1064  unsigned StageDiff = (InstrStageNum - DefStageNum);
1065  // Make an adjustment to get the last definition.
1066  StageNum -= StageDiff;
1067  }
1068  if (VRMap[StageNum].count(reg))
1069  MO.setReg(VRMap[StageNum][reg]);
1070  }
1071  }
1072 }
1073 
1074 /// Return the instruction in the loop that defines the register.
1075 /// If the definition is a Phi, then follow the Phi operand to
1076 /// the instruction in the loop.
1077 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1080  while (Def->isPHI()) {
1081  if (!Visited.insert(Def).second)
1082  break;
1083  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1084  if (Def->getOperand(i + 1).getMBB() == BB) {
1085  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1086  break;
1087  }
1088  }
1089  return Def;
1090 }
1091 
1092 /// Return the new name for the value from the previous stage.
1093 unsigned ModuloScheduleExpander::getPrevMapVal(
1094  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1095  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1096  unsigned PrevVal = 0;
1097  if (StageNum > PhiStage) {
1098  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1099  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1100  // The name is defined in the previous stage.
1101  PrevVal = VRMap[StageNum - 1][LoopVal];
1102  else if (VRMap[StageNum].count(LoopVal))
1103  // The previous name is defined in the current stage when the instruction
1104  // order is swapped.
1105  PrevVal = VRMap[StageNum][LoopVal];
1106  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1107  // The loop value hasn't yet been scheduled.
1108  PrevVal = LoopVal;
1109  else if (StageNum == PhiStage + 1)
1110  // The loop value is another phi, which has not been scheduled.
1111  PrevVal = getInitPhiReg(*LoopInst, BB);
1112  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1113  // The loop value is another phi, which has been scheduled.
1114  PrevVal =
1115  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1116  LoopStage, VRMap, BB);
1117  }
1118  return PrevVal;
1119 }
1120 
1121 /// Rewrite the Phi values in the specified block to use the mappings
1122 /// from the initial operand. Once the Phi is scheduled, we switch
1123 /// to using the loop value instead of the Phi value, so those names
1124 /// do not need to be rewritten.
1125 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1126  unsigned StageNum,
1127  ValueMapTy *VRMap,
1128  InstrMapTy &InstrMap) {
1129  for (auto &PHI : BB->phis()) {
1130  unsigned InitVal = 0;
1131  unsigned LoopVal = 0;
1132  getPhiRegs(PHI, BB, InitVal, LoopVal);
1133  Register PhiDef = PHI.getOperand(0).getReg();
1134 
1135  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1136  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1137  unsigned NumPhis = getStagesForPhi(PhiDef);
1138  if (NumPhis > StageNum)
1139  NumPhis = StageNum;
1140  for (unsigned np = 0; np <= NumPhis; ++np) {
1141  unsigned NewVal =
1142  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1143  if (!NewVal)
1144  NewVal = InitVal;
1145  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1146  NewVal);
1147  }
1148  }
1149 }
1150 
1151 /// Rewrite a previously scheduled instruction to use the register value
1152 /// from the new instruction. Make sure the instruction occurs in the
1153 /// basic block, and we don't change the uses in the new instruction.
1154 void ModuloScheduleExpander::rewriteScheduledInstr(
1155  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1156  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1157  unsigned PrevReg) {
1158  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1159  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1160  // Rewrite uses that have been scheduled already to use the new
1161  // Phi register.
1162  for (MachineOperand &UseOp :
1164  MachineInstr *UseMI = UseOp.getParent();
1165  if (UseMI->getParent() != BB)
1166  continue;
1167  if (UseMI->isPHI()) {
1168  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1169  continue;
1170  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1171  continue;
1172  }
1173  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1174  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1175  MachineInstr *OrigMI = OrigInstr->second;
1176  int StageSched = Schedule.getStage(OrigMI);
1177  int CycleSched = Schedule.getCycle(OrigMI);
1178  unsigned ReplaceReg = 0;
1179  // This is the stage for the scheduled instruction.
1180  if (StagePhi == StageSched && Phi->isPHI()) {
1181  int CyclePhi = Schedule.getCycle(Phi);
1182  if (PrevReg && InProlog)
1183  ReplaceReg = PrevReg;
1184  else if (PrevReg && !isLoopCarried(*Phi) &&
1185  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1186  ReplaceReg = PrevReg;
1187  else
1188  ReplaceReg = NewReg;
1189  }
1190  // The scheduled instruction occurs before the scheduled Phi, and the
1191  // Phi is not loop carried.
1192  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1193  ReplaceReg = NewReg;
1194  if (StagePhi > StageSched && Phi->isPHI())
1195  ReplaceReg = NewReg;
1196  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1197  ReplaceReg = NewReg;
1198  if (ReplaceReg) {
1199  const TargetRegisterClass *NRC =
1200  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1201  if (NRC)
1202  UseOp.setReg(ReplaceReg);
1203  else {
1204  Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1205  BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1206  SplitReg)
1207  .addReg(ReplaceReg);
1208  UseOp.setReg(SplitReg);
1209  }
1210  }
1211  }
1212 }
1213 
1214 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1215  if (!Phi.isPHI())
1216  return false;
1217  int DefCycle = Schedule.getCycle(&Phi);
1218  int DefStage = Schedule.getStage(&Phi);
1219 
1220  unsigned InitVal = 0;
1221  unsigned LoopVal = 0;
1222  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1223  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1224  if (!Use || Use->isPHI())
1225  return true;
1226  int LoopCycle = Schedule.getCycle(Use);
1227  int LoopStage = Schedule.getStage(Use);
1228  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1229 }
1230 
1231 //===----------------------------------------------------------------------===//
1232 // PeelingModuloScheduleExpander implementation
1233 //===----------------------------------------------------------------------===//
1234 // This is a reimplementation of ModuloScheduleExpander that works by creating
1235 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1236 //===----------------------------------------------------------------------===//
1237 
1238 namespace {
1239 // Remove any dead phis in MBB. Dead phis either have only one block as input
1240 // (in which case they are the identity) or have no uses.
1241 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1242  LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1243  bool Changed = true;
1244  while (Changed) {
1245  Changed = false;
1247  assert(MI.isPHI());
1248  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1249  if (LIS)
1251  MI.eraseFromParent();
1252  Changed = true;
1253  } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1254  const TargetRegisterClass *ConstrainRegClass =
1255  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1256  MRI.getRegClass(MI.getOperand(0).getReg()));
1257  assert(ConstrainRegClass &&
1258  "Expected a valid constrained register class!");
1259  (void)ConstrainRegClass;
1260  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1261  MI.getOperand(1).getReg());
1262  if (LIS)
1264  MI.eraseFromParent();
1265  Changed = true;
1266  }
1267  }
1268  }
1269 }
1270 
1271 /// Rewrites the kernel block in-place to adhere to the given schedule.
1272 /// KernelRewriter holds all of the state required to perform the rewriting.
1273 class KernelRewriter {
1274  ModuloSchedule &S;
1276  MachineBasicBlock *PreheaderBB, *ExitBB;
1278  const TargetInstrInfo *TII;
1279  LiveIntervals *LIS;
1280 
1281  // Map from register class to canonical undef register for that class.
1283  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1284  // this map is only used when InitReg is non-undef.
1286  // Map from LoopReg to phi register where the InitReg is undef.
1287  DenseMap<Register, Register> UndefPhis;
1288 
1289  // Reg is used by MI. Return the new register MI should use to adhere to the
1290  // schedule. Insert phis as necessary.
1291  Register remapUse(Register Reg, MachineInstr &MI);
1292  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1293  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1294  // or will be chosen so as to share another phi.
1295  Register phi(Register LoopReg, Optional<Register> InitReg = {},
1296  const TargetRegisterClass *RC = nullptr);
1297  // Create an undef register of the given register class.
1298  Register undef(const TargetRegisterClass *RC);
1299 
1300 public:
1301  KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1302  LiveIntervals *LIS = nullptr);
1303  void rewrite();
1304 };
1305 } // namespace
1306 
1307 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1308  MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1309  : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1310  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1311  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1312  PreheaderBB = *BB->pred_begin();
1313  if (PreheaderBB == BB)
1314  PreheaderBB = *std::next(BB->pred_begin());
1315 }
1316 
1317 void KernelRewriter::rewrite() {
1318  // Rearrange the loop to be in schedule order. Note that the schedule may
1319  // contain instructions that are not owned by the loop block (InstrChanges and
1320  // friends), so we gracefully handle unowned instructions and delete any
1321  // instructions that weren't in the schedule.
1322  auto InsertPt = BB->getFirstTerminator();
1323  MachineInstr *FirstMI = nullptr;
1324  for (MachineInstr *MI : S.getInstructions()) {
1325  if (MI->isPHI())
1326  continue;
1327  if (MI->getParent())
1328  MI->removeFromParent();
1329  BB->insert(InsertPt, MI);
1330  if (!FirstMI)
1331  FirstMI = MI;
1332  }
1333  assert(FirstMI && "Failed to find first MI in schedule");
1334 
1335  // At this point all of the scheduled instructions are between FirstMI
1336  // and the end of the block. Kill from the first non-phi to FirstMI.
1337  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1338  if (LIS)
1340  (I++)->eraseFromParent();
1341  }
1342 
1343  // Now remap every instruction in the loop.
1344  for (MachineInstr &MI : *BB) {
1345  if (MI.isPHI() || MI.isTerminator())
1346  continue;
1347  for (MachineOperand &MO : MI.uses()) {
1348  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1349  continue;
1350  Register Reg = remapUse(MO.getReg(), MI);
1351  MO.setReg(Reg);
1352  }
1353  }
1354  EliminateDeadPhis(BB, MRI, LIS);
1355 
1356  // Ensure a phi exists for all instructions that are either referenced by
1357  // an illegal phi or by an instruction outside the loop. This allows us to
1358  // treat remaps of these values the same as "normal" values that come from
1359  // loop-carried phis.
1360  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1361  if (MI->isPHI()) {
1362  Register R = MI->getOperand(0).getReg();
1363  phi(R);
1364  continue;
1365  }
1366 
1367  for (MachineOperand &Def : MI->defs()) {
1368  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1369  if (MI.getParent() != BB) {
1370  phi(Def.getReg());
1371  break;
1372  }
1373  }
1374  }
1375  }
1376 }
1377 
1378 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1379  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1380  if (!Producer)
1381  return Reg;
1382 
1383  int ConsumerStage = S.getStage(&MI);
1384  if (!Producer->isPHI()) {
1385  // Non-phi producers are simple to remap. Insert as many phis as the
1386  // difference between the consumer and producer stages.
1387  if (Producer->getParent() != BB)
1388  // Producer was not inside the loop. Use the register as-is.
1389  return Reg;
1390  int ProducerStage = S.getStage(Producer);
1391  assert(ConsumerStage != -1 &&
1392  "In-loop consumer should always be scheduled!");
1393  assert(ConsumerStage >= ProducerStage);
1394  unsigned StageDiff = ConsumerStage - ProducerStage;
1395 
1396  for (unsigned I = 0; I < StageDiff; ++I)
1397  Reg = phi(Reg);
1398  return Reg;
1399  }
1400 
1401  // First, dive through the phi chain to find the defaults for the generated
1402  // phis.
1403  SmallVector<Optional<Register>, 4> Defaults;
1404  Register LoopReg = Reg;
1405  auto LoopProducer = Producer;
1406  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1407  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1408  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1409  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1410  assert(LoopProducer);
1411  }
1412  int LoopProducerStage = S.getStage(LoopProducer);
1413 
1414  Optional<Register> IllegalPhiDefault;
1415 
1416  if (LoopProducerStage == -1) {
1417  // Do nothing.
1418  } else if (LoopProducerStage > ConsumerStage) {
1419  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1420  // In addition, Consumer's cycle must be scheduled after Producer in the
1421  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1422  // functions.
1423 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1424  int LoopProducerCycle = S.getCycle(LoopProducer);
1425  int ConsumerCycle = S.getCycle(&MI);
1426 #endif
1427  assert(LoopProducerCycle <= ConsumerCycle);
1428  assert(LoopProducerStage == ConsumerStage + 1);
1429  // Peel off the first phi from Defaults and insert a phi between producer
1430  // and consumer. This phi will not be at the front of the block so we
1431  // consider it illegal. It will only exist during the rewrite process; it
1432  // needs to exist while we peel off prologs because these could take the
1433  // default value. After that we can replace all uses with the loop producer
1434  // value.
1435  IllegalPhiDefault = Defaults.front();
1436  Defaults.erase(Defaults.begin());
1437  } else {
1438  assert(ConsumerStage >= LoopProducerStage);
1439  int StageDiff = ConsumerStage - LoopProducerStage;
1440  if (StageDiff > 0) {
1441  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1442  << " to " << (Defaults.size() + StageDiff) << "\n");
1443  // If we need more phis than we have defaults for, pad out with undefs for
1444  // the earliest phis, which are at the end of the defaults chain (the
1445  // chain is in reverse order).
1446  Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1447  ? Optional<Register>()
1448  : Defaults.back());
1449  }
1450  }
1451 
1452  // Now we know the number of stages to jump back, insert the phi chain.
1453  auto DefaultI = Defaults.rbegin();
1454  while (DefaultI != Defaults.rend())
1455  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1456 
1457  if (IllegalPhiDefault) {
1458  // The consumer optionally consumes LoopProducer in the same iteration
1459  // (because the producer is scheduled at an earlier cycle than the consumer)
1460  // or the initial value. To facilitate this we create an illegal block here
1461  // by embedding a phi in the middle of the block. We will fix this up
1462  // immediately prior to pruning.
1463  auto RC = MRI.getRegClass(Reg);
1465  MachineInstr *IllegalPhi =
1466  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1467  .addReg(*IllegalPhiDefault)
1468  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1469  .addReg(LoopReg)
1470  .addMBB(BB); // Block choice is arbitrary and has no effect.
1471  // Illegal phi should belong to the producer stage so that it can be
1472  // filtered correctly during peeling.
1473  S.setStage(IllegalPhi, LoopProducerStage);
1474  return R;
1475  }
1476 
1477  return LoopReg;
1478 }
1479 
1481  const TargetRegisterClass *RC) {
1482  // If the init register is not undef, try and find an existing phi.
1483  if (InitReg) {
1484  auto I = Phis.find({LoopReg, InitReg.value()});
1485  if (I != Phis.end())
1486  return I->second;
1487  } else {
1488  for (auto &KV : Phis) {
1489  if (KV.first.first == LoopReg)
1490  return KV.second;
1491  }
1492  }
1493 
1494  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1495  // find a phi that takes undef as input.
1496  auto I = UndefPhis.find(LoopReg);
1497  if (I != UndefPhis.end()) {
1498  Register R = I->second;
1499  if (!InitReg)
1500  // Found a phi taking undef as input, and this input is undef so return
1501  // without any more changes.
1502  return R;
1503  // Found a phi taking undef as input, so rewrite it to take InitReg.
1505  MI->getOperand(1).setReg(InitReg.value());
1506  Phis.insert({{LoopReg, InitReg.value()}, R});
1507  const TargetRegisterClass *ConstrainRegClass =
1508  MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value()));
1509  assert(ConstrainRegClass && "Expected a valid constrained register class!");
1510  (void)ConstrainRegClass;
1511  UndefPhis.erase(I);
1512  return R;
1513  }
1514 
1515  // Failed to find any existing phi to reuse, so create a new one.
1516  if (!RC)
1517  RC = MRI.getRegClass(LoopReg);
1519  if (InitReg) {
1520  const TargetRegisterClass *ConstrainRegClass =
1521  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1522  assert(ConstrainRegClass && "Expected a valid constrained register class!");
1523  (void)ConstrainRegClass;
1524  }
1525  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1526  .addReg(InitReg ? *InitReg : undef(RC))
1527  .addMBB(PreheaderBB)
1528  .addReg(LoopReg)
1529  .addMBB(BB);
1530  if (!InitReg)
1531  UndefPhis[LoopReg] = R;
1532  else
1533  Phis[{LoopReg, *InitReg}] = R;
1534  return R;
1535 }
1536 
1537 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1538  Register &R = Undefs[RC];
1539  if (R == 0) {
1540  // Create an IMPLICIT_DEF that defines this register if we need it.
1541  // All uses of this should be removed by the time we have finished unrolling
1542  // prologs and epilogs.
1543  R = MRI.createVirtualRegister(RC);
1544  auto *InsertBB = &PreheaderBB->getParent()->front();
1545  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1546  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1547  }
1548  return R;
1549 }
1550 
1551 namespace {
1552 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1553 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1554 /// through and defaults for these phis.
1555 class KernelOperandInfo {
1558  SmallVector<Register, 4> PhiDefaults;
1561 
1562 public:
1563  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1564  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1565  : MRI(MRI) {
1566  Source = MO;
1567  BB = MO->getParent()->getParent();
1568  while (isRegInLoop(MO)) {
1569  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1570  if (MI->isFullCopy()) {
1571  MO = &MI->getOperand(1);
1572  continue;
1573  }
1574  if (!MI->isPHI())
1575  break;
1576  // If this is an illegal phi, don't count it in distance.
1577  if (IllegalPhis.count(MI)) {
1578  MO = &MI->getOperand(3);
1579  continue;
1580  }
1581 
1583  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1584  : &MI->getOperand(3);
1585  PhiDefaults.push_back(Default);
1586  }
1587  Target = MO;
1588  }
1589 
1590  bool operator==(const KernelOperandInfo &Other) const {
1591  return PhiDefaults.size() == Other.PhiDefaults.size();
1592  }
1593 
1594  void print(raw_ostream &OS) const {
1595  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1596  << *Source->getParent();
1597  }
1598 
1599 private:
1600  bool isRegInLoop(MachineOperand *MO) {
1601  return MO->isReg() && MO->getReg().isVirtual() &&
1602  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1603  }
1604 };
1605 } // namespace
1606 
1608 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1609  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1610  if (LPD == LPD_Front)
1611  PeeledFront.push_back(NewBB);
1612  else
1613  PeeledBack.push_front(NewBB);
1614  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1615  ++I, ++NI) {
1616  CanonicalMIs[&*I] = &*I;
1617  CanonicalMIs[&*NI] = &*I;
1618  BlockMIs[{NewBB, &*I}] = &*NI;
1619  BlockMIs[{BB, &*I}] = &*I;
1620  }
1621  return NewBB;
1622 }
1623 
1625  int MinStage) {
1626  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1627  I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1628  MachineInstr *MI = &*I++;
1629  int Stage = getStage(MI);
1630  if (Stage == -1 || Stage >= MinStage)
1631  continue;
1632 
1633  for (MachineOperand &DefMO : MI->defs()) {
1635  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1636  // Only PHIs can use values from this block by construction.
1637  // Match with the equivalent PHI in B.
1638  assert(UseMI.isPHI());
1639  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1640  MI->getParent());
1641  Subs.emplace_back(&UseMI, Reg);
1642  }
1643  for (auto &Sub : Subs)
1644  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1646  }
1647  if (LIS)
1649  MI->eraseFromParent();
1650  }
1651 }
1652 
1654  MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1655  auto InsertPt = DestBB->getFirstNonPHI();
1658  llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1659  if (MI.isPHI()) {
1660  // This is an illegal PHI. If we move any instructions using an illegal
1661  // PHI, we need to create a legal Phi.
1662  if (getStage(&MI) != Stage) {
1663  // The legal Phi is not necessary if the illegal phi's stage
1664  // is being moved.
1665  Register PhiR = MI.getOperand(0).getReg();
1666  auto RC = MRI.getRegClass(PhiR);
1668  MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1669  DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1670  .addReg(PhiR)
1671  .addMBB(SourceBB);
1672  BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1673  CanonicalMIs[NI] = CanonicalMIs[&MI];
1674  Remaps[PhiR] = NR;
1675  }
1676  }
1677  if (getStage(&MI) != Stage)
1678  continue;
1679  MI.removeFromParent();
1680  DestBB->insert(InsertPt, &MI);
1681  auto *KernelMI = CanonicalMIs[&MI];
1682  BlockMIs[{DestBB, KernelMI}] = &MI;
1683  BlockMIs.erase({SourceBB, KernelMI});
1684  }
1685  SmallVector<MachineInstr *, 4> PhiToDelete;
1686  for (MachineInstr &MI : DestBB->phis()) {
1687  assert(MI.getNumOperands() == 3);
1688  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1689  // If the instruction referenced by the phi is moved inside the block
1690  // we don't need the phi anymore.
1691  if (getStage(Def) == Stage) {
1692  Register PhiReg = MI.getOperand(0).getReg();
1693  assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1694  MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1695  MI.getOperand(0).setReg(PhiReg);
1696  PhiToDelete.push_back(&MI);
1697  }
1698  }
1699  for (auto *P : PhiToDelete)
1700  P->eraseFromParent();
1701  InsertPt = DestBB->getFirstNonPHI();
1702  // Helper to clone Phi instructions into the destination block. We clone Phi
1703  // greedily to avoid combinatorial explosion of Phi instructions.
1704  auto clonePhi = [&](MachineInstr *Phi) {
1705  MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1706  DestBB->insert(InsertPt, NewMI);
1707  Register OrigR = Phi->getOperand(0).getReg();
1709  NewMI->getOperand(0).setReg(R);
1710  NewMI->getOperand(1).setReg(OrigR);
1711  NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1712  Remaps[OrigR] = R;
1713  CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1714  BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1716  return R;
1717  };
1718  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1719  for (MachineOperand &MO : I->uses()) {
1720  if (!MO.isReg())
1721  continue;
1722  if (Remaps.count(MO.getReg()))
1723  MO.setReg(Remaps[MO.getReg()]);
1724  else {
1725  // If we are using a phi from the source block we need to add a new phi
1726  // pointing to the old one.
1728  if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1729  Register R = clonePhi(Use);
1730  MO.setReg(R);
1731  }
1732  }
1733  }
1734  }
1735 }
1736 
1737 Register
1739  MachineInstr *Phi) {
1740  unsigned distance = PhiNodeLoopIteration[Phi];
1741  MachineInstr *CanonicalUse = CanonicalPhi;
1742  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1743  for (unsigned I = 0; I < distance; ++I) {
1744  assert(CanonicalUse->isPHI());
1745  assert(CanonicalUse->getNumOperands() == 5);
1746  unsigned LoopRegIdx = 3, InitRegIdx = 1;
1747  if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1748  std::swap(LoopRegIdx, InitRegIdx);
1749  CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1750  CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1751  }
1752  return CanonicalUseReg;
1753 }
1754 
1756  BitVector LS(Schedule.getNumStages(), true);
1757  BitVector AS(Schedule.getNumStages(), true);
1758  LiveStages[BB] = LS;
1759  AvailableStages[BB] = AS;
1760 
1761  // Peel out the prologs.
1762  LS.reset();
1763  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1764  LS[I] = true;
1765  Prologs.push_back(peelKernel(LPD_Front));
1766  LiveStages[Prologs.back()] = LS;
1767  AvailableStages[Prologs.back()] = LS;
1768  }
1769 
1770  // Create a block that will end up as the new loop exiting block (dominated by
1771  // all prologs and epilogs). It will only contain PHIs, in the same order as
1772  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1773  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1774  // property that any value deffed in BB but used outside of BB is used by a
1775  // PHI in the exiting block.
1777  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1778  // Push out the epilogs, again in reverse order.
1779  // We can't assume anything about the minumum loop trip count at this point,
1780  // so emit a fairly complex epilog.
1781 
1782  // We first peel number of stages minus one epilogue. Then we remove dead
1783  // stages and reorder instructions based on their stage. If we have 3 stages
1784  // we generate first:
1785  // E0[3, 2, 1]
1786  // E1[3', 2']
1787  // E2[3'']
1788  // And then we move instructions based on their stages to have:
1789  // E0[3]
1790  // E1[2, 3']
1791  // E2[1, 2', 3'']
1792  // The transformation is legal because we only move instructions past
1793  // instructions of a previous loop iteration.
1794  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1795  Epilogs.push_back(peelKernel(LPD_Back));
1796  MachineBasicBlock *B = Epilogs.back();
1798  // Keep track at which iteration each phi belongs to. We need it to know
1799  // what version of the variable to use during prologue/epilogue stitching.
1800  EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1801  for (MachineInstr &Phi : B->phis())
1803  }
1804  for (size_t I = 0; I < Epilogs.size(); I++) {
1805  LS.reset();
1806  for (size_t J = I; J < Epilogs.size(); J++) {
1807  int Iteration = J;
1808  unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1809  // Move stage one block at a time so that Phi nodes are updated correctly.
1810  for (size_t K = Iteration; K > I; K--)
1811  moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1812  LS[Stage] = true;
1813  }
1814  LiveStages[Epilogs[I]] = LS;
1815  AvailableStages[Epilogs[I]] = AS;
1816  }
1817 
1818  // Now we've defined all the prolog and epilog blocks as a fallthrough
1819  // sequence, add the edges that will be followed if the loop trip count is
1820  // lower than the number of stages (connecting prologs directly with epilogs).
1821  auto PI = Prologs.begin();
1822  auto EI = Epilogs.begin();
1823  assert(Prologs.size() == Epilogs.size());
1824  for (; PI != Prologs.end(); ++PI, ++EI) {
1825  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1826  (*PI)->addSuccessor(*EI);
1827  for (MachineInstr &MI : (*EI)->phis()) {
1828  Register Reg = MI.getOperand(1).getReg();
1830  if (Use && Use->getParent() == Pred) {
1831  MachineInstr *CanonicalUse = CanonicalMIs[Use];
1832  if (CanonicalUse->isPHI()) {
1833  // If the use comes from a phi we need to skip as many phi as the
1834  // distance between the epilogue and the kernel. Trace through the phi
1835  // chain to find the right value.
1836  Reg = getPhiCanonicalReg(CanonicalUse, Use);
1837  }
1839  }
1840  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1841  MI.addOperand(MachineOperand::CreateMBB(*PI));
1842  }
1843  }
1844 
1845  // Create a list of all blocks in order.
1847  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1848  Blocks.push_back(BB);
1849  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1850 
1851  // Iterate in reverse order over all instructions, remapping as we go.
1852  for (MachineBasicBlock *B : reverse(Blocks)) {
1853  for (auto I = B->instr_rbegin();
1854  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1856  rewriteUsesOf(&*MI);
1857  }
1858  }
1859  for (auto *MI : IllegalPhisToDelete) {
1860  if (LIS)
1862  MI->eraseFromParent();
1863  }
1865 
1866  // Now all remapping has been done, we're free to optimize the generated code.
1867  for (MachineBasicBlock *B : reverse(Blocks))
1868  EliminateDeadPhis(B, MRI, LIS);
1869  EliminateDeadPhis(ExitingBB, MRI, LIS);
1870 }
1871 
1873  MachineFunction &MF = *BB->getParent();
1874  MachineBasicBlock *Exit = *BB->succ_begin();
1875  if (Exit == BB)
1876  Exit = *std::next(BB->succ_begin());
1877 
1879  MF.insert(std::next(BB->getIterator()), NewBB);
1880 
1881  // Clone all phis in BB into NewBB and rewrite.
1882  for (MachineInstr &MI : BB->phis()) {
1883  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1884  Register OldR = MI.getOperand(3).getReg();
1887  for (MachineInstr &Use : MRI.use_instructions(OldR))
1888  if (Use.getParent() != BB)
1889  Uses.push_back(&Use);
1890  for (MachineInstr *Use : Uses)
1891  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1893  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1894  .addReg(OldR)
1895  .addMBB(BB);
1896  BlockMIs[{NewBB, &MI}] = NI;
1897  CanonicalMIs[NI] = &MI;
1898  }
1899  BB->replaceSuccessor(Exit, NewBB);
1900  Exit->replacePhiUsesWith(BB, NewBB);
1901  NewBB->addSuccessor(Exit);
1902 
1903  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1905  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1906  (void)CanAnalyzeBr;
1907  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1908  TII->removeBranch(*BB);
1909  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1910  Cond, DebugLoc());
1911  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1912  return NewBB;
1913 }
1914 
1915 Register
1917  MachineBasicBlock *BB) {
1919  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1920  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1921 }
1922 
1924  if (MI->isPHI()) {
1925  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1926  // and it is produced by this block.
1927  Register PhiR = MI->getOperand(0).getReg();
1928  Register R = MI->getOperand(3).getReg();
1929  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1930  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1931  R = MI->getOperand(1).getReg();
1932  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1933  MRI.replaceRegWith(PhiR, R);
1934  // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1935  // later to figure out how to remap registers.
1936  MI->getOperand(0).setReg(PhiR);
1937  IllegalPhisToDelete.push_back(MI);
1938  return;
1939  }
1940 
1941  int Stage = getStage(MI);
1942  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1943  LiveStages[MI->getParent()].test(Stage))
1944  // Instruction is live, no rewriting to do.
1945  return;
1946 
1947  for (MachineOperand &DefMO : MI->defs()) {
1949  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1950  // Only PHIs can use values from this block by construction.
1951  // Match with the equivalent PHI in B.
1952  assert(UseMI.isPHI());
1953  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1954  MI->getParent());
1955  Subs.emplace_back(&UseMI, Reg);
1956  }
1957  for (auto &Sub : Subs)
1958  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1960  }
1961  if (LIS)
1963  MI->eraseFromParent();
1964 }
1965 
1967  // Work outwards from the kernel.
1968  bool KernelDisposed = false;
1969  int TC = Schedule.getNumStages() - 1;
1970  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1971  ++PI, ++EI, --TC) {
1972  MachineBasicBlock *Prolog = *PI;
1973  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1974  MachineBasicBlock *Epilog = *EI;
1976  TII->removeBranch(*Prolog);
1977  Optional<bool> StaticallyGreater =
1978  LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1979  if (!StaticallyGreater) {
1980  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1981  // Dynamically branch based on Cond.
1982  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1983  } else if (*StaticallyGreater == false) {
1984  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1985  // Prolog never falls through; branch to epilog and orphan interior
1986  // blocks. Leave it to unreachable-block-elim to clean up.
1987  Prolog->removeSuccessor(Fallthrough);
1988  for (MachineInstr &P : Fallthrough->phis()) {
1989  P.removeOperand(2);
1990  P.removeOperand(1);
1991  }
1993  KernelDisposed = true;
1994  } else {
1995  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1996  // Prolog always falls through; remove incoming values in epilog.
1997  Prolog->removeSuccessor(Epilog);
1998  for (MachineInstr &P : Epilog->phis()) {
1999  P.removeOperand(4);
2000  P.removeOperand(3);
2001  }
2002  }
2003  }
2004 
2005  if (!KernelDisposed) {
2006  LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
2007  LoopInfo->setPreheader(Prologs.back());
2008  } else {
2009  LoopInfo->disposed();
2010  }
2011 }
2012 
2014  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2015  KR.rewrite();
2016 }
2017 
2019  BB = Schedule.getLoop()->getTopBlock();
2023  assert(LoopInfo);
2024 
2025  rewriteKernel();
2027  fixupBranches();
2028 }
2029 
2031  BB = Schedule.getLoop()->getTopBlock();
2033 
2034  // Dump the schedule before we invalidate and remap all its instructions.
2035  // Stash it in a string so we can print it if we found an error.
2036  std::string ScheduleDump;
2037  raw_string_ostream OS(ScheduleDump);
2038  Schedule.print(OS);
2039  OS.flush();
2040 
2041  // First, run the normal ModuleScheduleExpander. We don't support any
2042  // InstrChanges.
2043  assert(LIS && "Requires LiveIntervals!");
2046  MSE.expand();
2047  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2048  if (!ExpandedKernel) {
2049  // The expander optimized away the kernel. We can't do any useful checking.
2050  MSE.cleanup();
2051  return;
2052  }
2053  // Before running the KernelRewriter, re-add BB into the CFG.
2055 
2056  // Now run the new expansion algorithm.
2057  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2058  KR.rewrite();
2060 
2061  // Collect all illegal phis that the new algorithm created. We'll give these
2062  // to KernelOperandInfo.
2063  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2064  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2065  if (NI->isPHI())
2066  IllegalPhis.insert(&*NI);
2067  }
2068 
2069  // Co-iterate across both kernels. We expect them to be identical apart from
2070  // phis and full COPYs (we look through both).
2072  auto OI = ExpandedKernel->begin();
2073  auto NI = BB->begin();
2074  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2075  while (OI->isPHI() || OI->isFullCopy())
2076  ++OI;
2077  while (NI->isPHI() || NI->isFullCopy())
2078  ++NI;
2079  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2080  // Analyze every operand separately.
2081  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2082  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2083  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2084  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2085  }
2086 
2087  bool Failed = false;
2088  for (auto &OldAndNew : KOIs) {
2089  if (OldAndNew.first == OldAndNew.second)
2090  continue;
2091  Failed = true;
2092  errs() << "Modulo kernel validation error: [\n";
2093  errs() << " [golden] ";
2094  OldAndNew.first.print(errs());
2095  errs() << " ";
2096  OldAndNew.second.print(errs());
2097  errs() << "]\n";
2098  }
2099 
2100  if (Failed) {
2101  errs() << "Golden reference kernel:\n";
2102  ExpandedKernel->print(errs());
2103  errs() << "New kernel:\n";
2104  BB->print(errs());
2105  errs() << ScheduleDump;
2107  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2108  }
2109 
2110  // Cleanup by removing BB from the CFG again as the original
2111  // ModuloScheduleExpander intended.
2113  MSE.cleanup();
2114 }
2115 
2116 //===----------------------------------------------------------------------===//
2117 // ModuloScheduleTestPass implementation
2118 //===----------------------------------------------------------------------===//
2119 // This pass constructs a ModuloSchedule from its module and runs
2120 // ModuloScheduleExpander.
2121 //
2122 // The module is expected to contain a single-block analyzable loop.
2123 // The total order of instructions is taken from the loop as-is.
2124 // Instructions are expected to be annotated with a PostInstrSymbol.
2125 // This PostInstrSymbol must have the following format:
2126 // "Stage=%d Cycle=%d".
2127 //===----------------------------------------------------------------------===//
2128 
2129 namespace {
2130 class ModuloScheduleTest : public MachineFunctionPass {
2131 public:
2132  static char ID;
2133 
2134  ModuloScheduleTest() : MachineFunctionPass(ID) {
2136  }
2137 
2138  bool runOnMachineFunction(MachineFunction &MF) override;
2139  void runOnLoop(MachineFunction &MF, MachineLoop &L);
2140 
2141  void getAnalysisUsage(AnalysisUsage &AU) const override {
2143  AU.addRequired<LiveIntervals>();
2145  }
2146 };
2147 } // namespace
2148 
2149 char ModuloScheduleTest::ID = 0;
2150 
2151 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2152  "Modulo Schedule test pass", false, false)
2155 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2156  "Modulo Schedule test pass", false, false)
2157 
2158 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2159  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2160  for (auto *L : MLI) {
2161  if (L->getTopBlock() != L->getBottomBlock())
2162  continue;
2163  runOnLoop(MF, *L);
2164  return false;
2165  }
2166  return false;
2167 }
2168 
2169 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2170  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2171  std::pair<StringRef, StringRef> StageTokenAndValue =
2172  getToken(StageAndCycle.first, "-");
2173  std::pair<StringRef, StringRef> CycleTokenAndValue =
2174  getToken(StageAndCycle.second, "-");
2175  if (StageTokenAndValue.first != "Stage" ||
2176  CycleTokenAndValue.first != "_Cycle") {
2178  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2179  return;
2180  }
2181 
2182  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2183  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2184 
2185  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2186 }
2187 
2188 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2189  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2191  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2192 
2194  std::vector<MachineInstr *> Instrs;
2195  for (MachineInstr &MI : *BB) {
2196  if (MI.isTerminator())
2197  continue;
2198  Instrs.push_back(&MI);
2199  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2200  dbgs() << "Parsing post-instr symbol for " << MI;
2201  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2202  }
2203  }
2204 
2205  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2206  std::move(Stage));
2208  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2209  MSE.expand();
2210  MSE.cleanup();
2211 }
2212 
2213 //===----------------------------------------------------------------------===//
2214 // ModuloScheduleTestAnnotater implementation
2215 //===----------------------------------------------------------------------===//
2216 
2218  for (MachineInstr *MI : S.getInstructions()) {
2220  raw_svector_ostream OS(SV);
2221  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2222  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2223  MI->setPostInstrSymbol(MF, Sym);
2224  }
2225 }
i
i
Definition: README.txt:29
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1070
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:353
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:105
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:800
llvm::PeelingModuloScheduleExpander::rewriteKernel
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition: ModuloSchedule.cpp:2013
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:209
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:184
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:79
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:608
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:628
llvm::MachineBasicBlock::clear
void clear()
Definition: MachineBasicBlock.h:1001
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:145
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:454
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:151
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:277
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::LPD_Front
@ LPD_Front
Peel the first iteration of the loop.
Definition: MachineLoopUtils.h:18
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1395
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:493
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:477
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:872
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1641
llvm::PeelingModuloScheduleExpander::Epilogs
SmallVector< MachineBasicBlock *, 4 > Epilogs
Definition: ModuloSchedule.h:308
llvm::PeelingModuloScheduleExpander::getStage
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
Definition: ModuloSchedule.h:360
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:858
llvm::Optional< bool >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:285
llvm::PeelingModuloScheduleExpander::TII
const TargetInstrInfo * TII
Definition: ModuloSchedule.h:300
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:325
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67
llvm::MachineLoop::getBottomBlock
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout,...
Definition: MachineLoopInfo.cpp:76
llvm::PeelingModuloScheduleExpander::PeeledFront
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
Definition: ModuloSchedule.h:326
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::PeelingModuloScheduleExpander::moveStageBetweenBlocks
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition: ModuloSchedule.cpp:1653
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:291
llvm::TargetInstrInfo::insertBranch
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
Definition: TargetInstrInfo.h:707
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:761
llvm::ModuloSchedule::dump
void dump()
Definition: ModuloSchedule.h:154
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:902
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::PeelingModuloScheduleExpander::AvailableStages
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
Definition: ModuloSchedule.h:314
llvm::MachineBasicBlock::getFirstInstrTerminator
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Definition: MachineBasicBlock.cpp:247
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:865
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:207
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:379
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
MachineLoopInfo.h
llvm::TargetInstrInfo::removeBranch
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
Definition: TargetInstrInfo.h:689
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PeelingModuloScheduleExpander::Schedule
ModuloSchedule & Schedule
Definition: ModuloSchedule.h:296
llvm::PeelingModuloScheduleExpander::PeeledBack
std::deque< MachineBasicBlock * > PeeledBack
Definition: ModuloSchedule.h:326
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineInstr::setMemRefs
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
Definition: MachineInstr.cpp:349
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:371
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::PeelingModuloScheduleExpander::PhiNodeLoopIteration
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
Definition: ModuloSchedule.h:317
MCContext.h
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::MachineBasicBlock::eraseFromParent
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Definition: MachineBasicBlock.cpp:1346
llvm::PeelingModuloScheduleExpander::BlockMIs
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Definition: ModuloSchedule.h:323
llvm::PeelingModuloScheduleExpander::BB
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
Definition: ModuloSchedule.h:304
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
TBB
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
Definition: RISCVRedundantCopyElimination.cpp:76
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::PeelingModuloScheduleExpander::Prologs
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
Definition: ModuloSchedule.h:308
pass
modulo schedule Modulo Schedule test pass
Definition: ModuloSchedule.cpp:2156
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::PeelingModuloScheduleExpander::filterInstructions
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition: ModuloSchedule.cpp:1624
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:625
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:185
llvm::ModuloSchedule::getInstructions
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
Definition: ModuloSchedule.h:152
llvm::PeelSingleBlockLoop
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Definition: MachineLoopUtils.cpp:26
llvm::BitVector
Definition: BitVector.h:75
CombinerObjective::Default
@ Default
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:237
llvm::PeelingModuloScheduleExpander::peelKernel
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition: ModuloSchedule.cpp:1608
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
parseSymbolString
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition: ModuloSchedule.cpp:2169
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:774
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1367
llvm::PeelingModuloScheduleExpander::Preheader
MachineBasicBlock * Preheader
The original loop preheader.
Definition: ModuloSchedule.h:306
llvm::PeelingModuloScheduleExpander::fixupBranches
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition: ModuloSchedule.cpp:1966
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:343
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::ModuloScheduleExpander::getRewrittenKernel
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Definition: ModuloSchedule.h:277
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::PeelingModuloScheduleExpander::rewriteUsesOf
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition: ModuloSchedule.cpp:1923
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:295
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1700
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2217
LiveIntervals.h
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:825
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:383
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:485
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:293
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:384
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:596
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:196
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:439
replaceRegUsesAfterLoop
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition: ModuloSchedule.cpp:340
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetInstrInfo::insertUnconditionalBranch
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
Definition: TargetInstrInfo.h:715
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:369
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1990
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1300
llvm::PeelingModuloScheduleExpander::CreateLCSSAExitingBlock
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition: ModuloSchedule.cpp:1872
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::LoopPeelDirection
LoopPeelDirection
Definition: MachineLoopUtils.h:17
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:289
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:291
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineInstr::isRegTiedToUseOperand
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
Definition: MachineInstr.h:1566
llvm::PeelingModuloScheduleExpander::getEquivalentRegisterIn
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition: ModuloSchedule.cpp:1916
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::MachineBasicBlock::phis
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
Definition: MachineBasicBlock.h:333
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MachineInstr::isInlineAsm
bool isInlineAsm() const
Definition: MachineInstr.h:1306
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:683
hasUseAfterLoop
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition: ModuloSchedule.cpp:354
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:2018
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:472
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: ModuloSchedule.cpp:36
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:847
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LiveIntervals::createEmptyInterval
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Definition: LiveIntervals.h:129
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: ModuloSchedule.cpp:60
llvm::MachineBasicBlock::transferSuccessors
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
Definition: MachineBasicBlock.cpp:880
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:255
llvm::LPD_Back
@ LPD_Back
Peel the last iteration of the loop.
Definition: MachineLoopUtils.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:822
llvm::PeelingModuloScheduleExpander::LIS
LiveIntervals * LIS
Definition: ModuloSchedule.h:301
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: MachineBasicBlock.cpp:1374
llvm::PeelingModuloScheduleExpander::MRI
MachineRegisterInfo & MRI
Definition: ModuloSchedule.h:299
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:799
Epilog
@ Epilog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1326
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:636
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ModuloSchedule::getCycle
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
Definition: ModuloSchedule.h:140
llvm::ModuloSchedule::getNumStages
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
Definition: ModuloSchedule.h:123
llvm::PeelingModuloScheduleExpander::IllegalPhisToDelete
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
Definition: ModuloSchedule.h:328
llvm::ModuloSchedule::getLoop
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
Definition: ModuloSchedule.h:119
llvm::LiveIntervals
Definition: LiveIntervals.h:53
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:597
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:743
llvm::PeelingModuloScheduleExpander::peelPrologAndEpilogs
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition: ModuloSchedule.cpp:1755
llvm::PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition: ModuloSchedule.cpp:2030
getInitPhiReg
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition: ModuloSchedule.cpp:52
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:475
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:68
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator
DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
Definition: DenseMap.h:71
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::ModuloSchedule::print
void print(raw_ostream &OS)
Definition: ModuloSchedule.cpp:25
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
MachineInstrBuilder.h
llvm::PeelingModuloScheduleExpander::LiveStages
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
Definition: ModuloSchedule.h:310
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:270
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:698
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:82
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:313
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:713
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
llvm::initializeModuloScheduleTestPass
void initializeModuloScheduleTestPass(PassRegistry &)
llvm::PeelingModuloScheduleExpander::MF
MachineFunction & MF
Definition: ModuloSchedule.h:297
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:160
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:123
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:329
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
llvm::ModuloSchedule::getStage
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
Definition: ModuloSchedule.h:134
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1086
test
modulo schedule test
Definition: ModuloSchedule.cpp:2155
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:488
llvm::PeelingModuloScheduleExpander::getPhiCanonicalReg
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition: ModuloSchedule.cpp:1738
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:56
removePhis
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Definition: ModuloSchedule.cpp:847
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1247
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:924
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::PeelingModuloScheduleExpander::CanonicalMIs
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
Definition: ModuloSchedule.h:321