LLVM  15.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1 //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/InitializePasses.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/Support/Debug.h"
21 
22 #define DEBUG_TYPE "pipeliner"
23 using namespace llvm;
24 
26  for (MachineInstr *MI : ScheduledInstrs)
27  OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
28 }
29 
30 //===----------------------------------------------------------------------===//
31 // ModuloScheduleExpander implementation
32 //===----------------------------------------------------------------------===//
33 
34 /// Return the register values for the operands of a Phi instruction.
35 /// This function assume the instruction is a Phi.
37  unsigned &InitVal, unsigned &LoopVal) {
38  assert(Phi.isPHI() && "Expecting a Phi.");
39 
40  InitVal = 0;
41  LoopVal = 0;
42  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
43  if (Phi.getOperand(i + 1).getMBB() != Loop)
44  InitVal = Phi.getOperand(i).getReg();
45  else
46  LoopVal = Phi.getOperand(i).getReg();
47 
48  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
49 }
50 
51 /// Return the Phi register value that comes from the incoming block.
52 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
53  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
54  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
55  return Phi.getOperand(i).getReg();
56  return 0;
57 }
58 
59 /// Return the Phi register value that comes the loop block.
60 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
61  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
62  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
63  return Phi.getOperand(i).getReg();
64  return 0;
65 }
66 
68  BB = Schedule.getLoop()->getTopBlock();
69  Preheader = *BB->pred_begin();
70  if (Preheader == BB)
71  Preheader = *std::next(BB->pred_begin());
72 
73  // Iterate over the definitions in each instruction, and compute the
74  // stage difference for each use. Keep the maximum value.
75  for (MachineInstr *MI : Schedule.getInstructions()) {
76  int DefStage = Schedule.getStage(MI);
77  for (const MachineOperand &Op : MI->operands()) {
78  if (!Op.isReg() || !Op.isDef())
79  continue;
80 
81  Register Reg = Op.getReg();
82  unsigned MaxDiff = 0;
83  bool PhiIsSwapped = false;
84  for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
85  MachineInstr *UseMI = UseOp.getParent();
86  int UseStage = Schedule.getStage(UseMI);
87  unsigned Diff = 0;
88  if (UseStage != -1 && UseStage >= DefStage)
89  Diff = UseStage - DefStage;
90  if (MI->isPHI()) {
91  if (isLoopCarried(*MI))
92  ++Diff;
93  else
94  PhiIsSwapped = true;
95  }
96  MaxDiff = std::max(Diff, MaxDiff);
97  }
98  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
99  }
100  }
101 
102  generatePipelinedLoop();
103 }
104 
105 void ModuloScheduleExpander::generatePipelinedLoop() {
107  assert(LoopInfo && "Must be able to analyze loop!");
108 
109  // Create a new basic block for the kernel and add it to the CFG.
111 
112  unsigned MaxStageCount = Schedule.getNumStages() - 1;
113 
114  // Remember the registers that are used in different stages. The index is
115  // the iteration, or stage, that the instruction is scheduled in. This is
116  // a map between register names in the original block and the names created
117  // in each stage of the pipelined loop.
118  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
119  InstrMapTy InstrMap;
120 
122 
123  // Generate the prolog instructions that set up the pipeline.
124  generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
125  MF.insert(BB->getIterator(), KernelBB);
126 
127  // Rearrange the instructions to generate the new, pipelined loop,
128  // and update register names as needed.
129  for (MachineInstr *CI : Schedule.getInstructions()) {
130  if (CI->isPHI())
131  continue;
132  unsigned StageNum = Schedule.getStage(CI);
133  MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
134  updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
135  KernelBB->push_back(NewMI);
136  InstrMap[NewMI] = CI;
137  }
138 
139  // Copy any terminator instructions to the new kernel, and update
140  // names as needed.
141  for (MachineInstr &MI : BB->terminators()) {
142  MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
143  updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
144  KernelBB->push_back(NewMI);
145  InstrMap[NewMI] = &MI;
146  }
147 
148  NewKernel = KernelBB;
149  KernelBB->transferSuccessors(BB);
150  KernelBB->replaceSuccessor(BB, KernelBB);
151 
152  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
153  InstrMap, MaxStageCount, MaxStageCount, false);
154  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
155  MaxStageCount, MaxStageCount, false);
156 
157  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
158 
160  // Generate the epilog instructions to complete the pipeline.
161  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
162 
163  // We need this step because the register allocation doesn't handle some
164  // situations well, so we insert copies to help out.
165  splitLifetimes(KernelBB, EpilogBBs);
166 
167  // Remove dead instructions due to loop induction variables.
168  removeDeadInstructions(KernelBB, EpilogBBs);
169 
170  // Add branches between prolog and epilog blocks.
171  addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
172 
173  delete[] VRMap;
174 }
175 
177  // Remove the original loop since it's no longer referenced.
178  for (auto &I : *BB)
180  BB->clear();
181  BB->eraseFromParent();
182 }
183 
184 /// Generate the pipeline prolog code.
185 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
186  MachineBasicBlock *KernelBB,
187  ValueMapTy *VRMap,
188  MBBVectorTy &PrologBBs) {
189  MachineBasicBlock *PredBB = Preheader;
190  InstrMapTy InstrMap;
191 
192  // Generate a basic block for each stage, not including the last stage,
193  // which will be generated in the kernel. Each basic block may contain
194  // instructions from multiple stages/iterations.
195  for (unsigned i = 0; i < LastStage; ++i) {
196  // Create and insert the prolog basic block prior to the original loop
197  // basic block. The original loop is removed later.
199  PrologBBs.push_back(NewBB);
200  MF.insert(BB->getIterator(), NewBB);
201  NewBB->transferSuccessors(PredBB);
202  PredBB->addSuccessor(NewBB);
203  PredBB = NewBB;
204 
205  // Generate instructions for each appropriate stage. Process instructions
206  // in original program order.
207  for (int StageNum = i; StageNum >= 0; --StageNum) {
208  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
209  BBE = BB->getFirstTerminator();
210  BBI != BBE; ++BBI) {
211  if (Schedule.getStage(&*BBI) == StageNum) {
212  if (BBI->isPHI())
213  continue;
214  MachineInstr *NewMI =
215  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
216  updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
217  NewBB->push_back(NewMI);
218  InstrMap[NewMI] = &*BBI;
219  }
220  }
221  }
222  rewritePhiValues(NewBB, i, VRMap, InstrMap);
223  LLVM_DEBUG({
224  dbgs() << "prolog:\n";
225  NewBB->dump();
226  });
227  }
228 
229  PredBB->replaceSuccessor(BB, KernelBB);
230 
231  // Check if we need to remove the branch from the preheader to the original
232  // loop, and replace it with a branch to the new loop.
233  unsigned numBranches = TII->removeBranch(*Preheader);
234  if (numBranches) {
236  TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
237  }
238 }
239 
240 /// Generate the pipeline epilog code. The epilog code finishes the iterations
241 /// that were started in either the prolog or the kernel. We create a basic
242 /// block for each stage that needs to complete.
243 void ModuloScheduleExpander::generateEpilog(
244  unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
245  ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
246  // We need to change the branch from the kernel to the first epilog block, so
247  // this call to analyze branch uses the kernel rather than the original BB.
248  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
250  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
251  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
252  if (checkBranch)
253  return;
254 
255  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
256  if (*LoopExitI == KernelBB)
257  ++LoopExitI;
258  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
259  MachineBasicBlock *LoopExitBB = *LoopExitI;
260 
261  MachineBasicBlock *PredBB = KernelBB;
262  MachineBasicBlock *EpilogStart = LoopExitBB;
263  InstrMapTy InstrMap;
264 
265  // Generate a basic block for each stage, not including the last stage,
266  // which was generated for the kernel. Each basic block may contain
267  // instructions from multiple stages/iterations.
268  int EpilogStage = LastStage + 1;
269  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
271  EpilogBBs.push_back(NewBB);
272  MF.insert(BB->getIterator(), NewBB);
273 
274  PredBB->replaceSuccessor(LoopExitBB, NewBB);
275  NewBB->addSuccessor(LoopExitBB);
276 
277  if (EpilogStart == LoopExitBB)
278  EpilogStart = NewBB;
279 
280  // Add instructions to the epilog depending on the current block.
281  // Process instructions in original program order.
282  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
283  for (auto &BBI : *BB) {
284  if (BBI.isPHI())
285  continue;
286  MachineInstr *In = &BBI;
287  if ((unsigned)Schedule.getStage(In) == StageNum) {
288  // Instructions with memoperands in the epilog are updated with
289  // conservative values.
290  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
291  updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
292  NewBB->push_back(NewMI);
293  InstrMap[NewMI] = In;
294  }
295  }
296  }
297  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
298  InstrMap, LastStage, EpilogStage, i == 1);
299  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
300  LastStage, EpilogStage, i == 1);
301  PredBB = NewBB;
302 
303  LLVM_DEBUG({
304  dbgs() << "epilog:\n";
305  NewBB->dump();
306  });
307  }
308 
309  // Fix any Phi nodes in the loop exit block.
310  LoopExitBB->replacePhiUsesWith(BB, PredBB);
311 
312  // Create a branch to the new epilog from the kernel.
313  // Remove the original branch and add a new branch to the epilog.
314  TII->removeBranch(*KernelBB);
315  assert((OrigBB == TBB || OrigBB == FBB) &&
316  "Unable to determine looping branch direction");
317  if (OrigBB != TBB)
318  TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
319  else
320  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
321  // Add a branch to the loop exit.
322  if (EpilogBBs.size() > 0) {
323  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
325  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
326  }
327 }
328 
329 /// Replace all uses of FromReg that appear outside the specified
330 /// basic block with ToReg.
331 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
334  LiveIntervals &LIS) {
335  for (MachineOperand &O :
337  if (O.getParent()->getParent() != MBB)
338  O.setReg(ToReg);
339  if (!LIS.hasInterval(ToReg))
340  LIS.createEmptyInterval(ToReg);
341 }
342 
343 /// Return true if the register has a use that occurs outside the
344 /// specified loop.
345 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
347  for (const MachineOperand &MO : MRI.use_operands(Reg))
348  if (MO.getParent()->getParent() != BB)
349  return true;
350  return false;
351 }
352 
353 /// Generate Phis for the specific block in the generated pipelined code.
354 /// This function looks at the Phis from the original code to guide the
355 /// creation of new Phis.
356 void ModuloScheduleExpander::generateExistingPhis(
358  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
359  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
360  // Compute the stage number for the initial value of the Phi, which
361  // comes from the prolog. The prolog to use depends on to which kernel/
362  // epilog that we're adding the Phi.
363  unsigned PrologStage = 0;
364  unsigned PrevStage = 0;
365  bool InKernel = (LastStageNum == CurStageNum);
366  if (InKernel) {
367  PrologStage = LastStageNum - 1;
368  PrevStage = CurStageNum;
369  } else {
370  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
371  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
372  }
373 
374  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
375  BBE = BB->getFirstNonPHI();
376  BBI != BBE; ++BBI) {
377  Register Def = BBI->getOperand(0).getReg();
378 
379  unsigned InitVal = 0;
380  unsigned LoopVal = 0;
381  getPhiRegs(*BBI, BB, InitVal, LoopVal);
382 
383  unsigned PhiOp1 = 0;
384  // The Phi value from the loop body typically is defined in the loop, but
385  // not always. So, we need to check if the value is defined in the loop.
386  unsigned PhiOp2 = LoopVal;
387  if (VRMap[LastStageNum].count(LoopVal))
388  PhiOp2 = VRMap[LastStageNum][LoopVal];
389 
390  int StageScheduled = Schedule.getStage(&*BBI);
391  int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
392  unsigned NumStages = getStagesForReg(Def, CurStageNum);
393  if (NumStages == 0) {
394  // We don't need to generate a Phi anymore, but we need to rename any uses
395  // of the Phi value.
396  unsigned NewReg = VRMap[PrevStage][LoopVal];
397  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
398  InitVal, NewReg);
399  if (VRMap[CurStageNum].count(LoopVal))
400  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
401  }
402  // Adjust the number of Phis needed depending on the number of prologs left,
403  // and the distance from where the Phi is first scheduled. The number of
404  // Phis cannot exceed the number of prolog stages. Each stage can
405  // potentially define two values.
406  unsigned MaxPhis = PrologStage + 2;
407  if (!InKernel && (int)PrologStage <= LoopValStage)
408  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
409  unsigned NumPhis = std::min(NumStages, MaxPhis);
410 
411  unsigned NewReg = 0;
412  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
413  // In the epilog, we may need to look back one stage to get the correct
414  // Phi name, because the epilog and prolog blocks execute the same stage.
415  // The correct name is from the previous block only when the Phi has
416  // been completely scheduled prior to the epilog, and Phi value is not
417  // needed in multiple stages.
418  int StageDiff = 0;
419  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
420  NumPhis == 1)
421  StageDiff = 1;
422  // Adjust the computations below when the phi and the loop definition
423  // are scheduled in different stages.
424  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
425  StageDiff = StageScheduled - LoopValStage;
426  for (unsigned np = 0; np < NumPhis; ++np) {
427  // If the Phi hasn't been scheduled, then use the initial Phi operand
428  // value. Otherwise, use the scheduled version of the instruction. This
429  // is a little complicated when a Phi references another Phi.
430  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
431  PhiOp1 = InitVal;
432  // Check if the Phi has already been scheduled in a prolog stage.
433  else if (PrologStage >= AccessStage + StageDiff + np &&
434  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
435  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
436  // Check if the Phi has already been scheduled, but the loop instruction
437  // is either another Phi, or doesn't occur in the loop.
438  else if (PrologStage >= AccessStage + StageDiff + np) {
439  // If the Phi references another Phi, we need to examine the other
440  // Phi to get the correct value.
441  PhiOp1 = LoopVal;
442  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
443  int Indirects = 1;
444  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
445  int PhiStage = Schedule.getStage(InstOp1);
446  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
447  PhiOp1 = getInitPhiReg(*InstOp1, BB);
448  else
449  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
450  InstOp1 = MRI.getVRegDef(PhiOp1);
451  int PhiOpStage = Schedule.getStage(InstOp1);
452  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
453  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
454  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
455  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
456  break;
457  }
458  ++Indirects;
459  }
460  } else
461  PhiOp1 = InitVal;
462  // If this references a generated Phi in the kernel, get the Phi operand
463  // from the incoming block.
464  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
465  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
466  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
467 
468  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
469  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
470  // In the epilog, a map lookup is needed to get the value from the kernel,
471  // or previous epilog block. How is does this depends on if the
472  // instruction is scheduled in the previous block.
473  if (!InKernel) {
474  int StageDiffAdj = 0;
475  if (LoopValStage != -1 && StageScheduled > LoopValStage)
476  StageDiffAdj = StageScheduled - LoopValStage;
477  // Use the loop value defined in the kernel, unless the kernel
478  // contains the last definition of the Phi.
479  if (np == 0 && PrevStage == LastStageNum &&
480  (StageScheduled != 0 || LoopValStage != 0) &&
481  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
482  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
483  // Use the value defined by the Phi. We add one because we switch
484  // from looking at the loop value to the Phi definition.
485  else if (np > 0 && PrevStage == LastStageNum &&
486  VRMap[PrevStage - np + 1].count(Def))
487  PhiOp2 = VRMap[PrevStage - np + 1][Def];
488  // Use the loop value defined in the kernel.
489  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
490  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
491  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
492  // Use the value defined by the Phi, unless we're generating the first
493  // epilog and the Phi refers to a Phi in a different stage.
494  else if (VRMap[PrevStage - np].count(Def) &&
495  (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
496  (LoopValStage == StageScheduled)))
497  PhiOp2 = VRMap[PrevStage - np][Def];
498  }
499 
500  // Check if we can reuse an existing Phi. This occurs when a Phi
501  // references another Phi, and the other Phi is scheduled in an
502  // earlier stage. We can try to reuse an existing Phi up until the last
503  // stage of the current Phi.
504  if (LoopDefIsPhi) {
505  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
506  int LVNumStages = getStagesForPhi(LoopVal);
507  int StageDiff = (StageScheduled - LoopValStage);
508  LVNumStages -= StageDiff;
509  // Make sure the loop value Phi has been processed already.
510  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
511  NewReg = PhiOp2;
512  unsigned ReuseStage = CurStageNum;
513  if (isLoopCarried(*PhiInst))
514  ReuseStage -= LVNumStages;
515  // Check if the Phi to reuse has been generated yet. If not, then
516  // there is nothing to reuse.
517  if (VRMap[ReuseStage - np].count(LoopVal)) {
518  NewReg = VRMap[ReuseStage - np][LoopVal];
519 
520  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
521  Def, NewReg);
522  // Update the map with the new Phi name.
523  VRMap[CurStageNum - np][Def] = NewReg;
524  PhiOp2 = NewReg;
525  if (VRMap[LastStageNum - np - 1].count(LoopVal))
526  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
527 
528  if (IsLast && np == NumPhis - 1)
529  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
530  continue;
531  }
532  }
533  }
534  if (InKernel && StageDiff > 0 &&
535  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
536  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
537  }
538 
539  const TargetRegisterClass *RC = MRI.getRegClass(Def);
540  NewReg = MRI.createVirtualRegister(RC);
541 
542  MachineInstrBuilder NewPhi =
543  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
544  TII->get(TargetOpcode::PHI), NewReg);
545  NewPhi.addReg(PhiOp1).addMBB(BB1);
546  NewPhi.addReg(PhiOp2).addMBB(BB2);
547  if (np == 0)
548  InstrMap[NewPhi] = &*BBI;
549 
550  // We define the Phis after creating the new pipelined code, so
551  // we need to rename the Phi values in scheduled instructions.
552 
553  unsigned PrevReg = 0;
554  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
555  PrevReg = VRMap[PrevStage - np][LoopVal];
556  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
557  NewReg, PrevReg);
558  // If the Phi has been scheduled, use the new name for rewriting.
559  if (VRMap[CurStageNum - np].count(Def)) {
560  unsigned R = VRMap[CurStageNum - np][Def];
561  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
562  NewReg);
563  }
564 
565  // Check if we need to rename any uses that occurs after the loop. The
566  // register to replace depends on whether the Phi is scheduled in the
567  // epilog.
568  if (IsLast && np == NumPhis - 1)
569  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
570 
571  // In the kernel, a dependent Phi uses the value from this Phi.
572  if (InKernel)
573  PhiOp2 = NewReg;
574 
575  // Update the map with the new Phi name.
576  VRMap[CurStageNum - np][Def] = NewReg;
577  }
578 
579  while (NumPhis++ < NumStages) {
580  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
581  NewReg, 0);
582  }
583 
584  // Check if we need to rename a Phi that has been eliminated due to
585  // scheduling.
586  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
587  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
588  }
589 }
590 
591 /// Generate Phis for the specified block in the generated pipelined code.
592 /// These are new Phis needed because the definition is scheduled after the
593 /// use in the pipelined sequence.
594 void ModuloScheduleExpander::generatePhis(
596  MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
597  unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
598  // Compute the stage number that contains the initial Phi value, and
599  // the Phi from the previous stage.
600  unsigned PrologStage = 0;
601  unsigned PrevStage = 0;
602  unsigned StageDiff = CurStageNum - LastStageNum;
603  bool InKernel = (StageDiff == 0);
604  if (InKernel) {
605  PrologStage = LastStageNum - 1;
606  PrevStage = CurStageNum;
607  } else {
608  PrologStage = LastStageNum - StageDiff;
609  PrevStage = LastStageNum + StageDiff - 1;
610  }
611 
612  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
613  BBE = BB->instr_end();
614  BBI != BBE; ++BBI) {
615  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
616  MachineOperand &MO = BBI->getOperand(i);
617  if (!MO.isReg() || !MO.isDef() ||
619  continue;
620 
621  int StageScheduled = Schedule.getStage(&*BBI);
622  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
623  Register Def = MO.getReg();
624  unsigned NumPhis = getStagesForReg(Def, CurStageNum);
625  // An instruction scheduled in stage 0 and is used after the loop
626  // requires a phi in the epilog for the last definition from either
627  // the kernel or prolog.
628  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
629  hasUseAfterLoop(Def, BB, MRI))
630  NumPhis = 1;
631  if (!InKernel && (unsigned)StageScheduled > PrologStage)
632  continue;
633 
634  unsigned PhiOp2 = VRMap[PrevStage][Def];
635  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
636  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
637  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
638  // The number of Phis can't exceed the number of prolog stages. The
639  // prolog stage number is zero based.
640  if (NumPhis > PrologStage + 1 - StageScheduled)
641  NumPhis = PrologStage + 1 - StageScheduled;
642  for (unsigned np = 0; np < NumPhis; ++np) {
643  unsigned PhiOp1 = VRMap[PrologStage][Def];
644  if (np <= PrologStage)
645  PhiOp1 = VRMap[PrologStage - np][Def];
646  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
647  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
648  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
649  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
650  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
651  }
652  if (!InKernel)
653  PhiOp2 = VRMap[PrevStage - np][Def];
654 
655  const TargetRegisterClass *RC = MRI.getRegClass(Def);
656  Register NewReg = MRI.createVirtualRegister(RC);
657 
658  MachineInstrBuilder NewPhi =
659  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
660  TII->get(TargetOpcode::PHI), NewReg);
661  NewPhi.addReg(PhiOp1).addMBB(BB1);
662  NewPhi.addReg(PhiOp2).addMBB(BB2);
663  if (np == 0)
664  InstrMap[NewPhi] = &*BBI;
665 
666  // Rewrite uses and update the map. The actions depend upon whether
667  // we generating code for the kernel or epilog blocks.
668  if (InKernel) {
669  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
670  NewReg);
671  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
672  NewReg);
673 
674  PhiOp2 = NewReg;
675  VRMap[PrevStage - np - 1][Def] = NewReg;
676  } else {
677  VRMap[CurStageNum - np][Def] = NewReg;
678  if (np == NumPhis - 1)
679  rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
680  NewReg);
681  }
682  if (IsLast && np == NumPhis - 1)
683  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
684  }
685  }
686  }
687 }
688 
689 /// Remove instructions that generate values with no uses.
690 /// Typically, these are induction variable operations that generate values
691 /// used in the loop itself. A dead instruction has a definition with
692 /// no uses, or uses that occur in the original loop only.
693 void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
694  MBBVectorTy &EpilogBBs) {
695  // For each epilog block, check that the value defined by each instruction
696  // is used. If not, delete it.
697  for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
699  ME = MBB->instr_rend();
700  MI != ME;) {
701  // From DeadMachineInstructionElem. Don't delete inline assembly.
702  if (MI->isInlineAsm()) {
703  ++MI;
704  continue;
705  }
706  bool SawStore = false;
707  // Check if it's safe to remove the instruction due to side effects.
708  // We can, and want to, remove Phis here.
709  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
710  ++MI;
711  continue;
712  }
713  bool used = true;
714  for (const MachineOperand &MO : MI->operands()) {
715  if (!MO.isReg() || !MO.isDef())
716  continue;
717  Register reg = MO.getReg();
718  // Assume physical registers are used, unless they are marked dead.
719  if (Register::isPhysicalRegister(reg)) {
720  used = !MO.isDead();
721  if (used)
722  break;
723  continue;
724  }
725  unsigned realUses = 0;
726  for (const MachineOperand &U : MRI.use_operands(reg)) {
727  // Check if there are any uses that occur only in the original
728  // loop. If so, that's not a real use.
729  if (U.getParent()->getParent() != BB) {
730  realUses++;
731  used = true;
732  break;
733  }
734  }
735  if (realUses > 0)
736  break;
737  used = false;
738  }
739  if (!used) {
741  MI++->eraseFromParent();
742  continue;
743  }
744  ++MI;
745  }
746  // In the kernel block, check if we can remove a Phi that generates a value
747  // used in an instruction removed in the epilog block.
748  for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
749  Register reg = MI.getOperand(0).getReg();
750  if (MRI.use_begin(reg) == MRI.use_end()) {
752  MI.eraseFromParent();
753  }
754  }
755 }
756 
757 /// For loop carried definitions, we split the lifetime of a virtual register
758 /// that has uses past the definition in the next iteration. A copy with a new
759 /// virtual register is inserted before the definition, which helps with
760 /// generating a better register assignment.
761 ///
762 /// v1 = phi(a, v2) v1 = phi(a, v2)
763 /// v2 = phi(b, v3) v2 = phi(b, v3)
764 /// v3 = .. v4 = copy v1
765 /// .. = V1 v3 = ..
766 /// .. = v4
767 void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
768  MBBVectorTy &EpilogBBs) {
770  for (auto &PHI : KernelBB->phis()) {
771  Register Def = PHI.getOperand(0).getReg();
772  // Check for any Phi definition that used as an operand of another Phi
773  // in the same block.
775  E = MRI.use_instr_end();
776  I != E; ++I) {
777  if (I->isPHI() && I->getParent() == KernelBB) {
778  // Get the loop carried definition.
779  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
780  if (!LCDef)
781  continue;
782  MachineInstr *MI = MRI.getVRegDef(LCDef);
783  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
784  continue;
785  // Search through the rest of the block looking for uses of the Phi
786  // definition. If one occurs, then split the lifetime.
787  unsigned SplitReg = 0;
789  KernelBB->instr_end()))
790  if (BBJ.readsRegister(Def)) {
791  // We split the lifetime when we find the first use.
792  if (SplitReg == 0) {
793  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
794  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
795  TII->get(TargetOpcode::COPY), SplitReg)
796  .addReg(Def);
797  }
798  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
799  }
800  if (!SplitReg)
801  continue;
802  // Search through each of the epilog blocks for any uses to be renamed.
803  for (auto &Epilog : EpilogBBs)
804  for (auto &I : *Epilog)
805  if (I.readsRegister(Def))
806  I.substituteRegister(Def, SplitReg, 0, *TRI);
807  break;
808  }
809  }
810  }
811 }
812 
813 /// Remove the incoming block from the Phis in a basic block.
815  for (MachineInstr &MI : *BB) {
816  if (!MI.isPHI())
817  break;
818  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
819  if (MI.getOperand(i + 1).getMBB() == Incoming) {
820  MI.removeOperand(i + 1);
821  MI.removeOperand(i);
822  break;
823  }
824  }
825 }
826 
827 /// Create branches from each prolog basic block to the appropriate epilog
828 /// block. These edges are needed if the loop ends before reaching the
829 /// kernel.
830 void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
831  MBBVectorTy &PrologBBs,
832  MachineBasicBlock *KernelBB,
833  MBBVectorTy &EpilogBBs,
834  ValueMapTy *VRMap) {
835  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
836  MachineBasicBlock *LastPro = KernelBB;
837  MachineBasicBlock *LastEpi = KernelBB;
838 
839  // Start from the blocks connected to the kernel and work "out"
840  // to the first prolog and the last epilog blocks.
842  unsigned MaxIter = PrologBBs.size() - 1;
843  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
844  // Add branches to the prolog that go to the corresponding
845  // epilog, and the fall-thru prolog/kernel block.
846  MachineBasicBlock *Prolog = PrologBBs[j];
847  MachineBasicBlock *Epilog = EpilogBBs[i];
848 
850  Optional<bool> StaticallyGreater =
851  LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
852  unsigned numAdded = 0;
853  if (!StaticallyGreater.hasValue()) {
854  Prolog->addSuccessor(Epilog);
855  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
856  } else if (*StaticallyGreater == false) {
857  Prolog->addSuccessor(Epilog);
858  Prolog->removeSuccessor(LastPro);
859  LastEpi->removeSuccessor(Epilog);
860  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
861  removePhis(Epilog, LastEpi);
862  // Remove the blocks that are no longer referenced.
863  if (LastPro != LastEpi) {
864  LastEpi->clear();
865  LastEpi->eraseFromParent();
866  }
867  if (LastPro == KernelBB) {
868  LoopInfo->disposed();
869  NewKernel = nullptr;
870  }
871  LastPro->clear();
872  LastPro->eraseFromParent();
873  } else {
874  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
876  }
877  LastPro = Prolog;
878  LastEpi = Epilog;
879  for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
880  E = Prolog->instr_rend();
881  I != E && numAdded > 0; ++I, --numAdded)
882  updateInstruction(&*I, false, j, 0, VRMap);
883  }
884 
885  if (NewKernel) {
886  LoopInfo->setPreheader(PrologBBs[MaxIter]);
887  LoopInfo->adjustTripCount(-(MaxIter + 1));
888  }
889 }
890 
891 /// Return true if we can compute the amount the instruction changes
892 /// during each iteration. Set Delta to the amount of the change.
893 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
895  const MachineOperand *BaseOp;
896  int64_t Offset;
897  bool OffsetIsScalable;
898  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
899  return false;
900 
901  // FIXME: This algorithm assumes instructions have fixed-size offsets.
902  if (OffsetIsScalable)
903  return false;
904 
905  if (!BaseOp->isReg())
906  return false;
907 
908  Register BaseReg = BaseOp->getReg();
909 
910  MachineRegisterInfo &MRI = MF.getRegInfo();
911  // Check if there is a Phi. If so, get the definition in the loop.
912  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
913  if (BaseDef && BaseDef->isPHI()) {
914  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
915  BaseDef = MRI.getVRegDef(BaseReg);
916  }
917  if (!BaseDef)
918  return false;
919 
920  int D = 0;
921  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
922  return false;
923 
924  Delta = D;
925  return true;
926 }
927 
928 /// Update the memory operand with a new offset when the pipeliner
929 /// generates a new copy of the instruction that refers to a
930 /// different memory location.
931 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
932  MachineInstr &OldMI,
933  unsigned Num) {
934  if (Num == 0)
935  return;
936  // If the instruction has memory operands, then adjust the offset
937  // when the instruction appears in different stages.
938  if (NewMI.memoperands_empty())
939  return;
941  for (MachineMemOperand *MMO : NewMI.memoperands()) {
942  // TODO: Figure out whether isAtomic is really necessary (see D57601).
943  if (MMO->isVolatile() || MMO->isAtomic() ||
944  (MMO->isInvariant() && MMO->isDereferenceable()) ||
945  (!MMO->getValue())) {
946  NewMMOs.push_back(MMO);
947  continue;
948  }
949  unsigned Delta;
950  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
951  int64_t AdjOffset = Delta * Num;
952  NewMMOs.push_back(
953  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
954  } else {
955  NewMMOs.push_back(
957  }
958  }
959  NewMI.setMemRefs(MF, NewMMOs);
960 }
961 
962 /// Clone the instruction for the new pipelined loop and update the
963 /// memory operands, if needed.
964 MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
965  unsigned CurStageNum,
966  unsigned InstStageNum) {
967  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
968  // Check for tied operands in inline asm instructions. This should be handled
969  // elsewhere, but I'm not sure of the best solution.
970  if (OldMI->isInlineAsm())
971  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
972  const auto &MO = OldMI->getOperand(i);
973  if (MO.isReg() && MO.isUse())
974  break;
975  unsigned UseIdx;
976  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
977  NewMI->tieOperands(i, UseIdx);
978  }
979  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
980  return NewMI;
981 }
982 
983 /// Clone the instruction for the new pipelined loop. If needed, this
984 /// function updates the instruction using the values saved in the
985 /// InstrChanges structure.
986 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
987  MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
988  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
989  auto It = InstrChanges.find(OldMI);
990  if (It != InstrChanges.end()) {
991  std::pair<unsigned, int64_t> RegAndOffset = It->second;
992  unsigned BasePos, OffsetPos;
993  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
994  return nullptr;
995  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
996  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
997  if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
998  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
999  NewMI->getOperand(OffsetPos).setImm(NewOffset);
1000  }
1001  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1002  return NewMI;
1003 }
1004 
1005 /// Update the machine instruction with new virtual registers. This
1006 /// function may change the defintions and/or uses.
1007 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1008  bool LastDef,
1009  unsigned CurStageNum,
1010  unsigned InstrStageNum,
1011  ValueMapTy *VRMap) {
1012  for (MachineOperand &MO : NewMI->operands()) {
1013  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
1014  continue;
1015  Register reg = MO.getReg();
1016  if (MO.isDef()) {
1017  // Create a new virtual register for the definition.
1018  const TargetRegisterClass *RC = MRI.getRegClass(reg);
1019  Register NewReg = MRI.createVirtualRegister(RC);
1020  MO.setReg(NewReg);
1021  VRMap[CurStageNum][reg] = NewReg;
1022  if (LastDef)
1023  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1024  } else if (MO.isUse()) {
1025  MachineInstr *Def = MRI.getVRegDef(reg);
1026  // Compute the stage that contains the last definition for instruction.
1027  int DefStageNum = Schedule.getStage(Def);
1028  unsigned StageNum = CurStageNum;
1029  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1030  // Compute the difference in stages between the defintion and the use.
1031  unsigned StageDiff = (InstrStageNum - DefStageNum);
1032  // Make an adjustment to get the last definition.
1033  StageNum -= StageDiff;
1034  }
1035  if (VRMap[StageNum].count(reg))
1036  MO.setReg(VRMap[StageNum][reg]);
1037  }
1038  }
1039 }
1040 
1041 /// Return the instruction in the loop that defines the register.
1042 /// If the definition is a Phi, then follow the Phi operand to
1043 /// the instruction in the loop.
1044 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1047  while (Def->isPHI()) {
1048  if (!Visited.insert(Def).second)
1049  break;
1050  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1051  if (Def->getOperand(i + 1).getMBB() == BB) {
1052  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1053  break;
1054  }
1055  }
1056  return Def;
1057 }
1058 
1059 /// Return the new name for the value from the previous stage.
1060 unsigned ModuloScheduleExpander::getPrevMapVal(
1061  unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1062  ValueMapTy *VRMap, MachineBasicBlock *BB) {
1063  unsigned PrevVal = 0;
1064  if (StageNum > PhiStage) {
1065  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1066  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1067  // The name is defined in the previous stage.
1068  PrevVal = VRMap[StageNum - 1][LoopVal];
1069  else if (VRMap[StageNum].count(LoopVal))
1070  // The previous name is defined in the current stage when the instruction
1071  // order is swapped.
1072  PrevVal = VRMap[StageNum][LoopVal];
1073  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1074  // The loop value hasn't yet been scheduled.
1075  PrevVal = LoopVal;
1076  else if (StageNum == PhiStage + 1)
1077  // The loop value is another phi, which has not been scheduled.
1078  PrevVal = getInitPhiReg(*LoopInst, BB);
1079  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1080  // The loop value is another phi, which has been scheduled.
1081  PrevVal =
1082  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1083  LoopStage, VRMap, BB);
1084  }
1085  return PrevVal;
1086 }
1087 
1088 /// Rewrite the Phi values in the specified block to use the mappings
1089 /// from the initial operand. Once the Phi is scheduled, we switch
1090 /// to using the loop value instead of the Phi value, so those names
1091 /// do not need to be rewritten.
1092 void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1093  unsigned StageNum,
1094  ValueMapTy *VRMap,
1095  InstrMapTy &InstrMap) {
1096  for (auto &PHI : BB->phis()) {
1097  unsigned InitVal = 0;
1098  unsigned LoopVal = 0;
1099  getPhiRegs(PHI, BB, InitVal, LoopVal);
1100  Register PhiDef = PHI.getOperand(0).getReg();
1101 
1102  unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1103  unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1104  unsigned NumPhis = getStagesForPhi(PhiDef);
1105  if (NumPhis > StageNum)
1106  NumPhis = StageNum;
1107  for (unsigned np = 0; np <= NumPhis; ++np) {
1108  unsigned NewVal =
1109  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1110  if (!NewVal)
1111  NewVal = InitVal;
1112  rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1113  NewVal);
1114  }
1115  }
1116 }
1117 
1118 /// Rewrite a previously scheduled instruction to use the register value
1119 /// from the new instruction. Make sure the instruction occurs in the
1120 /// basic block, and we don't change the uses in the new instruction.
1121 void ModuloScheduleExpander::rewriteScheduledInstr(
1122  MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1123  unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1124  unsigned PrevReg) {
1125  bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1126  int StagePhi = Schedule.getStage(Phi) + PhiNum;
1127  // Rewrite uses that have been scheduled already to use the new
1128  // Phi register.
1129  for (MachineOperand &UseOp :
1131  MachineInstr *UseMI = UseOp.getParent();
1132  if (UseMI->getParent() != BB)
1133  continue;
1134  if (UseMI->isPHI()) {
1135  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1136  continue;
1137  if (getLoopPhiReg(*UseMI, BB) != OldReg)
1138  continue;
1139  }
1140  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1141  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1142  MachineInstr *OrigMI = OrigInstr->second;
1143  int StageSched = Schedule.getStage(OrigMI);
1144  int CycleSched = Schedule.getCycle(OrigMI);
1145  unsigned ReplaceReg = 0;
1146  // This is the stage for the scheduled instruction.
1147  if (StagePhi == StageSched && Phi->isPHI()) {
1148  int CyclePhi = Schedule.getCycle(Phi);
1149  if (PrevReg && InProlog)
1150  ReplaceReg = PrevReg;
1151  else if (PrevReg && !isLoopCarried(*Phi) &&
1152  (CyclePhi <= CycleSched || OrigMI->isPHI()))
1153  ReplaceReg = PrevReg;
1154  else
1155  ReplaceReg = NewReg;
1156  }
1157  // The scheduled instruction occurs before the scheduled Phi, and the
1158  // Phi is not loop carried.
1159  if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1160  ReplaceReg = NewReg;
1161  if (StagePhi > StageSched && Phi->isPHI())
1162  ReplaceReg = NewReg;
1163  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1164  ReplaceReg = NewReg;
1165  if (ReplaceReg) {
1166  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1167  UseOp.setReg(ReplaceReg);
1168  }
1169  }
1170 }
1171 
1172 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1173  if (!Phi.isPHI())
1174  return false;
1175  int DefCycle = Schedule.getCycle(&Phi);
1176  int DefStage = Schedule.getStage(&Phi);
1177 
1178  unsigned InitVal = 0;
1179  unsigned LoopVal = 0;
1180  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1181  MachineInstr *Use = MRI.getVRegDef(LoopVal);
1182  if (!Use || Use->isPHI())
1183  return true;
1184  int LoopCycle = Schedule.getCycle(Use);
1185  int LoopStage = Schedule.getStage(Use);
1186  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1187 }
1188 
1189 //===----------------------------------------------------------------------===//
1190 // PeelingModuloScheduleExpander implementation
1191 //===----------------------------------------------------------------------===//
1192 // This is a reimplementation of ModuloScheduleExpander that works by creating
1193 // a fully correct steady-state kernel and peeling off the prolog and epilogs.
1194 //===----------------------------------------------------------------------===//
1195 
1196 namespace {
1197 // Remove any dead phis in MBB. Dead phis either have only one block as input
1198 // (in which case they are the identity) or have no uses.
1199 void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1200  LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1201  bool Changed = true;
1202  while (Changed) {
1203  Changed = false;
1205  assert(MI.isPHI());
1206  if (MRI.use_empty(MI.getOperand(0).getReg())) {
1207  if (LIS)
1209  MI.eraseFromParent();
1210  Changed = true;
1211  } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1212  MRI.constrainRegClass(MI.getOperand(1).getReg(),
1213  MRI.getRegClass(MI.getOperand(0).getReg()));
1214  MRI.replaceRegWith(MI.getOperand(0).getReg(),
1215  MI.getOperand(1).getReg());
1216  if (LIS)
1218  MI.eraseFromParent();
1219  Changed = true;
1220  }
1221  }
1222  }
1223 }
1224 
1225 /// Rewrites the kernel block in-place to adhere to the given schedule.
1226 /// KernelRewriter holds all of the state required to perform the rewriting.
1227 class KernelRewriter {
1228  ModuloSchedule &S;
1230  MachineBasicBlock *PreheaderBB, *ExitBB;
1232  const TargetInstrInfo *TII;
1233  LiveIntervals *LIS;
1234 
1235  // Map from register class to canonical undef register for that class.
1237  // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1238  // this map is only used when InitReg is non-undef.
1240  // Map from LoopReg to phi register where the InitReg is undef.
1241  DenseMap<Register, Register> UndefPhis;
1242 
1243  // Reg is used by MI. Return the new register MI should use to adhere to the
1244  // schedule. Insert phis as necessary.
1245  Register remapUse(Register Reg, MachineInstr &MI);
1246  // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1247  // If InitReg is not given it is chosen arbitrarily. It will either be undef
1248  // or will be chosen so as to share another phi.
1249  Register phi(Register LoopReg, Optional<Register> InitReg = {},
1250  const TargetRegisterClass *RC = nullptr);
1251  // Create an undef register of the given register class.
1252  Register undef(const TargetRegisterClass *RC);
1253 
1254 public:
1255  KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1256  LiveIntervals *LIS = nullptr);
1257  void rewrite();
1258 };
1259 } // namespace
1260 
1261 KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1262  MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1263  : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1264  ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1265  TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1266  PreheaderBB = *BB->pred_begin();
1267  if (PreheaderBB == BB)
1268  PreheaderBB = *std::next(BB->pred_begin());
1269 }
1270 
1271 void KernelRewriter::rewrite() {
1272  // Rearrange the loop to be in schedule order. Note that the schedule may
1273  // contain instructions that are not owned by the loop block (InstrChanges and
1274  // friends), so we gracefully handle unowned instructions and delete any
1275  // instructions that weren't in the schedule.
1276  auto InsertPt = BB->getFirstTerminator();
1277  MachineInstr *FirstMI = nullptr;
1278  for (MachineInstr *MI : S.getInstructions()) {
1279  if (MI->isPHI())
1280  continue;
1281  if (MI->getParent())
1282  MI->removeFromParent();
1283  BB->insert(InsertPt, MI);
1284  if (!FirstMI)
1285  FirstMI = MI;
1286  }
1287  assert(FirstMI && "Failed to find first MI in schedule");
1288 
1289  // At this point all of the scheduled instructions are between FirstMI
1290  // and the end of the block. Kill from the first non-phi to FirstMI.
1291  for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1292  if (LIS)
1294  (I++)->eraseFromParent();
1295  }
1296 
1297  // Now remap every instruction in the loop.
1298  for (MachineInstr &MI : *BB) {
1299  if (MI.isPHI() || MI.isTerminator())
1300  continue;
1301  for (MachineOperand &MO : MI.uses()) {
1302  if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1303  continue;
1304  Register Reg = remapUse(MO.getReg(), MI);
1305  MO.setReg(Reg);
1306  }
1307  }
1308  EliminateDeadPhis(BB, MRI, LIS);
1309 
1310  // Ensure a phi exists for all instructions that are either referenced by
1311  // an illegal phi or by an instruction outside the loop. This allows us to
1312  // treat remaps of these values the same as "normal" values that come from
1313  // loop-carried phis.
1314  for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1315  if (MI->isPHI()) {
1316  Register R = MI->getOperand(0).getReg();
1317  phi(R);
1318  continue;
1319  }
1320 
1321  for (MachineOperand &Def : MI->defs()) {
1322  for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1323  if (MI.getParent() != BB) {
1324  phi(Def.getReg());
1325  break;
1326  }
1327  }
1328  }
1329  }
1330 }
1331 
1332 Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1333  MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1334  if (!Producer)
1335  return Reg;
1336 
1337  int ConsumerStage = S.getStage(&MI);
1338  if (!Producer->isPHI()) {
1339  // Non-phi producers are simple to remap. Insert as many phis as the
1340  // difference between the consumer and producer stages.
1341  if (Producer->getParent() != BB)
1342  // Producer was not inside the loop. Use the register as-is.
1343  return Reg;
1344  int ProducerStage = S.getStage(Producer);
1345  assert(ConsumerStage != -1 &&
1346  "In-loop consumer should always be scheduled!");
1347  assert(ConsumerStage >= ProducerStage);
1348  unsigned StageDiff = ConsumerStage - ProducerStage;
1349 
1350  for (unsigned I = 0; I < StageDiff; ++I)
1351  Reg = phi(Reg);
1352  return Reg;
1353  }
1354 
1355  // First, dive through the phi chain to find the defaults for the generated
1356  // phis.
1357  SmallVector<Optional<Register>, 4> Defaults;
1358  Register LoopReg = Reg;
1359  auto LoopProducer = Producer;
1360  while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1361  LoopReg = getLoopPhiReg(*LoopProducer, BB);
1362  Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1363  LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1364  assert(LoopProducer);
1365  }
1366  int LoopProducerStage = S.getStage(LoopProducer);
1367 
1368  Optional<Register> IllegalPhiDefault;
1369 
1370  if (LoopProducerStage == -1) {
1371  // Do nothing.
1372  } else if (LoopProducerStage > ConsumerStage) {
1373  // This schedule is only representable if ProducerStage == ConsumerStage+1.
1374  // In addition, Consumer's cycle must be scheduled after Producer in the
1375  // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1376  // functions.
1377 #ifndef NDEBUG // Silence unused variables in non-asserts mode.
1378  int LoopProducerCycle = S.getCycle(LoopProducer);
1379  int ConsumerCycle = S.getCycle(&MI);
1380 #endif
1381  assert(LoopProducerCycle <= ConsumerCycle);
1382  assert(LoopProducerStage == ConsumerStage + 1);
1383  // Peel off the first phi from Defaults and insert a phi between producer
1384  // and consumer. This phi will not be at the front of the block so we
1385  // consider it illegal. It will only exist during the rewrite process; it
1386  // needs to exist while we peel off prologs because these could take the
1387  // default value. After that we can replace all uses with the loop producer
1388  // value.
1389  IllegalPhiDefault = Defaults.front();
1390  Defaults.erase(Defaults.begin());
1391  } else {
1392  assert(ConsumerStage >= LoopProducerStage);
1393  int StageDiff = ConsumerStage - LoopProducerStage;
1394  if (StageDiff > 0) {
1395  LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1396  << " to " << (Defaults.size() + StageDiff) << "\n");
1397  // If we need more phis than we have defaults for, pad out with undefs for
1398  // the earliest phis, which are at the end of the defaults chain (the
1399  // chain is in reverse order).
1400  Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
1401  ? Optional<Register>()
1402  : Defaults.back());
1403  }
1404  }
1405 
1406  // Now we know the number of stages to jump back, insert the phi chain.
1407  auto DefaultI = Defaults.rbegin();
1408  while (DefaultI != Defaults.rend())
1409  LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1410 
1411  if (IllegalPhiDefault.hasValue()) {
1412  // The consumer optionally consumes LoopProducer in the same iteration
1413  // (because the producer is scheduled at an earlier cycle than the consumer)
1414  // or the initial value. To facilitate this we create an illegal block here
1415  // by embedding a phi in the middle of the block. We will fix this up
1416  // immediately prior to pruning.
1417  auto RC = MRI.getRegClass(Reg);
1419  MachineInstr *IllegalPhi =
1420  BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1421  .addReg(IllegalPhiDefault.getValue())
1422  .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1423  .addReg(LoopReg)
1424  .addMBB(BB); // Block choice is arbitrary and has no effect.
1425  // Illegal phi should belong to the producer stage so that it can be
1426  // filtered correctly during peeling.
1427  S.setStage(IllegalPhi, LoopProducerStage);
1428  return R;
1429  }
1430 
1431  return LoopReg;
1432 }
1433 
1435  const TargetRegisterClass *RC) {
1436  // If the init register is not undef, try and find an existing phi.
1437  if (InitReg.hasValue()) {
1438  auto I = Phis.find({LoopReg, InitReg.getValue()});
1439  if (I != Phis.end())
1440  return I->second;
1441  } else {
1442  for (auto &KV : Phis) {
1443  if (KV.first.first == LoopReg)
1444  return KV.second;
1445  }
1446  }
1447 
1448  // InitReg is either undef or no existing phi takes InitReg as input. Try and
1449  // find a phi that takes undef as input.
1450  auto I = UndefPhis.find(LoopReg);
1451  if (I != UndefPhis.end()) {
1452  Register R = I->second;
1453  if (!InitReg.hasValue())
1454  // Found a phi taking undef as input, and this input is undef so return
1455  // without any more changes.
1456  return R;
1457  // Found a phi taking undef as input, so rewrite it to take InitReg.
1459  MI->getOperand(1).setReg(InitReg.getValue());
1460  Phis.insert({{LoopReg, InitReg.getValue()}, R});
1462  UndefPhis.erase(I);
1463  return R;
1464  }
1465 
1466  // Failed to find any existing phi to reuse, so create a new one.
1467  if (!RC)
1468  RC = MRI.getRegClass(LoopReg);
1470  if (InitReg.hasValue())
1471  MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1472  BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1473  .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
1474  .addMBB(PreheaderBB)
1475  .addReg(LoopReg)
1476  .addMBB(BB);
1477  if (!InitReg.hasValue())
1478  UndefPhis[LoopReg] = R;
1479  else
1480  Phis[{LoopReg, *InitReg}] = R;
1481  return R;
1482 }
1483 
1484 Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1485  Register &R = Undefs[RC];
1486  if (R == 0) {
1487  // Create an IMPLICIT_DEF that defines this register if we need it.
1488  // All uses of this should be removed by the time we have finished unrolling
1489  // prologs and epilogs.
1490  R = MRI.createVirtualRegister(RC);
1491  auto *InsertBB = &PreheaderBB->getParent()->front();
1492  BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1493  TII->get(TargetOpcode::IMPLICIT_DEF), R);
1494  }
1495  return R;
1496 }
1497 
1498 namespace {
1499 /// Describes an operand in the kernel of a pipelined loop. Characteristics of
1500 /// the operand are discovered, such as how many in-loop PHIs it has to jump
1501 /// through and defaults for these phis.
1502 class KernelOperandInfo {
1505  SmallVector<Register, 4> PhiDefaults;
1508 
1509 public:
1510  KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1511  const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1512  : MRI(MRI) {
1513  Source = MO;
1514  BB = MO->getParent()->getParent();
1515  while (isRegInLoop(MO)) {
1516  MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1517  if (MI->isFullCopy()) {
1518  MO = &MI->getOperand(1);
1519  continue;
1520  }
1521  if (!MI->isPHI())
1522  break;
1523  // If this is an illegal phi, don't count it in distance.
1524  if (IllegalPhis.count(MI)) {
1525  MO = &MI->getOperand(3);
1526  continue;
1527  }
1528 
1530  MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1531  : &MI->getOperand(3);
1532  PhiDefaults.push_back(Default);
1533  }
1534  Target = MO;
1535  }
1536 
1537  bool operator==(const KernelOperandInfo &Other) const {
1538  return PhiDefaults.size() == Other.PhiDefaults.size();
1539  }
1540 
1541  void print(raw_ostream &OS) const {
1542  OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1543  << *Source->getParent();
1544  }
1545 
1546 private:
1547  bool isRegInLoop(MachineOperand *MO) {
1548  return MO->isReg() && MO->getReg().isVirtual() &&
1549  MRI.getVRegDef(MO->getReg())->getParent() == BB;
1550  }
1551 };
1552 } // namespace
1553 
1555 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1556  MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1557  if (LPD == LPD_Front)
1558  PeeledFront.push_back(NewBB);
1559  else
1560  PeeledBack.push_front(NewBB);
1561  for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1562  ++I, ++NI) {
1563  CanonicalMIs[&*I] = &*I;
1564  CanonicalMIs[&*NI] = &*I;
1565  BlockMIs[{NewBB, &*I}] = &*NI;
1566  BlockMIs[{BB, &*I}] = &*I;
1567  }
1568  return NewBB;
1569 }
1570 
1572  int MinStage) {
1573  for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1574  I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1575  MachineInstr *MI = &*I++;
1576  int Stage = getStage(MI);
1577  if (Stage == -1 || Stage >= MinStage)
1578  continue;
1579 
1580  for (MachineOperand &DefMO : MI->defs()) {
1582  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1583  // Only PHIs can use values from this block by construction.
1584  // Match with the equivalent PHI in B.
1585  assert(UseMI.isPHI());
1586  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1587  MI->getParent());
1588  Subs.emplace_back(&UseMI, Reg);
1589  }
1590  for (auto &Sub : Subs)
1591  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1593  }
1594  if (LIS)
1596  MI->eraseFromParent();
1597  }
1598 }
1599 
1601  MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1602  auto InsertPt = DestBB->getFirstNonPHI();
1605  llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1606  if (MI.isPHI()) {
1607  // This is an illegal PHI. If we move any instructions using an illegal
1608  // PHI, we need to create a legal Phi.
1609  if (getStage(&MI) != Stage) {
1610  // The legal Phi is not necessary if the illegal phi's stage
1611  // is being moved.
1612  Register PhiR = MI.getOperand(0).getReg();
1613  auto RC = MRI.getRegClass(PhiR);
1615  MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1616  DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1617  .addReg(PhiR)
1618  .addMBB(SourceBB);
1619  BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1620  CanonicalMIs[NI] = CanonicalMIs[&MI];
1621  Remaps[PhiR] = NR;
1622  }
1623  }
1624  if (getStage(&MI) != Stage)
1625  continue;
1626  MI.removeFromParent();
1627  DestBB->insert(InsertPt, &MI);
1628  auto *KernelMI = CanonicalMIs[&MI];
1629  BlockMIs[{DestBB, KernelMI}] = &MI;
1630  BlockMIs.erase({SourceBB, KernelMI});
1631  }
1632  SmallVector<MachineInstr *, 4> PhiToDelete;
1633  for (MachineInstr &MI : DestBB->phis()) {
1634  assert(MI.getNumOperands() == 3);
1635  MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1636  // If the instruction referenced by the phi is moved inside the block
1637  // we don't need the phi anymore.
1638  if (getStage(Def) == Stage) {
1639  Register PhiReg = MI.getOperand(0).getReg();
1640  assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1641  MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1642  MI.getOperand(0).setReg(PhiReg);
1643  PhiToDelete.push_back(&MI);
1644  }
1645  }
1646  for (auto *P : PhiToDelete)
1647  P->eraseFromParent();
1648  InsertPt = DestBB->getFirstNonPHI();
1649  // Helper to clone Phi instructions into the destination block. We clone Phi
1650  // greedily to avoid combinatorial explosion of Phi instructions.
1651  auto clonePhi = [&](MachineInstr *Phi) {
1652  MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1653  DestBB->insert(InsertPt, NewMI);
1654  Register OrigR = Phi->getOperand(0).getReg();
1656  NewMI->getOperand(0).setReg(R);
1657  NewMI->getOperand(1).setReg(OrigR);
1658  NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1659  Remaps[OrigR] = R;
1660  CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1661  BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1663  return R;
1664  };
1665  for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1666  for (MachineOperand &MO : I->uses()) {
1667  if (!MO.isReg())
1668  continue;
1669  if (Remaps.count(MO.getReg()))
1670  MO.setReg(Remaps[MO.getReg()]);
1671  else {
1672  // If we are using a phi from the source block we need to add a new phi
1673  // pointing to the old one.
1675  if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1676  Register R = clonePhi(Use);
1677  MO.setReg(R);
1678  }
1679  }
1680  }
1681  }
1682 }
1683 
1684 Register
1686  MachineInstr *Phi) {
1687  unsigned distance = PhiNodeLoopIteration[Phi];
1688  MachineInstr *CanonicalUse = CanonicalPhi;
1689  Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1690  for (unsigned I = 0; I < distance; ++I) {
1691  assert(CanonicalUse->isPHI());
1692  assert(CanonicalUse->getNumOperands() == 5);
1693  unsigned LoopRegIdx = 3, InitRegIdx = 1;
1694  if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1695  std::swap(LoopRegIdx, InitRegIdx);
1696  CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1697  CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1698  }
1699  return CanonicalUseReg;
1700 }
1701 
1703  BitVector LS(Schedule.getNumStages(), true);
1704  BitVector AS(Schedule.getNumStages(), true);
1705  LiveStages[BB] = LS;
1706  AvailableStages[BB] = AS;
1707 
1708  // Peel out the prologs.
1709  LS.reset();
1710  for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1711  LS[I] = true;
1712  Prologs.push_back(peelKernel(LPD_Front));
1713  LiveStages[Prologs.back()] = LS;
1714  AvailableStages[Prologs.back()] = LS;
1715  }
1716 
1717  // Create a block that will end up as the new loop exiting block (dominated by
1718  // all prologs and epilogs). It will only contain PHIs, in the same order as
1719  // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1720  // that the exiting block is a (sub) clone of BB. This in turn gives us the
1721  // property that any value deffed in BB but used outside of BB is used by a
1722  // PHI in the exiting block.
1724  EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1725  // Push out the epilogs, again in reverse order.
1726  // We can't assume anything about the minumum loop trip count at this point,
1727  // so emit a fairly complex epilog.
1728 
1729  // We first peel number of stages minus one epilogue. Then we remove dead
1730  // stages and reorder instructions based on their stage. If we have 3 stages
1731  // we generate first:
1732  // E0[3, 2, 1]
1733  // E1[3', 2']
1734  // E2[3'']
1735  // And then we move instructions based on their stages to have:
1736  // E0[3]
1737  // E1[2, 3']
1738  // E2[1, 2', 3'']
1739  // The transformation is legal because we only move instructions past
1740  // instructions of a previous loop iteration.
1741  for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1742  Epilogs.push_back(peelKernel(LPD_Back));
1743  MachineBasicBlock *B = Epilogs.back();
1745  // Keep track at which iteration each phi belongs to. We need it to know
1746  // what version of the variable to use during prologue/epilogue stitching.
1747  EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1748  for (MachineInstr &Phi : B->phis())
1750  }
1751  for (size_t I = 0; I < Epilogs.size(); I++) {
1752  LS.reset();
1753  for (size_t J = I; J < Epilogs.size(); J++) {
1754  int Iteration = J;
1755  unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1756  // Move stage one block at a time so that Phi nodes are updated correctly.
1757  for (size_t K = Iteration; K > I; K--)
1758  moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1759  LS[Stage] = true;
1760  }
1761  LiveStages[Epilogs[I]] = LS;
1762  AvailableStages[Epilogs[I]] = AS;
1763  }
1764 
1765  // Now we've defined all the prolog and epilog blocks as a fallthrough
1766  // sequence, add the edges that will be followed if the loop trip count is
1767  // lower than the number of stages (connecting prologs directly with epilogs).
1768  auto PI = Prologs.begin();
1769  auto EI = Epilogs.begin();
1770  assert(Prologs.size() == Epilogs.size());
1771  for (; PI != Prologs.end(); ++PI, ++EI) {
1772  MachineBasicBlock *Pred = *(*EI)->pred_begin();
1773  (*PI)->addSuccessor(*EI);
1774  for (MachineInstr &MI : (*EI)->phis()) {
1775  Register Reg = MI.getOperand(1).getReg();
1777  if (Use && Use->getParent() == Pred) {
1778  MachineInstr *CanonicalUse = CanonicalMIs[Use];
1779  if (CanonicalUse->isPHI()) {
1780  // If the use comes from a phi we need to skip as many phi as the
1781  // distance between the epilogue and the kernel. Trace through the phi
1782  // chain to find the right value.
1783  Reg = getPhiCanonicalReg(CanonicalUse, Use);
1784  }
1786  }
1787  MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1788  MI.addOperand(MachineOperand::CreateMBB(*PI));
1789  }
1790  }
1791 
1792  // Create a list of all blocks in order.
1794  llvm::copy(PeeledFront, std::back_inserter(Blocks));
1795  Blocks.push_back(BB);
1796  llvm::copy(PeeledBack, std::back_inserter(Blocks));
1797 
1798  // Iterate in reverse order over all instructions, remapping as we go.
1799  for (MachineBasicBlock *B : reverse(Blocks)) {
1800  for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
1801  I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1802  MachineInstr *MI = &*I++;
1803  rewriteUsesOf(MI);
1804  }
1805  }
1806  for (auto *MI : IllegalPhisToDelete) {
1807  if (LIS)
1809  MI->eraseFromParent();
1810  }
1812 
1813  // Now all remapping has been done, we're free to optimize the generated code.
1814  for (MachineBasicBlock *B : reverse(Blocks))
1815  EliminateDeadPhis(B, MRI, LIS);
1816  EliminateDeadPhis(ExitingBB, MRI, LIS);
1817 }
1818 
1820  MachineFunction &MF = *BB->getParent();
1821  MachineBasicBlock *Exit = *BB->succ_begin();
1822  if (Exit == BB)
1823  Exit = *std::next(BB->succ_begin());
1824 
1826  MF.insert(std::next(BB->getIterator()), NewBB);
1827 
1828  // Clone all phis in BB into NewBB and rewrite.
1829  for (MachineInstr &MI : BB->phis()) {
1830  auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1831  Register OldR = MI.getOperand(3).getReg();
1834  for (MachineInstr &Use : MRI.use_instructions(OldR))
1835  if (Use.getParent() != BB)
1836  Uses.push_back(&Use);
1837  for (MachineInstr *Use : Uses)
1838  Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1840  MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1841  .addReg(OldR)
1842  .addMBB(BB);
1843  BlockMIs[{NewBB, &MI}] = NI;
1844  CanonicalMIs[NI] = &MI;
1845  }
1846  BB->replaceSuccessor(Exit, NewBB);
1847  Exit->replacePhiUsesWith(BB, NewBB);
1848  NewBB->addSuccessor(Exit);
1849 
1850  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1852  bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1853  (void)CanAnalyzeBr;
1854  assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1855  TII->removeBranch(*BB);
1856  TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1857  Cond, DebugLoc());
1858  TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1859  return NewBB;
1860 }
1861 
1862 Register
1864  MachineBasicBlock *BB) {
1866  unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1867  return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1868 }
1869 
1871  if (MI->isPHI()) {
1872  // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1873  // and it is produced by this block.
1874  Register PhiR = MI->getOperand(0).getReg();
1875  Register R = MI->getOperand(3).getReg();
1876  int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1877  if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1878  R = MI->getOperand(1).getReg();
1879  MRI.setRegClass(R, MRI.getRegClass(PhiR));
1880  MRI.replaceRegWith(PhiR, R);
1881  // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1882  // later to figure out how to remap registers.
1883  MI->getOperand(0).setReg(PhiR);
1884  IllegalPhisToDelete.push_back(MI);
1885  return;
1886  }
1887 
1888  int Stage = getStage(MI);
1889  if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1890  LiveStages[MI->getParent()].test(Stage))
1891  // Instruction is live, no rewriting to do.
1892  return;
1893 
1894  for (MachineOperand &DefMO : MI->defs()) {
1896  for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1897  // Only PHIs can use values from this block by construction.
1898  // Match with the equivalent PHI in B.
1899  assert(UseMI.isPHI());
1900  Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1901  MI->getParent());
1902  Subs.emplace_back(&UseMI, Reg);
1903  }
1904  for (auto &Sub : Subs)
1905  Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1907  }
1908  if (LIS)
1910  MI->eraseFromParent();
1911 }
1912 
1914  // Work outwards from the kernel.
1915  bool KernelDisposed = false;
1916  int TC = Schedule.getNumStages() - 1;
1917  for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1918  ++PI, ++EI, --TC) {
1919  MachineBasicBlock *Prolog = *PI;
1920  MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1921  MachineBasicBlock *Epilog = *EI;
1923  TII->removeBranch(*Prolog);
1924  Optional<bool> StaticallyGreater =
1925  LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1926  if (!StaticallyGreater.hasValue()) {
1927  LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1928  // Dynamically branch based on Cond.
1929  TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1930  } else if (*StaticallyGreater == false) {
1931  LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1932  // Prolog never falls through; branch to epilog and orphan interior
1933  // blocks. Leave it to unreachable-block-elim to clean up.
1934  Prolog->removeSuccessor(Fallthrough);
1935  for (MachineInstr &P : Fallthrough->phis()) {
1936  P.removeOperand(2);
1937  P.removeOperand(1);
1938  }
1940  KernelDisposed = true;
1941  } else {
1942  LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1943  // Prolog always falls through; remove incoming values in epilog.
1944  Prolog->removeSuccessor(Epilog);
1945  for (MachineInstr &P : Epilog->phis()) {
1946  P.removeOperand(4);
1947  P.removeOperand(3);
1948  }
1949  }
1950  }
1951 
1952  if (!KernelDisposed) {
1953  LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1954  LoopInfo->setPreheader(Prologs.back());
1955  } else {
1956  LoopInfo->disposed();
1957  }
1958 }
1959 
1961  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
1962  KR.rewrite();
1963 }
1964 
1966  BB = Schedule.getLoop()->getTopBlock();
1970  assert(LoopInfo);
1971 
1972  rewriteKernel();
1974  fixupBranches();
1975 }
1976 
1978  BB = Schedule.getLoop()->getTopBlock();
1980 
1981  // Dump the schedule before we invalidate and remap all its instructions.
1982  // Stash it in a string so we can print it if we found an error.
1983  std::string ScheduleDump;
1984  raw_string_ostream OS(ScheduleDump);
1985  Schedule.print(OS);
1986  OS.flush();
1987 
1988  // First, run the normal ModuleScheduleExpander. We don't support any
1989  // InstrChanges.
1990  assert(LIS && "Requires LiveIntervals!");
1993  MSE.expand();
1994  MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
1995  if (!ExpandedKernel) {
1996  // The expander optimized away the kernel. We can't do any useful checking.
1997  MSE.cleanup();
1998  return;
1999  }
2000  // Before running the KernelRewriter, re-add BB into the CFG.
2002 
2003  // Now run the new expansion algorithm.
2004  KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2005  KR.rewrite();
2007 
2008  // Collect all illegal phis that the new algorithm created. We'll give these
2009  // to KernelOperandInfo.
2010  SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2011  for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2012  if (NI->isPHI())
2013  IllegalPhis.insert(&*NI);
2014  }
2015 
2016  // Co-iterate across both kernels. We expect them to be identical apart from
2017  // phis and full COPYs (we look through both).
2019  auto OI = ExpandedKernel->begin();
2020  auto NI = BB->begin();
2021  for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2022  while (OI->isPHI() || OI->isFullCopy())
2023  ++OI;
2024  while (NI->isPHI() || NI->isFullCopy())
2025  ++NI;
2026  assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2027  // Analyze every operand separately.
2028  for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2029  OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2030  KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2031  KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2032  }
2033 
2034  bool Failed = false;
2035  for (auto &OldAndNew : KOIs) {
2036  if (OldAndNew.first == OldAndNew.second)
2037  continue;
2038  Failed = true;
2039  errs() << "Modulo kernel validation error: [\n";
2040  errs() << " [golden] ";
2041  OldAndNew.first.print(errs());
2042  errs() << " ";
2043  OldAndNew.second.print(errs());
2044  errs() << "]\n";
2045  }
2046 
2047  if (Failed) {
2048  errs() << "Golden reference kernel:\n";
2049  ExpandedKernel->print(errs());
2050  errs() << "New kernel:\n";
2051  BB->print(errs());
2052  errs() << ScheduleDump;
2054  "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2055  }
2056 
2057  // Cleanup by removing BB from the CFG again as the original
2058  // ModuloScheduleExpander intended.
2060  MSE.cleanup();
2061 }
2062 
2063 //===----------------------------------------------------------------------===//
2064 // ModuloScheduleTestPass implementation
2065 //===----------------------------------------------------------------------===//
2066 // This pass constructs a ModuloSchedule from its module and runs
2067 // ModuloScheduleExpander.
2068 //
2069 // The module is expected to contain a single-block analyzable loop.
2070 // The total order of instructions is taken from the loop as-is.
2071 // Instructions are expected to be annotated with a PostInstrSymbol.
2072 // This PostInstrSymbol must have the following format:
2073 // "Stage=%d Cycle=%d".
2074 //===----------------------------------------------------------------------===//
2075 
2076 namespace {
2077 class ModuloScheduleTest : public MachineFunctionPass {
2078 public:
2079  static char ID;
2080 
2081  ModuloScheduleTest() : MachineFunctionPass(ID) {
2083  }
2084 
2085  bool runOnMachineFunction(MachineFunction &MF) override;
2086  void runOnLoop(MachineFunction &MF, MachineLoop &L);
2087 
2088  void getAnalysisUsage(AnalysisUsage &AU) const override {
2090  AU.addRequired<LiveIntervals>();
2092  }
2093 };
2094 } // namespace
2095 
2096 char ModuloScheduleTest::ID = 0;
2097 
2098 INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2099  "Modulo Schedule test pass", false, false)
2102 INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2103  "Modulo Schedule test pass", false, false)
2104 
2105 bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2106  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2107  for (auto *L : MLI) {
2108  if (L->getTopBlock() != L->getBottomBlock())
2109  continue;
2110  runOnLoop(MF, *L);
2111  return false;
2112  }
2113  return false;
2114 }
2115 
2116 static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2117  std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2118  std::pair<StringRef, StringRef> StageTokenAndValue =
2119  getToken(StageAndCycle.first, "-");
2120  std::pair<StringRef, StringRef> CycleTokenAndValue =
2121  getToken(StageAndCycle.second, "-");
2122  if (StageTokenAndValue.first != "Stage" ||
2123  CycleTokenAndValue.first != "_Cycle") {
2125  "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2126  return;
2127  }
2128 
2129  StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2130  CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2131 
2132  dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2133 }
2134 
2135 void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2136  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2138  dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2139 
2141  std::vector<MachineInstr *> Instrs;
2142  for (MachineInstr &MI : *BB) {
2143  if (MI.isTerminator())
2144  continue;
2145  Instrs.push_back(&MI);
2146  if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2147  dbgs() << "Parsing post-instr symbol for " << MI;
2148  parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2149  }
2150  }
2151 
2152  ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2153  std::move(Stage));
2155  MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2156  MSE.expand();
2157  MSE.cleanup();
2158 }
2159 
2160 //===----------------------------------------------------------------------===//
2161 // ModuloScheduleTestAnnotater implementation
2162 //===----------------------------------------------------------------------===//
2163 
2165  for (MachineInstr *MI : S.getInstructions()) {
2167  raw_svector_ostream OS(SV);
2168  OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2169  MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2170  MI->setPostInstrSymbol(MF, Sym);
2171  }
2172 }
i
i
Definition: README.txt:29
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1070
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:325
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineOperand::CreateReg
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
Definition: MachineOperand.h:800
llvm::PeelingModuloScheduleExpander::rewriteKernel
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
Definition: ModuloSchedule.cpp:1960
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:205
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:176
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:79
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:592
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::MachineBasicBlock::clear
void clear()
Definition: MachineBasicBlock.h:964
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:140
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:456
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineRegisterInfo::getTargetRegisterInfo
const TargetRegisterInfo * getTargetRegisterInfo() const
Definition: MachineRegisterInfo.h:151
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:277
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::LPD_Front
@ LPD_Front
Peel the first iteration of the loop.
Definition: MachineLoopUtils.h:18
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1397
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:493
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:126
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:477
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:841
llvm::copy
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1658
llvm::PeelingModuloScheduleExpander::Epilogs
SmallVector< MachineBasicBlock *, 4 > Epilogs
Definition: ModuloSchedule.h:306
llvm::PeelingModuloScheduleExpander::getStage
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
Definition: ModuloSchedule.h:358
llvm::AMDGPU::Exp::Target
Target
Definition: SIDefines.h:851
llvm::Optional< bool >
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:257
llvm::PeelingModuloScheduleExpander::TII
const TargetInstrInfo * TII
Definition: ModuloSchedule.h:298
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:297
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67
llvm::MachineLoop::getBottomBlock
MachineBasicBlock * getBottomBlock()
Return the "bottom" block in the loop, which is the last block in the linear layout,...
Definition: MachineLoopInfo.cpp:76
llvm::PeelingModuloScheduleExpander::PeeledFront
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
Definition: ModuloSchedule.h:324
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1618
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::PeelingModuloScheduleExpander::moveStageBetweenBlocks
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Definition: ModuloSchedule.cpp:1600
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:291
llvm::TargetInstrInfo::insertBranch
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
Definition: TargetInstrInfo.h:709
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:283
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:747
llvm::ModuloSchedule::dump
void dump()
Definition: ModuloSchedule.h:154
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:865
llvm::Register::isPhysical
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
llvm::PeelingModuloScheduleExpander::AvailableStages
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
Definition: ModuloSchedule.h:312
llvm::MachineBasicBlock::getFirstInstrTerminator
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
Definition: MachineBasicBlock.cpp:247
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:834
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:204
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:379
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
MachineLoopInfo.h
llvm::TargetInstrInfo::removeBranch
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
Definition: TargetInstrInfo.h:691
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::Failed
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
llvm::PeelingModuloScheduleExpander::Schedule
ModuloSchedule & Schedule
Definition: ModuloSchedule.h:294
llvm::PeelingModuloScheduleExpander::PeeledBack
std::deque< MachineBasicBlock * > PeeledBack
Definition: ModuloSchedule.h:324
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineInstr::setMemRefs
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
Definition: MachineInstr.cpp:344
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:343
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
llvm::PeelingModuloScheduleExpander::PhiNodeLoopIteration
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
Definition: ModuloSchedule.h:315
MCContext.h
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::MachineBasicBlock::eraseFromParent
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Definition: MachineBasicBlock.cpp:1328
llvm::PeelingModuloScheduleExpander::BlockMIs
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Definition: ModuloSchedule.h:321
llvm::PeelingModuloScheduleExpander::BB
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
Definition: ModuloSchedule.h:302
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::PeelingModuloScheduleExpander::Prologs
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
Definition: ModuloSchedule.h:306
pass
modulo schedule Modulo Schedule test pass
Definition: ModuloSchedule.cpp:2103
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::PeelingModuloScheduleExpander::filterInstructions
void filterInstructions(MachineBasicBlock *MB, int MinStage)
Definition: ModuloSchedule.cpp:1571
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:187
llvm::ModuloSchedule::getInstructions
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
Definition: ModuloSchedule.h:152
llvm::PeelSingleBlockLoop
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
Definition: MachineLoopUtils.cpp:26
llvm::BitVector
Definition: BitVector.h:75
CombinerObjective::Default
@ Default
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition: MachineOperand.h:237
llvm::PeelingModuloScheduleExpander::peelKernel
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
Definition: ModuloSchedule.cpp:1555
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
parseSymbolString
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
Definition: ModuloSchedule.cpp:2116
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:94
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:769
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1369
llvm::PeelingModuloScheduleExpander::Preheader
MachineBasicBlock * Preheader
The original loop preheader.
Definition: ModuloSchedule.h:304
llvm::PeelingModuloScheduleExpander::fixupBranches
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
Definition: ModuloSchedule.cpp:1913
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:315
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::ModuloScheduleExpander::getRewrittenKernel
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Definition: ModuloSchedule.h:275
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::PeelingModuloScheduleExpander::rewriteUsesOf
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
Definition: ModuloSchedule.cpp:1870
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:409
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:267
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1699
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2164
LiveIntervals.h
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:825
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:385
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:485
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:716
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:265
llvm::MachineOperand::isDead
bool isDead() const
Definition: MachineOperand.h:384
I
#define I(x, y, z)
Definition: MD5.cpp:58
StringExtras.h
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:608
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:196
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:441
replaceRegUsesAfterLoop
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
Definition: ModuloSchedule.cpp:331
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:279
llvm::TargetInstrInfo::insertUnconditionalBranch
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
Definition: TargetInstrInfo.h:717
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:341
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:234
llvm::operator==
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2002
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1257
llvm::PeelingModuloScheduleExpander::CreateLCSSAExitingBlock
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
Definition: ModuloSchedule.cpp:1819
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::LoopPeelDirection
LoopPeelDirection
Definition: MachineLoopUtils.h:17
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:261
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:263
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::MachineInstr::isRegTiedToUseOperand
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
Definition: MachineInstr.h:1523
llvm::PeelingModuloScheduleExpander::getEquivalentRegisterIn
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
Definition: ModuloSchedule.cpp:1863
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::MachineBasicBlock::phis
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
Definition: MachineBasicBlock.h:305
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:178
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineInstr::isInlineAsm
bool isInlineAsm() const
Definition: MachineInstr.h:1263
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:687
hasUseAfterLoop
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
Definition: ModuloSchedule.cpp:345
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1965
llvm::MachineRegisterInfo::use_begin
use_iterator use_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:472
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: ModuloSchedule.cpp:36
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:868
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::LiveIntervals::createEmptyInterval
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
Definition: LiveIntervals.h:131
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: ModuloSchedule.cpp:60
llvm::MachineBasicBlock::transferSuccessors
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
Definition: MachineBasicBlock.cpp:866
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:255
llvm::LPD_Back
@ LPD_Back
Peel the last iteration of the loop.
Definition: MachineLoopUtils.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
j
return j(j<< 16)
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:808
llvm::PeelingModuloScheduleExpander::LIS
LiveIntervals * LIS
Definition: ModuloSchedule.h:299
llvm::MachineBasicBlock::replacePhiUsesWith
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
Definition: MachineBasicBlock.cpp:1356
llvm::PeelingModuloScheduleExpander::MRI
MachineRegisterInfo & MRI
Definition: ModuloSchedule.h:297
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:785
Epilog
@ Epilog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:125
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1308
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:638
llvm::AArch64CC::LS
@ LS
Definition: AArch64BaseInfo.h:264
llvm::ModuloSchedule::getCycle
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
Definition: ModuloSchedule.h:140
llvm::ModuloSchedule::getNumStages
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
Definition: ModuloSchedule.h:123
llvm::PeelingModuloScheduleExpander::IllegalPhisToDelete
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
Definition: ModuloSchedule.h:326
llvm::ModuloSchedule::getLoop
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
Definition: ModuloSchedule.h:119
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
llvm::MachineInstr::memoperands_empty
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:722
llvm::PeelingModuloScheduleExpander::peelPrologAndEpilogs
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
Definition: ModuloSchedule.cpp:1702
llvm::PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Definition: ModuloSchedule.cpp:1977
getInitPhiReg
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
Definition: ModuloSchedule.cpp:52
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
llvm::MachineRegisterInfo::use_end
static use_iterator use_end()
Definition: MachineRegisterInfo.h:475
llvm::numbers::phi
constexpr double phi
Definition: MathExtras.h:71
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::iterator
DenseMapIterator< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > > iterator
Definition: DenseMap.h:71
llvm::ModuloSchedule::print
void print(raw_ostream &OS)
Definition: ModuloSchedule.cpp:25
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:277
MachineInstrBuilder.h
llvm::PeelingModuloScheduleExpander::LiveStages
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
Definition: ModuloSchedule.h:308
used
This might compile to this xmm1 xorps xmm0 movss xmm0 ret Now consider if the code caused xmm1 to get spilled This might produce this xmm1 movaps xmm0 movaps xmm1 movss xmm0 ret since the reload is only used by these we could fold it into the producing something like xmm1 movaps xmm0 ret saving two instructions The basic idea is that a reload from a spill if only one byte chunk is used
Definition: README-SSE.txt:270
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:53
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:276
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:494
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:698
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:662
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:83
llvm::MachineInstr::memoperands
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:692
llvm::initializeModuloScheduleTestPass
void initializeModuloScheduleTestPass(PassRegistry &)
llvm::PeelingModuloScheduleExpander::MF
MachineFunction & MF
Definition: ModuloSchedule.h:295
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:160
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test", "Modulo Schedule test pass", false, false) INITIALIZE_PASS_END(ModuloScheduleTest
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:125
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:329
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
llvm::ModuloSchedule::getStage
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
Definition: ModuloSchedule.h:134
llvm::MachineInstr::tieOperands
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
Definition: MachineInstr.cpp:1052
test
modulo schedule test
Definition: ModuloSchedule.cpp:2102
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:620
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:488
llvm::PeelingModuloScheduleExpander::getPhiCanonicalReg
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
Definition: ModuloSchedule.cpp:1685
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:279
llvm::MachineRegisterInfo::setRegClass
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
Definition: MachineRegisterInfo.cpp:56
removePhis
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Definition: ModuloSchedule.cpp:814
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1225
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:927
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::PeelingModuloScheduleExpander::CanonicalMIs
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
Definition: ModuloSchedule.h:319