LLVM 17.0.0git
ModuloSchedule.cpp
Go to the documentation of this file.
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
17#include "llvm/MC/MCContext.h"
18#include "llvm/Support/Debug.h"
21
22#define DEBUG_TYPE "pipeliner"
23using namespace llvm;
24
26 for (MachineInstr *MI : ScheduledInstrs)
27 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
28}
29
30//===----------------------------------------------------------------------===//
31// ModuloScheduleExpander implementation
32//===----------------------------------------------------------------------===//
33
34/// Return the register values for the operands of a Phi instruction.
35/// This function assume the instruction is a Phi.
37 unsigned &InitVal, unsigned &LoopVal) {
38 assert(Phi.isPHI() && "Expecting a Phi.");
39
40 InitVal = 0;
41 LoopVal = 0;
42 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
43 if (Phi.getOperand(i + 1).getMBB() != Loop)
44 InitVal = Phi.getOperand(i).getReg();
45 else
46 LoopVal = Phi.getOperand(i).getReg();
47
48 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
49}
50
51/// Return the Phi register value that comes from the incoming block.
52static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
53 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
54 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
55 return Phi.getOperand(i).getReg();
56 return 0;
57}
58
59/// Return the Phi register value that comes the loop block.
60static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
61 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
62 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
63 return Phi.getOperand(i).getReg();
64 return 0;
65}
66
68 BB = Schedule.getLoop()->getTopBlock();
69 Preheader = *BB->pred_begin();
70 if (Preheader == BB)
71 Preheader = *std::next(BB->pred_begin());
72
73 // Iterate over the definitions in each instruction, and compute the
74 // stage difference for each use. Keep the maximum value.
75 for (MachineInstr *MI : Schedule.getInstructions()) {
76 int DefStage = Schedule.getStage(MI);
77 for (const MachineOperand &Op : MI->all_defs()) {
78 Register Reg = Op.getReg();
79 unsigned MaxDiff = 0;
80 bool PhiIsSwapped = false;
81 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
82 MachineInstr *UseMI = UseOp.getParent();
83 int UseStage = Schedule.getStage(UseMI);
84 unsigned Diff = 0;
85 if (UseStage != -1 && UseStage >= DefStage)
86 Diff = UseStage - DefStage;
87 if (MI->isPHI()) {
88 if (isLoopCarried(*MI))
89 ++Diff;
90 else
91 PhiIsSwapped = true;
92 }
93 MaxDiff = std::max(Diff, MaxDiff);
94 }
95 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
96 }
97 }
98
99 generatePipelinedLoop();
100}
101
102void ModuloScheduleExpander::generatePipelinedLoop() {
104 assert(LoopInfo && "Must be able to analyze loop!");
105
106 // Create a new basic block for the kernel and add it to the CFG.
108
109 unsigned MaxStageCount = Schedule.getNumStages() - 1;
110
111 // Remember the registers that are used in different stages. The index is
112 // the iteration, or stage, that the instruction is scheduled in. This is
113 // a map between register names in the original block and the names created
114 // in each stage of the pipelined loop.
115 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
116
117 // The renaming destination by Phis for the registers across stages.
118 // This map is updated during Phis generation to point to the most recent
119 // renaming destination.
120 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
121
122 InstrMapTy InstrMap;
123
125
126 // Generate the prolog instructions that set up the pipeline.
127 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
128 MF.insert(BB->getIterator(), KernelBB);
129
130 // Rearrange the instructions to generate the new, pipelined loop,
131 // and update register names as needed.
132 for (MachineInstr *CI : Schedule.getInstructions()) {
133 if (CI->isPHI())
134 continue;
135 unsigned StageNum = Schedule.getStage(CI);
136 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
137 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
138 KernelBB->push_back(NewMI);
139 InstrMap[NewMI] = CI;
140 }
141
142 // Copy any terminator instructions to the new kernel, and update
143 // names as needed.
144 for (MachineInstr &MI : BB->terminators()) {
145 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
146 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
147 KernelBB->push_back(NewMI);
148 InstrMap[NewMI] = &MI;
149 }
150
151 NewKernel = KernelBB;
152 KernelBB->transferSuccessors(BB);
153 KernelBB->replaceSuccessor(BB, KernelBB);
154
155 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
156 InstrMap, MaxStageCount, MaxStageCount, false);
157 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
158 InstrMap, MaxStageCount, MaxStageCount, false);
159
160 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
161
163 // Generate the epilog instructions to complete the pipeline.
164 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
165 PrologBBs);
166
167 // We need this step because the register allocation doesn't handle some
168 // situations well, so we insert copies to help out.
169 splitLifetimes(KernelBB, EpilogBBs);
170
171 // Remove dead instructions due to loop induction variables.
172 removeDeadInstructions(KernelBB, EpilogBBs);
173
174 // Add branches between prolog and epilog blocks.
175 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
176
177 delete[] VRMap;
178 delete[] VRMapPhi;
179}
180
182 // Remove the original loop since it's no longer referenced.
183 for (auto &I : *BB)
185 BB->clear();
186 BB->eraseFromParent();
187}
188
189/// Generate the pipeline prolog code.
190void ModuloScheduleExpander::generateProlog(unsigned LastStage,
191 MachineBasicBlock *KernelBB,
192 ValueMapTy *VRMap,
193 MBBVectorTy &PrologBBs) {
194 MachineBasicBlock *PredBB = Preheader;
195 InstrMapTy InstrMap;
196
197 // Generate a basic block for each stage, not including the last stage,
198 // which will be generated in the kernel. Each basic block may contain
199 // instructions from multiple stages/iterations.
200 for (unsigned i = 0; i < LastStage; ++i) {
201 // Create and insert the prolog basic block prior to the original loop
202 // basic block. The original loop is removed later.
204 PrologBBs.push_back(NewBB);
205 MF.insert(BB->getIterator(), NewBB);
206 NewBB->transferSuccessors(PredBB);
207 PredBB->addSuccessor(NewBB);
208 PredBB = NewBB;
209
210 // Generate instructions for each appropriate stage. Process instructions
211 // in original program order.
212 for (int StageNum = i; StageNum >= 0; --StageNum) {
214 BBE = BB->getFirstTerminator();
215 BBI != BBE; ++BBI) {
216 if (Schedule.getStage(&*BBI) == StageNum) {
217 if (BBI->isPHI())
218 continue;
219 MachineInstr *NewMI =
220 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
221 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
222 NewBB->push_back(NewMI);
223 InstrMap[NewMI] = &*BBI;
224 }
225 }
226 }
227 rewritePhiValues(NewBB, i, VRMap, InstrMap);
228 LLVM_DEBUG({
229 dbgs() << "prolog:\n";
230 NewBB->dump();
231 });
232 }
233
234 PredBB->replaceSuccessor(BB, KernelBB);
235
236 // Check if we need to remove the branch from the preheader to the original
237 // loop, and replace it with a branch to the new loop.
238 unsigned numBranches = TII->removeBranch(*Preheader);
239 if (numBranches) {
241 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
242 }
243}
244
245/// Generate the pipeline epilog code. The epilog code finishes the iterations
246/// that were started in either the prolog or the kernel. We create a basic
247/// block for each stage that needs to complete.
248void ModuloScheduleExpander::generateEpilog(
249 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
250 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
251 MBBVectorTy &PrologBBs) {
252 // We need to change the branch from the kernel to the first epilog block, so
253 // this call to analyze branch uses the kernel rather than the original BB.
254 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
256 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
257 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
258 if (checkBranch)
259 return;
260
261 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
262 if (*LoopExitI == KernelBB)
263 ++LoopExitI;
264 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
265 MachineBasicBlock *LoopExitBB = *LoopExitI;
266
267 MachineBasicBlock *PredBB = KernelBB;
268 MachineBasicBlock *EpilogStart = LoopExitBB;
269 InstrMapTy InstrMap;
270
271 // Generate a basic block for each stage, not including the last stage,
272 // which was generated for the kernel. Each basic block may contain
273 // instructions from multiple stages/iterations.
274 int EpilogStage = LastStage + 1;
275 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
277 EpilogBBs.push_back(NewBB);
278 MF.insert(BB->getIterator(), NewBB);
279
280 PredBB->replaceSuccessor(LoopExitBB, NewBB);
281 NewBB->addSuccessor(LoopExitBB);
282
283 if (EpilogStart == LoopExitBB)
284 EpilogStart = NewBB;
285
286 // Add instructions to the epilog depending on the current block.
287 // Process instructions in original program order.
288 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
289 for (auto &BBI : *BB) {
290 if (BBI.isPHI())
291 continue;
292 MachineInstr *In = &BBI;
293 if ((unsigned)Schedule.getStage(In) == StageNum) {
294 // Instructions with memoperands in the epilog are updated with
295 // conservative values.
296 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
297 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
298 NewBB->push_back(NewMI);
299 InstrMap[NewMI] = In;
300 }
301 }
302 }
303 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
304 InstrMap, LastStage, EpilogStage, i == 1);
305 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
306 InstrMap, LastStage, EpilogStage, i == 1);
307 PredBB = NewBB;
308
309 LLVM_DEBUG({
310 dbgs() << "epilog:\n";
311 NewBB->dump();
312 });
313 }
314
315 // Fix any Phi nodes in the loop exit block.
316 LoopExitBB->replacePhiUsesWith(BB, PredBB);
317
318 // Create a branch to the new epilog from the kernel.
319 // Remove the original branch and add a new branch to the epilog.
320 TII->removeBranch(*KernelBB);
321 assert((OrigBB == TBB || OrigBB == FBB) &&
322 "Unable to determine looping branch direction");
323 if (OrigBB != TBB)
324 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
325 else
326 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
327 // Add a branch to the loop exit.
328 if (EpilogBBs.size() > 0) {
329 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
331 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
332 }
333}
334
335/// Replace all uses of FromReg that appear outside the specified
336/// basic block with ToReg.
337static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
340 LiveIntervals &LIS) {
341 for (MachineOperand &O :
342 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
343 if (O.getParent()->getParent() != MBB)
344 O.setReg(ToReg);
345 if (!LIS.hasInterval(ToReg))
346 LIS.createEmptyInterval(ToReg);
347}
348
349/// Return true if the register has a use that occurs outside the
350/// specified loop.
351static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
353 for (const MachineOperand &MO : MRI.use_operands(Reg))
354 if (MO.getParent()->getParent() != BB)
355 return true;
356 return false;
357}
358
359/// Generate Phis for the specific block in the generated pipelined code.
360/// This function looks at the Phis from the original code to guide the
361/// creation of new Phis.
362void ModuloScheduleExpander::generateExistingPhis(
364 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
365 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
366 // Compute the stage number for the initial value of the Phi, which
367 // comes from the prolog. The prolog to use depends on to which kernel/
368 // epilog that we're adding the Phi.
369 unsigned PrologStage = 0;
370 unsigned PrevStage = 0;
371 bool InKernel = (LastStageNum == CurStageNum);
372 if (InKernel) {
373 PrologStage = LastStageNum - 1;
374 PrevStage = CurStageNum;
375 } else {
376 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
377 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
378 }
379
380 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
381 BBE = BB->getFirstNonPHI();
382 BBI != BBE; ++BBI) {
383 Register Def = BBI->getOperand(0).getReg();
384
385 unsigned InitVal = 0;
386 unsigned LoopVal = 0;
387 getPhiRegs(*BBI, BB, InitVal, LoopVal);
388
389 unsigned PhiOp1 = 0;
390 // The Phi value from the loop body typically is defined in the loop, but
391 // not always. So, we need to check if the value is defined in the loop.
392 unsigned PhiOp2 = LoopVal;
393 if (VRMap[LastStageNum].count(LoopVal))
394 PhiOp2 = VRMap[LastStageNum][LoopVal];
395
396 int StageScheduled = Schedule.getStage(&*BBI);
397 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
398 unsigned NumStages = getStagesForReg(Def, CurStageNum);
399 if (NumStages == 0) {
400 // We don't need to generate a Phi anymore, but we need to rename any uses
401 // of the Phi value.
402 unsigned NewReg = VRMap[PrevStage][LoopVal];
403 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
404 InitVal, NewReg);
405 if (VRMap[CurStageNum].count(LoopVal))
406 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
407 }
408 // Adjust the number of Phis needed depending on the number of prologs left,
409 // and the distance from where the Phi is first scheduled. The number of
410 // Phis cannot exceed the number of prolog stages. Each stage can
411 // potentially define two values.
412 unsigned MaxPhis = PrologStage + 2;
413 if (!InKernel && (int)PrologStage <= LoopValStage)
414 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
415 unsigned NumPhis = std::min(NumStages, MaxPhis);
416
417 unsigned NewReg = 0;
418 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
419 // In the epilog, we may need to look back one stage to get the correct
420 // Phi name, because the epilog and prolog blocks execute the same stage.
421 // The correct name is from the previous block only when the Phi has
422 // been completely scheduled prior to the epilog, and Phi value is not
423 // needed in multiple stages.
424 int StageDiff = 0;
425 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
426 NumPhis == 1)
427 StageDiff = 1;
428 // Adjust the computations below when the phi and the loop definition
429 // are scheduled in different stages.
430 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
431 StageDiff = StageScheduled - LoopValStage;
432 for (unsigned np = 0; np < NumPhis; ++np) {
433 // If the Phi hasn't been scheduled, then use the initial Phi operand
434 // value. Otherwise, use the scheduled version of the instruction. This
435 // is a little complicated when a Phi references another Phi.
436 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
437 PhiOp1 = InitVal;
438 // Check if the Phi has already been scheduled in a prolog stage.
439 else if (PrologStage >= AccessStage + StageDiff + np &&
440 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
441 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
442 // Check if the Phi has already been scheduled, but the loop instruction
443 // is either another Phi, or doesn't occur in the loop.
444 else if (PrologStage >= AccessStage + StageDiff + np) {
445 // If the Phi references another Phi, we need to examine the other
446 // Phi to get the correct value.
447 PhiOp1 = LoopVal;
448 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
449 int Indirects = 1;
450 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
451 int PhiStage = Schedule.getStage(InstOp1);
452 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
453 PhiOp1 = getInitPhiReg(*InstOp1, BB);
454 else
455 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
456 InstOp1 = MRI.getVRegDef(PhiOp1);
457 int PhiOpStage = Schedule.getStage(InstOp1);
458 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
459 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
460 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
461 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
462 break;
463 }
464 ++Indirects;
465 }
466 } else
467 PhiOp1 = InitVal;
468 // If this references a generated Phi in the kernel, get the Phi operand
469 // from the incoming block.
470 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
471 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
472 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
473
474 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
475 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
476 // In the epilog, a map lookup is needed to get the value from the kernel,
477 // or previous epilog block. How is does this depends on if the
478 // instruction is scheduled in the previous block.
479 if (!InKernel) {
480 int StageDiffAdj = 0;
481 if (LoopValStage != -1 && StageScheduled > LoopValStage)
482 StageDiffAdj = StageScheduled - LoopValStage;
483 // Use the loop value defined in the kernel, unless the kernel
484 // contains the last definition of the Phi.
485 if (np == 0 && PrevStage == LastStageNum &&
486 (StageScheduled != 0 || LoopValStage != 0) &&
487 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
488 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
489 // Use the value defined by the Phi. We add one because we switch
490 // from looking at the loop value to the Phi definition.
491 else if (np > 0 && PrevStage == LastStageNum &&
492 VRMap[PrevStage - np + 1].count(Def))
493 PhiOp2 = VRMap[PrevStage - np + 1][Def];
494 // Use the loop value defined in the kernel.
495 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
496 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
497 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
498 // Use the value defined by the Phi, unless we're generating the first
499 // epilog and the Phi refers to a Phi in a different stage.
500 else if (VRMap[PrevStage - np].count(Def) &&
501 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
502 (LoopValStage == StageScheduled)))
503 PhiOp2 = VRMap[PrevStage - np][Def];
504 }
505
506 // Check if we can reuse an existing Phi. This occurs when a Phi
507 // references another Phi, and the other Phi is scheduled in an
508 // earlier stage. We can try to reuse an existing Phi up until the last
509 // stage of the current Phi.
510 if (LoopDefIsPhi) {
511 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
512 int LVNumStages = getStagesForPhi(LoopVal);
513 int StageDiff = (StageScheduled - LoopValStage);
514 LVNumStages -= StageDiff;
515 // Make sure the loop value Phi has been processed already.
516 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
517 NewReg = PhiOp2;
518 unsigned ReuseStage = CurStageNum;
519 if (isLoopCarried(*PhiInst))
520 ReuseStage -= LVNumStages;
521 // Check if the Phi to reuse has been generated yet. If not, then
522 // there is nothing to reuse.
523 if (VRMap[ReuseStage - np].count(LoopVal)) {
524 NewReg = VRMap[ReuseStage - np][LoopVal];
525
526 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
527 Def, NewReg);
528 // Update the map with the new Phi name.
529 VRMap[CurStageNum - np][Def] = NewReg;
530 PhiOp2 = NewReg;
531 if (VRMap[LastStageNum - np - 1].count(LoopVal))
532 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
533
534 if (IsLast && np == NumPhis - 1)
535 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
536 continue;
537 }
538 }
539 }
540 if (InKernel && StageDiff > 0 &&
541 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
542 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
543 }
544
545 const TargetRegisterClass *RC = MRI.getRegClass(Def);
546 NewReg = MRI.createVirtualRegister(RC);
547
548 MachineInstrBuilder NewPhi =
549 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
550 TII->get(TargetOpcode::PHI), NewReg);
551 NewPhi.addReg(PhiOp1).addMBB(BB1);
552 NewPhi.addReg(PhiOp2).addMBB(BB2);
553 if (np == 0)
554 InstrMap[NewPhi] = &*BBI;
555
556 // We define the Phis after creating the new pipelined code, so
557 // we need to rename the Phi values in scheduled instructions.
558
559 unsigned PrevReg = 0;
560 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
561 PrevReg = VRMap[PrevStage - np][LoopVal];
562 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
563 NewReg, PrevReg);
564 // If the Phi has been scheduled, use the new name for rewriting.
565 if (VRMap[CurStageNum - np].count(Def)) {
566 unsigned R = VRMap[CurStageNum - np][Def];
567 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
568 NewReg);
569 }
570
571 // Check if we need to rename any uses that occurs after the loop. The
572 // register to replace depends on whether the Phi is scheduled in the
573 // epilog.
574 if (IsLast && np == NumPhis - 1)
575 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
576
577 // In the kernel, a dependent Phi uses the value from this Phi.
578 if (InKernel)
579 PhiOp2 = NewReg;
580
581 // Update the map with the new Phi name.
582 VRMap[CurStageNum - np][Def] = NewReg;
583 }
584
585 while (NumPhis++ < NumStages) {
586 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
587 NewReg, 0);
588 }
589
590 // Check if we need to rename a Phi that has been eliminated due to
591 // scheduling.
592 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
593 replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
594 }
595}
596
597/// Generate Phis for the specified block in the generated pipelined code.
598/// These are new Phis needed because the definition is scheduled after the
599/// use in the pipelined sequence.
600void ModuloScheduleExpander::generatePhis(
602 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
603 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
604 bool IsLast) {
605 // Compute the stage number that contains the initial Phi value, and
606 // the Phi from the previous stage.
607 unsigned PrologStage = 0;
608 unsigned PrevStage = 0;
609 unsigned StageDiff = CurStageNum - LastStageNum;
610 bool InKernel = (StageDiff == 0);
611 if (InKernel) {
612 PrologStage = LastStageNum - 1;
613 PrevStage = CurStageNum;
614 } else {
615 PrologStage = LastStageNum - StageDiff;
616 PrevStage = LastStageNum + StageDiff - 1;
617 }
618
619 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
620 BBE = BB->instr_end();
621 BBI != BBE; ++BBI) {
622 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
623 MachineOperand &MO = BBI->getOperand(i);
624 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
625 continue;
626
627 int StageScheduled = Schedule.getStage(&*BBI);
628 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
629 Register Def = MO.getReg();
630 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
631 // An instruction scheduled in stage 0 and is used after the loop
632 // requires a phi in the epilog for the last definition from either
633 // the kernel or prolog.
634 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
635 hasUseAfterLoop(Def, BB, MRI))
636 NumPhis = 1;
637 if (!InKernel && (unsigned)StageScheduled > PrologStage)
638 continue;
639
640 unsigned PhiOp2;
641 if (InKernel) {
642 PhiOp2 = VRMap[PrevStage][Def];
643 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
644 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
645 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
646 }
647 // The number of Phis can't exceed the number of prolog stages. The
648 // prolog stage number is zero based.
649 if (NumPhis > PrologStage + 1 - StageScheduled)
650 NumPhis = PrologStage + 1 - StageScheduled;
651 for (unsigned np = 0; np < NumPhis; ++np) {
652 // Example for
653 // Org:
654 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
655 //
656 // Prolog0 (Stage0):
657 // %Clone0 = ...
658 // Prolog1 (Stage1):
659 // %Clone1 = ...
660 // Kernel (Stage2):
661 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
662 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
663 // %Clone2 = ...
664 // Epilog0 (Stage3):
665 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
666 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
667 // Epilog1 (Stage4):
668 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
669 //
670 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
671 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
672 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
673
674 unsigned PhiOp1 = VRMap[PrologStage][Def];
675 if (np <= PrologStage)
676 PhiOp1 = VRMap[PrologStage - np][Def];
677 if (!InKernel) {
678 if (PrevStage == LastStageNum && np == 0)
679 PhiOp2 = VRMap[LastStageNum][Def];
680 else
681 PhiOp2 = VRMapPhi[PrevStage - np][Def];
682 }
683
684 const TargetRegisterClass *RC = MRI.getRegClass(Def);
685 Register NewReg = MRI.createVirtualRegister(RC);
686
687 MachineInstrBuilder NewPhi =
688 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
689 TII->get(TargetOpcode::PHI), NewReg);
690 NewPhi.addReg(PhiOp1).addMBB(BB1);
691 NewPhi.addReg(PhiOp2).addMBB(BB2);
692 if (np == 0)
693 InstrMap[NewPhi] = &*BBI;
694
695 // Rewrite uses and update the map. The actions depend upon whether
696 // we generating code for the kernel or epilog blocks.
697 if (InKernel) {
698 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
699 NewReg);
700 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
701 NewReg);
702
703 PhiOp2 = NewReg;
704 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
705 } else {
706 VRMapPhi[CurStageNum - np][Def] = NewReg;
707 if (np == NumPhis - 1)
708 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
709 NewReg);
710 }
711 if (IsLast && np == NumPhis - 1)
712 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
713 }
714 }
715 }
716}
717
718/// Remove instructions that generate values with no uses.
719/// Typically, these are induction variable operations that generate values
720/// used in the loop itself. A dead instruction has a definition with
721/// no uses, or uses that occur in the original loop only.
722void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
723 MBBVectorTy &EpilogBBs) {
724 // For each epilog block, check that the value defined by each instruction
725 // is used. If not, delete it.
726 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
728 ME = MBB->instr_rend();
729 MI != ME;) {
730 // From DeadMachineInstructionElem. Don't delete inline assembly.
731 if (MI->isInlineAsm()) {
732 ++MI;
733 continue;
734 }
735 bool SawStore = false;
736 // Check if it's safe to remove the instruction due to side effects.
737 // We can, and want to, remove Phis here.
738 if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
739 ++MI;
740 continue;
741 }
742 bool used = true;
743 for (const MachineOperand &MO : MI->all_defs()) {
744 Register reg = MO.getReg();
745 // Assume physical registers are used, unless they are marked dead.
746 if (reg.isPhysical()) {
747 used = !MO.isDead();
748 if (used)
749 break;
750 continue;
751 }
752 unsigned realUses = 0;
753 for (const MachineOperand &U : MRI.use_operands(reg)) {
754 // Check if there are any uses that occur only in the original
755 // loop. If so, that's not a real use.
756 if (U.getParent()->getParent() != BB) {
757 realUses++;
758 used = true;
759 break;
760 }
761 }
762 if (realUses > 0)
763 break;
764 used = false;
765 }
766 if (!used) {
768 MI++->eraseFromParent();
769 continue;
770 }
771 ++MI;
772 }
773 // In the kernel block, check if we can remove a Phi that generates a value
774 // used in an instruction removed in the epilog block.
775 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
776 Register reg = MI.getOperand(0).getReg();
777 if (MRI.use_begin(reg) == MRI.use_end()) {
779 MI.eraseFromParent();
780 }
781 }
782}
783
784/// For loop carried definitions, we split the lifetime of a virtual register
785/// that has uses past the definition in the next iteration. A copy with a new
786/// virtual register is inserted before the definition, which helps with
787/// generating a better register assignment.
788///
789/// v1 = phi(a, v2) v1 = phi(a, v2)
790/// v2 = phi(b, v3) v2 = phi(b, v3)
791/// v3 = .. v4 = copy v1
792/// .. = V1 v3 = ..
793/// .. = v4
794void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
795 MBBVectorTy &EpilogBBs) {
797 for (auto &PHI : KernelBB->phis()) {
798 Register Def = PHI.getOperand(0).getReg();
799 // Check for any Phi definition that used as an operand of another Phi
800 // in the same block.
802 E = MRI.use_instr_end();
803 I != E; ++I) {
804 if (I->isPHI() && I->getParent() == KernelBB) {
805 // Get the loop carried definition.
806 unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
807 if (!LCDef)
808 continue;
809 MachineInstr *MI = MRI.getVRegDef(LCDef);
810 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
811 continue;
812 // Search through the rest of the block looking for uses of the Phi
813 // definition. If one occurs, then split the lifetime.
814 unsigned SplitReg = 0;
816 KernelBB->instr_end()))
817 if (BBJ.readsRegister(Def)) {
818 // We split the lifetime when we find the first use.
819 if (SplitReg == 0) {
820 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
821 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
822 TII->get(TargetOpcode::COPY), SplitReg)
823 .addReg(Def);
824 }
825 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
826 }
827 if (!SplitReg)
828 continue;
829 // Search through each of the epilog blocks for any uses to be renamed.
830 for (auto &Epilog : EpilogBBs)
831 for (auto &I : *Epilog)
832 if (I.readsRegister(Def))
833 I.substituteRegister(Def, SplitReg, 0, *TRI);
834 break;
835 }
836 }
837 }
838}
839
840/// Remove the incoming block from the Phis in a basic block.
841static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
842 for (MachineInstr &MI : *BB) {
843 if (!MI.isPHI())
844 break;
845 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
846 if (MI.getOperand(i + 1).getMBB() == Incoming) {
847 MI.removeOperand(i + 1);
848 MI.removeOperand(i);
849 break;
850 }
851 }
852}
853
854/// Create branches from each prolog basic block to the appropriate epilog
855/// block. These edges are needed if the loop ends before reaching the
856/// kernel.
857void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
858 MBBVectorTy &PrologBBs,
859 MachineBasicBlock *KernelBB,
860 MBBVectorTy &EpilogBBs,
861 ValueMapTy *VRMap) {
862 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
863 MachineBasicBlock *LastPro = KernelBB;
864 MachineBasicBlock *LastEpi = KernelBB;
865
866 // Start from the blocks connected to the kernel and work "out"
867 // to the first prolog and the last epilog blocks.
869 unsigned MaxIter = PrologBBs.size() - 1;
870 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
871 // Add branches to the prolog that go to the corresponding
872 // epilog, and the fall-thru prolog/kernel block.
873 MachineBasicBlock *Prolog = PrologBBs[j];
874 MachineBasicBlock *Epilog = EpilogBBs[i];
875
877 std::optional<bool> StaticallyGreater =
878 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
879 unsigned numAdded = 0;
880 if (!StaticallyGreater) {
881 Prolog->addSuccessor(Epilog);
882 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
883 } else if (*StaticallyGreater == false) {
884 Prolog->addSuccessor(Epilog);
885 Prolog->removeSuccessor(LastPro);
886 LastEpi->removeSuccessor(Epilog);
887 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
888 removePhis(Epilog, LastEpi);
889 // Remove the blocks that are no longer referenced.
890 if (LastPro != LastEpi) {
891 LastEpi->clear();
892 LastEpi->eraseFromParent();
893 }
894 if (LastPro == KernelBB) {
895 LoopInfo->disposed();
896 NewKernel = nullptr;
897 }
898 LastPro->clear();
899 LastPro->eraseFromParent();
900 } else {
901 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
903 }
904 LastPro = Prolog;
905 LastEpi = Epilog;
907 E = Prolog->instr_rend();
908 I != E && numAdded > 0; ++I, --numAdded)
909 updateInstruction(&*I, false, j, 0, VRMap);
910 }
911
912 if (NewKernel) {
913 LoopInfo->setPreheader(PrologBBs[MaxIter]);
914 LoopInfo->adjustTripCount(-(MaxIter + 1));
915 }
916}
917
918/// Return true if we can compute the amount the instruction changes
919/// during each iteration. Set Delta to the amount of the change.
920bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
922 const MachineOperand *BaseOp;
923 int64_t Offset;
924 bool OffsetIsScalable;
925 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
926 return false;
927
928 // FIXME: This algorithm assumes instructions have fixed-size offsets.
929 if (OffsetIsScalable)
930 return false;
931
932 if (!BaseOp->isReg())
933 return false;
934
935 Register BaseReg = BaseOp->getReg();
936
938 // Check if there is a Phi. If so, get the definition in the loop.
939 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
940 if (BaseDef && BaseDef->isPHI()) {
941 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
942 BaseDef = MRI.getVRegDef(BaseReg);
943 }
944 if (!BaseDef)
945 return false;
946
947 int D = 0;
948 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
949 return false;
950
951 Delta = D;
952 return true;
953}
954
955/// Update the memory operand with a new offset when the pipeliner
956/// generates a new copy of the instruction that refers to a
957/// different memory location.
958void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
959 MachineInstr &OldMI,
960 unsigned Num) {
961 if (Num == 0)
962 return;
963 // If the instruction has memory operands, then adjust the offset
964 // when the instruction appears in different stages.
965 if (NewMI.memoperands_empty())
966 return;
968 for (MachineMemOperand *MMO : NewMI.memoperands()) {
969 // TODO: Figure out whether isAtomic is really necessary (see D57601).
970 if (MMO->isVolatile() || MMO->isAtomic() ||
971 (MMO->isInvariant() && MMO->isDereferenceable()) ||
972 (!MMO->getValue())) {
973 NewMMOs.push_back(MMO);
974 continue;
975 }
976 unsigned Delta;
977 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
978 int64_t AdjOffset = Delta * Num;
979 NewMMOs.push_back(
980 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
981 } else {
982 NewMMOs.push_back(
984 }
985 }
986 NewMI.setMemRefs(MF, NewMMOs);
987}
988
989/// Clone the instruction for the new pipelined loop and update the
990/// memory operands, if needed.
991MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
992 unsigned CurStageNum,
993 unsigned InstStageNum) {
994 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
995 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
996 return NewMI;
997}
998
999/// Clone the instruction for the new pipelined loop. If needed, this
1000/// function updates the instruction using the values saved in the
1001/// InstrChanges structure.
1002MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1003 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1004 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1005 auto It = InstrChanges.find(OldMI);
1006 if (It != InstrChanges.end()) {
1007 std::pair<unsigned, int64_t> RegAndOffset = It->second;
1008 unsigned BasePos, OffsetPos;
1009 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1010 return nullptr;
1011 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1012 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1013 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1014 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1015 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1016 }
1017 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1018 return NewMI;
1019}
1020
1021/// Update the machine instruction with new virtual registers. This
1022/// function may change the definitions and/or uses.
1023void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1024 bool LastDef,
1025 unsigned CurStageNum,
1026 unsigned InstrStageNum,
1027 ValueMapTy *VRMap) {
1028 for (MachineOperand &MO : NewMI->operands()) {
1029 if (!MO.isReg() || !MO.getReg().isVirtual())
1030 continue;
1031 Register reg = MO.getReg();
1032 if (MO.isDef()) {
1033 // Create a new virtual register for the definition.
1034 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1035 Register NewReg = MRI.createVirtualRegister(RC);
1036 MO.setReg(NewReg);
1037 VRMap[CurStageNum][reg] = NewReg;
1038 if (LastDef)
1039 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1040 } else if (MO.isUse()) {
1041 MachineInstr *Def = MRI.getVRegDef(reg);
1042 // Compute the stage that contains the last definition for instruction.
1043 int DefStageNum = Schedule.getStage(Def);
1044 unsigned StageNum = CurStageNum;
1045 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1046 // Compute the difference in stages between the defintion and the use.
1047 unsigned StageDiff = (InstrStageNum - DefStageNum);
1048 // Make an adjustment to get the last definition.
1049 StageNum -= StageDiff;
1050 }
1051 if (VRMap[StageNum].count(reg))
1052 MO.setReg(VRMap[StageNum][reg]);
1053 }
1054 }
1055}
1056
1057/// Return the instruction in the loop that defines the register.
1058/// If the definition is a Phi, then follow the Phi operand to
1059/// the instruction in the loop.
1060MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1062 MachineInstr *Def = MRI.getVRegDef(Reg);
1063 while (Def->isPHI()) {
1064 if (!Visited.insert(Def).second)
1065 break;
1066 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1067 if (Def->getOperand(i + 1).getMBB() == BB) {
1068 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1069 break;
1070 }
1071 }
1072 return Def;
1073}
1074
1075/// Return the new name for the value from the previous stage.
1076unsigned ModuloScheduleExpander::getPrevMapVal(
1077 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1078 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1079 unsigned PrevVal = 0;
1080 if (StageNum > PhiStage) {
1081 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1082 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1083 // The name is defined in the previous stage.
1084 PrevVal = VRMap[StageNum - 1][LoopVal];
1085 else if (VRMap[StageNum].count(LoopVal))
1086 // The previous name is defined in the current stage when the instruction
1087 // order is swapped.
1088 PrevVal = VRMap[StageNum][LoopVal];
1089 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1090 // The loop value hasn't yet been scheduled.
1091 PrevVal = LoopVal;
1092 else if (StageNum == PhiStage + 1)
1093 // The loop value is another phi, which has not been scheduled.
1094 PrevVal = getInitPhiReg(*LoopInst, BB);
1095 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1096 // The loop value is another phi, which has been scheduled.
1097 PrevVal =
1098 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1099 LoopStage, VRMap, BB);
1100 }
1101 return PrevVal;
1102}
1103
1104/// Rewrite the Phi values in the specified block to use the mappings
1105/// from the initial operand. Once the Phi is scheduled, we switch
1106/// to using the loop value instead of the Phi value, so those names
1107/// do not need to be rewritten.
1108void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1109 unsigned StageNum,
1110 ValueMapTy *VRMap,
1111 InstrMapTy &InstrMap) {
1112 for (auto &PHI : BB->phis()) {
1113 unsigned InitVal = 0;
1114 unsigned LoopVal = 0;
1115 getPhiRegs(PHI, BB, InitVal, LoopVal);
1116 Register PhiDef = PHI.getOperand(0).getReg();
1117
1118 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1119 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1120 unsigned NumPhis = getStagesForPhi(PhiDef);
1121 if (NumPhis > StageNum)
1122 NumPhis = StageNum;
1123 for (unsigned np = 0; np <= NumPhis; ++np) {
1124 unsigned NewVal =
1125 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1126 if (!NewVal)
1127 NewVal = InitVal;
1128 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1129 NewVal);
1130 }
1131 }
1132}
1133
1134/// Rewrite a previously scheduled instruction to use the register value
1135/// from the new instruction. Make sure the instruction occurs in the
1136/// basic block, and we don't change the uses in the new instruction.
1137void ModuloScheduleExpander::rewriteScheduledInstr(
1138 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1139 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1140 unsigned PrevReg) {
1141 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1142 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1143 // Rewrite uses that have been scheduled already to use the new
1144 // Phi register.
1145 for (MachineOperand &UseOp :
1146 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1147 MachineInstr *UseMI = UseOp.getParent();
1148 if (UseMI->getParent() != BB)
1149 continue;
1150 if (UseMI->isPHI()) {
1151 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1152 continue;
1153 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1154 continue;
1155 }
1156 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1157 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1158 MachineInstr *OrigMI = OrigInstr->second;
1159 int StageSched = Schedule.getStage(OrigMI);
1160 int CycleSched = Schedule.getCycle(OrigMI);
1161 unsigned ReplaceReg = 0;
1162 // This is the stage for the scheduled instruction.
1163 if (StagePhi == StageSched && Phi->isPHI()) {
1164 int CyclePhi = Schedule.getCycle(Phi);
1165 if (PrevReg && InProlog)
1166 ReplaceReg = PrevReg;
1167 else if (PrevReg && !isLoopCarried(*Phi) &&
1168 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1169 ReplaceReg = PrevReg;
1170 else
1171 ReplaceReg = NewReg;
1172 }
1173 // The scheduled instruction occurs before the scheduled Phi, and the
1174 // Phi is not loop carried.
1175 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1176 ReplaceReg = NewReg;
1177 if (StagePhi > StageSched && Phi->isPHI())
1178 ReplaceReg = NewReg;
1179 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1180 ReplaceReg = NewReg;
1181 if (ReplaceReg) {
1182 const TargetRegisterClass *NRC =
1183 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1184 if (NRC)
1185 UseOp.setReg(ReplaceReg);
1186 else {
1187 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1188 BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1189 SplitReg)
1190 .addReg(ReplaceReg);
1191 UseOp.setReg(SplitReg);
1192 }
1193 }
1194 }
1195}
1196
1197bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1198 if (!Phi.isPHI())
1199 return false;
1200 int DefCycle = Schedule.getCycle(&Phi);
1201 int DefStage = Schedule.getStage(&Phi);
1202
1203 unsigned InitVal = 0;
1204 unsigned LoopVal = 0;
1205 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1206 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1207 if (!Use || Use->isPHI())
1208 return true;
1209 int LoopCycle = Schedule.getCycle(Use);
1210 int LoopStage = Schedule.getStage(Use);
1211 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1212}
1213
1214//===----------------------------------------------------------------------===//
1215// PeelingModuloScheduleExpander implementation
1216//===----------------------------------------------------------------------===//
1217// This is a reimplementation of ModuloScheduleExpander that works by creating
1218// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1219//===----------------------------------------------------------------------===//
1220
1221namespace {
1222// Remove any dead phis in MBB. Dead phis either have only one block as input
1223// (in which case they are the identity) or have no uses.
1224void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1225 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1226 bool Changed = true;
1227 while (Changed) {
1228 Changed = false;
1230 assert(MI.isPHI());
1231 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1232 if (LIS)
1234 MI.eraseFromParent();
1235 Changed = true;
1236 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1237 const TargetRegisterClass *ConstrainRegClass =
1238 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1239 MRI.getRegClass(MI.getOperand(0).getReg()));
1240 assert(ConstrainRegClass &&
1241 "Expected a valid constrained register class!");
1242 (void)ConstrainRegClass;
1243 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1244 MI.getOperand(1).getReg());
1245 if (LIS)
1247 MI.eraseFromParent();
1248 Changed = true;
1249 }
1250 }
1251 }
1252}
1253
1254/// Rewrites the kernel block in-place to adhere to the given schedule.
1255/// KernelRewriter holds all of the state required to perform the rewriting.
1256class KernelRewriter {
1257 ModuloSchedule &S;
1259 MachineBasicBlock *PreheaderBB, *ExitBB;
1261 const TargetInstrInfo *TII;
1262 LiveIntervals *LIS;
1263
1264 // Map from register class to canonical undef register for that class.
1266 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1267 // this map is only used when InitReg is non-undef.
1269 // Map from LoopReg to phi register where the InitReg is undef.
1271
1272 // Reg is used by MI. Return the new register MI should use to adhere to the
1273 // schedule. Insert phis as necessary.
1274 Register remapUse(Register Reg, MachineInstr &MI);
1275 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1276 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1277 // or will be chosen so as to share another phi.
1278 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1279 const TargetRegisterClass *RC = nullptr);
1280 // Create an undef register of the given register class.
1281 Register undef(const TargetRegisterClass *RC);
1282
1283public:
1284 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1285 LiveIntervals *LIS = nullptr);
1286 void rewrite();
1287};
1288} // namespace
1289
1290KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1291 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1292 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1293 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1294 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1295 PreheaderBB = *BB->pred_begin();
1296 if (PreheaderBB == BB)
1297 PreheaderBB = *std::next(BB->pred_begin());
1298}
1299
1300void KernelRewriter::rewrite() {
1301 // Rearrange the loop to be in schedule order. Note that the schedule may
1302 // contain instructions that are not owned by the loop block (InstrChanges and
1303 // friends), so we gracefully handle unowned instructions and delete any
1304 // instructions that weren't in the schedule.
1305 auto InsertPt = BB->getFirstTerminator();
1306 MachineInstr *FirstMI = nullptr;
1307 for (MachineInstr *MI : S.getInstructions()) {
1308 if (MI->isPHI())
1309 continue;
1310 if (MI->getParent())
1311 MI->removeFromParent();
1312 BB->insert(InsertPt, MI);
1313 if (!FirstMI)
1314 FirstMI = MI;
1315 }
1316 assert(FirstMI && "Failed to find first MI in schedule");
1317
1318 // At this point all of the scheduled instructions are between FirstMI
1319 // and the end of the block. Kill from the first non-phi to FirstMI.
1320 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1321 if (LIS)
1323 (I++)->eraseFromParent();
1324 }
1325
1326 // Now remap every instruction in the loop.
1327 for (MachineInstr &MI : *BB) {
1328 if (MI.isPHI() || MI.isTerminator())
1329 continue;
1330 for (MachineOperand &MO : MI.uses()) {
1331 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1332 continue;
1333 Register Reg = remapUse(MO.getReg(), MI);
1334 MO.setReg(Reg);
1335 }
1336 }
1337 EliminateDeadPhis(BB, MRI, LIS);
1338
1339 // Ensure a phi exists for all instructions that are either referenced by
1340 // an illegal phi or by an instruction outside the loop. This allows us to
1341 // treat remaps of these values the same as "normal" values that come from
1342 // loop-carried phis.
1343 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1344 if (MI->isPHI()) {
1345 Register R = MI->getOperand(0).getReg();
1346 phi(R);
1347 continue;
1348 }
1349
1350 for (MachineOperand &Def : MI->defs()) {
1351 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1352 if (MI.getParent() != BB) {
1353 phi(Def.getReg());
1354 break;
1355 }
1356 }
1357 }
1358 }
1359}
1360
1361Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1362 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1363 if (!Producer)
1364 return Reg;
1365
1366 int ConsumerStage = S.getStage(&MI);
1367 if (!Producer->isPHI()) {
1368 // Non-phi producers are simple to remap. Insert as many phis as the
1369 // difference between the consumer and producer stages.
1370 if (Producer->getParent() != BB)
1371 // Producer was not inside the loop. Use the register as-is.
1372 return Reg;
1373 int ProducerStage = S.getStage(Producer);
1374 assert(ConsumerStage != -1 &&
1375 "In-loop consumer should always be scheduled!");
1376 assert(ConsumerStage >= ProducerStage);
1377 unsigned StageDiff = ConsumerStage - ProducerStage;
1378
1379 for (unsigned I = 0; I < StageDiff; ++I)
1380 Reg = phi(Reg);
1381 return Reg;
1382 }
1383
1384 // First, dive through the phi chain to find the defaults for the generated
1385 // phis.
1387 Register LoopReg = Reg;
1388 auto LoopProducer = Producer;
1389 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1390 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1391 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1392 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1393 assert(LoopProducer);
1394 }
1395 int LoopProducerStage = S.getStage(LoopProducer);
1396
1397 std::optional<Register> IllegalPhiDefault;
1398
1399 if (LoopProducerStage == -1) {
1400 // Do nothing.
1401 } else if (LoopProducerStage > ConsumerStage) {
1402 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1403 // In addition, Consumer's cycle must be scheduled after Producer in the
1404 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1405 // functions.
1406#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1407 int LoopProducerCycle = S.getCycle(LoopProducer);
1408 int ConsumerCycle = S.getCycle(&MI);
1409#endif
1410 assert(LoopProducerCycle <= ConsumerCycle);
1411 assert(LoopProducerStage == ConsumerStage + 1);
1412 // Peel off the first phi from Defaults and insert a phi between producer
1413 // and consumer. This phi will not be at the front of the block so we
1414 // consider it illegal. It will only exist during the rewrite process; it
1415 // needs to exist while we peel off prologs because these could take the
1416 // default value. After that we can replace all uses with the loop producer
1417 // value.
1418 IllegalPhiDefault = Defaults.front();
1419 Defaults.erase(Defaults.begin());
1420 } else {
1421 assert(ConsumerStage >= LoopProducerStage);
1422 int StageDiff = ConsumerStage - LoopProducerStage;
1423 if (StageDiff > 0) {
1424 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1425 << " to " << (Defaults.size() + StageDiff) << "\n");
1426 // If we need more phis than we have defaults for, pad out with undefs for
1427 // the earliest phis, which are at the end of the defaults chain (the
1428 // chain is in reverse order).
1429 Defaults.resize(Defaults.size() + StageDiff,
1430 Defaults.empty() ? std::optional<Register>()
1431 : Defaults.back());
1432 }
1433 }
1434
1435 // Now we know the number of stages to jump back, insert the phi chain.
1436 auto DefaultI = Defaults.rbegin();
1437 while (DefaultI != Defaults.rend())
1438 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1439
1440 if (IllegalPhiDefault) {
1441 // The consumer optionally consumes LoopProducer in the same iteration
1442 // (because the producer is scheduled at an earlier cycle than the consumer)
1443 // or the initial value. To facilitate this we create an illegal block here
1444 // by embedding a phi in the middle of the block. We will fix this up
1445 // immediately prior to pruning.
1446 auto RC = MRI.getRegClass(Reg);
1447 Register R = MRI.createVirtualRegister(RC);
1448 MachineInstr *IllegalPhi =
1449 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1450 .addReg(*IllegalPhiDefault)
1451 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1452 .addReg(LoopReg)
1453 .addMBB(BB); // Block choice is arbitrary and has no effect.
1454 // Illegal phi should belong to the producer stage so that it can be
1455 // filtered correctly during peeling.
1456 S.setStage(IllegalPhi, LoopProducerStage);
1457 return R;
1458 }
1459
1460 return LoopReg;
1461}
1462
1463Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1464 const TargetRegisterClass *RC) {
1465 // If the init register is not undef, try and find an existing phi.
1466 if (InitReg) {
1467 auto I = Phis.find({LoopReg, *InitReg});
1468 if (I != Phis.end())
1469 return I->second;
1470 } else {
1471 for (auto &KV : Phis) {
1472 if (KV.first.first == LoopReg)
1473 return KV.second;
1474 }
1475 }
1476
1477 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1478 // find a phi that takes undef as input.
1479 auto I = UndefPhis.find(LoopReg);
1480 if (I != UndefPhis.end()) {
1481 Register R = I->second;
1482 if (!InitReg)
1483 // Found a phi taking undef as input, and this input is undef so return
1484 // without any more changes.
1485 return R;
1486 // Found a phi taking undef as input, so rewrite it to take InitReg.
1487 MachineInstr *MI = MRI.getVRegDef(R);
1488 MI->getOperand(1).setReg(*InitReg);
1489 Phis.insert({{LoopReg, *InitReg}, R});
1490 const TargetRegisterClass *ConstrainRegClass =
1491 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1492 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1493 (void)ConstrainRegClass;
1494 UndefPhis.erase(I);
1495 return R;
1496 }
1497
1498 // Failed to find any existing phi to reuse, so create a new one.
1499 if (!RC)
1500 RC = MRI.getRegClass(LoopReg);
1501 Register R = MRI.createVirtualRegister(RC);
1502 if (InitReg) {
1503 const TargetRegisterClass *ConstrainRegClass =
1504 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1505 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1506 (void)ConstrainRegClass;
1507 }
1508 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1509 .addReg(InitReg ? *InitReg : undef(RC))
1510 .addMBB(PreheaderBB)
1511 .addReg(LoopReg)
1512 .addMBB(BB);
1513 if (!InitReg)
1514 UndefPhis[LoopReg] = R;
1515 else
1516 Phis[{LoopReg, *InitReg}] = R;
1517 return R;
1518}
1519
1520Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1521 Register &R = Undefs[RC];
1522 if (R == 0) {
1523 // Create an IMPLICIT_DEF that defines this register if we need it.
1524 // All uses of this should be removed by the time we have finished unrolling
1525 // prologs and epilogs.
1526 R = MRI.createVirtualRegister(RC);
1527 auto *InsertBB = &PreheaderBB->getParent()->front();
1528 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1529 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1530 }
1531 return R;
1532}
1533
1534namespace {
1535/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1536/// the operand are discovered, such as how many in-loop PHIs it has to jump
1537/// through and defaults for these phis.
1538class KernelOperandInfo {
1541 SmallVector<Register, 4> PhiDefaults;
1544
1545public:
1546 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1547 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1548 : MRI(MRI) {
1549 Source = MO;
1550 BB = MO->getParent()->getParent();
1551 while (isRegInLoop(MO)) {
1552 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1553 if (MI->isFullCopy()) {
1554 MO = &MI->getOperand(1);
1555 continue;
1556 }
1557 if (!MI->isPHI())
1558 break;
1559 // If this is an illegal phi, don't count it in distance.
1560 if (IllegalPhis.count(MI)) {
1561 MO = &MI->getOperand(3);
1562 continue;
1563 }
1564
1566 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1567 : &MI->getOperand(3);
1568 PhiDefaults.push_back(Default);
1569 }
1570 Target = MO;
1571 }
1572
1573 bool operator==(const KernelOperandInfo &Other) const {
1574 return PhiDefaults.size() == Other.PhiDefaults.size();
1575 }
1576
1577 void print(raw_ostream &OS) const {
1578 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1579 << *Source->getParent();
1580 }
1581
1582private:
1583 bool isRegInLoop(MachineOperand *MO) {
1584 return MO->isReg() && MO->getReg().isVirtual() &&
1585 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1586 }
1587};
1588} // namespace
1589
1593 if (LPD == LPD_Front)
1594 PeeledFront.push_back(NewBB);
1595 else
1596 PeeledBack.push_front(NewBB);
1597 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1598 ++I, ++NI) {
1599 CanonicalMIs[&*I] = &*I;
1600 CanonicalMIs[&*NI] = &*I;
1601 BlockMIs[{NewBB, &*I}] = &*NI;
1602 BlockMIs[{BB, &*I}] = &*I;
1603 }
1604 return NewBB;
1605}
1606
1608 int MinStage) {
1609 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1610 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1611 MachineInstr *MI = &*I++;
1612 int Stage = getStage(MI);
1613 if (Stage == -1 || Stage >= MinStage)
1614 continue;
1615
1616 for (MachineOperand &DefMO : MI->defs()) {
1618 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1619 // Only PHIs can use values from this block by construction.
1620 // Match with the equivalent PHI in B.
1621 assert(UseMI.isPHI());
1622 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1623 MI->getParent());
1624 Subs.emplace_back(&UseMI, Reg);
1625 }
1626 for (auto &Sub : Subs)
1627 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1629 }
1630 if (LIS)
1632 MI->eraseFromParent();
1633 }
1634}
1635
1637 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1638 auto InsertPt = DestBB->getFirstNonPHI();
1641 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1642 if (MI.isPHI()) {
1643 // This is an illegal PHI. If we move any instructions using an illegal
1644 // PHI, we need to create a legal Phi.
1645 if (getStage(&MI) != Stage) {
1646 // The legal Phi is not necessary if the illegal phi's stage
1647 // is being moved.
1648 Register PhiR = MI.getOperand(0).getReg();
1649 auto RC = MRI.getRegClass(PhiR);
1651 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1652 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1653 .addReg(PhiR)
1654 .addMBB(SourceBB);
1655 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1657 Remaps[PhiR] = NR;
1658 }
1659 }
1660 if (getStage(&MI) != Stage)
1661 continue;
1662 MI.removeFromParent();
1663 DestBB->insert(InsertPt, &MI);
1664 auto *KernelMI = CanonicalMIs[&MI];
1665 BlockMIs[{DestBB, KernelMI}] = &MI;
1666 BlockMIs.erase({SourceBB, KernelMI});
1667 }
1669 for (MachineInstr &MI : DestBB->phis()) {
1670 assert(MI.getNumOperands() == 3);
1671 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1672 // If the instruction referenced by the phi is moved inside the block
1673 // we don't need the phi anymore.
1674 if (getStage(Def) == Stage) {
1675 Register PhiReg = MI.getOperand(0).getReg();
1676 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg()) != -1);
1677 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1678 MI.getOperand(0).setReg(PhiReg);
1679 PhiToDelete.push_back(&MI);
1680 }
1681 }
1682 for (auto *P : PhiToDelete)
1683 P->eraseFromParent();
1684 InsertPt = DestBB->getFirstNonPHI();
1685 // Helper to clone Phi instructions into the destination block. We clone Phi
1686 // greedily to avoid combinatorial explosion of Phi instructions.
1687 auto clonePhi = [&](MachineInstr *Phi) {
1688 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1689 DestBB->insert(InsertPt, NewMI);
1690 Register OrigR = Phi->getOperand(0).getReg();
1692 NewMI->getOperand(0).setReg(R);
1693 NewMI->getOperand(1).setReg(OrigR);
1694 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1695 Remaps[OrigR] = R;
1696 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1697 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1699 return R;
1700 };
1701 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1702 for (MachineOperand &MO : I->uses()) {
1703 if (!MO.isReg())
1704 continue;
1705 if (Remaps.count(MO.getReg()))
1706 MO.setReg(Remaps[MO.getReg()]);
1707 else {
1708 // If we are using a phi from the source block we need to add a new phi
1709 // pointing to the old one.
1711 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1712 Register R = clonePhi(Use);
1713 MO.setReg(R);
1714 }
1715 }
1716 }
1717 }
1718}
1719
1722 MachineInstr *Phi) {
1723 unsigned distance = PhiNodeLoopIteration[Phi];
1724 MachineInstr *CanonicalUse = CanonicalPhi;
1725 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1726 for (unsigned I = 0; I < distance; ++I) {
1727 assert(CanonicalUse->isPHI());
1728 assert(CanonicalUse->getNumOperands() == 5);
1729 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1730 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1731 std::swap(LoopRegIdx, InitRegIdx);
1732 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1733 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1734 }
1735 return CanonicalUseReg;
1736}
1737
1739 BitVector LS(Schedule.getNumStages(), true);
1740 BitVector AS(Schedule.getNumStages(), true);
1741 LiveStages[BB] = LS;
1742 AvailableStages[BB] = AS;
1743
1744 // Peel out the prologs.
1745 LS.reset();
1746 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1747 LS[I] = true;
1748 Prologs.push_back(peelKernel(LPD_Front));
1749 LiveStages[Prologs.back()] = LS;
1750 AvailableStages[Prologs.back()] = LS;
1751 }
1752
1753 // Create a block that will end up as the new loop exiting block (dominated by
1754 // all prologs and epilogs). It will only contain PHIs, in the same order as
1755 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1756 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1757 // property that any value deffed in BB but used outside of BB is used by a
1758 // PHI in the exiting block.
1760 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1761 // Push out the epilogs, again in reverse order.
1762 // We can't assume anything about the minumum loop trip count at this point,
1763 // so emit a fairly complex epilog.
1764
1765 // We first peel number of stages minus one epilogue. Then we remove dead
1766 // stages and reorder instructions based on their stage. If we have 3 stages
1767 // we generate first:
1768 // E0[3, 2, 1]
1769 // E1[3', 2']
1770 // E2[3'']
1771 // And then we move instructions based on their stages to have:
1772 // E0[3]
1773 // E1[2, 3']
1774 // E2[1, 2', 3'']
1775 // The transformation is legal because we only move instructions past
1776 // instructions of a previous loop iteration.
1777 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1778 Epilogs.push_back(peelKernel(LPD_Back));
1779 MachineBasicBlock *B = Epilogs.back();
1781 // Keep track at which iteration each phi belongs to. We need it to know
1782 // what version of the variable to use during prologue/epilogue stitching.
1783 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1784 for (MachineInstr &Phi : B->phis())
1786 }
1787 for (size_t I = 0; I < Epilogs.size(); I++) {
1788 LS.reset();
1789 for (size_t J = I; J < Epilogs.size(); J++) {
1790 int Iteration = J;
1791 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1792 // Move stage one block at a time so that Phi nodes are updated correctly.
1793 for (size_t K = Iteration; K > I; K--)
1794 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1795 LS[Stage] = true;
1796 }
1797 LiveStages[Epilogs[I]] = LS;
1798 AvailableStages[Epilogs[I]] = AS;
1799 }
1800
1801 // Now we've defined all the prolog and epilog blocks as a fallthrough
1802 // sequence, add the edges that will be followed if the loop trip count is
1803 // lower than the number of stages (connecting prologs directly with epilogs).
1804 auto PI = Prologs.begin();
1805 auto EI = Epilogs.begin();
1806 assert(Prologs.size() == Epilogs.size());
1807 for (; PI != Prologs.end(); ++PI, ++EI) {
1808 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1809 (*PI)->addSuccessor(*EI);
1810 for (MachineInstr &MI : (*EI)->phis()) {
1811 Register Reg = MI.getOperand(1).getReg();
1813 if (Use && Use->getParent() == Pred) {
1814 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1815 if (CanonicalUse->isPHI()) {
1816 // If the use comes from a phi we need to skip as many phi as the
1817 // distance between the epilogue and the kernel. Trace through the phi
1818 // chain to find the right value.
1819 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1820 }
1821 Reg = getEquivalentRegisterIn(Reg, *PI);
1822 }
1823 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1824 MI.addOperand(MachineOperand::CreateMBB(*PI));
1825 }
1826 }
1827
1828 // Create a list of all blocks in order.
1830 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1831 Blocks.push_back(BB);
1832 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1833
1834 // Iterate in reverse order over all instructions, remapping as we go.
1835 for (MachineBasicBlock *B : reverse(Blocks)) {
1836 for (auto I = B->instr_rbegin();
1837 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1839 rewriteUsesOf(&*MI);
1840 }
1841 }
1842 for (auto *MI : IllegalPhisToDelete) {
1843 if (LIS)
1845 MI->eraseFromParent();
1846 }
1848
1849 // Now all remapping has been done, we're free to optimize the generated code.
1851 EliminateDeadPhis(B, MRI, LIS);
1852 EliminateDeadPhis(ExitingBB, MRI, LIS);
1853}
1854
1857 MachineBasicBlock *Exit = *BB->succ_begin();
1858 if (Exit == BB)
1859 Exit = *std::next(BB->succ_begin());
1860
1862 MF.insert(std::next(BB->getIterator()), NewBB);
1863
1864 // Clone all phis in BB into NewBB and rewrite.
1865 for (MachineInstr &MI : BB->phis()) {
1866 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1867 Register OldR = MI.getOperand(3).getReg();
1870 for (MachineInstr &Use : MRI.use_instructions(OldR))
1871 if (Use.getParent() != BB)
1872 Uses.push_back(&Use);
1873 for (MachineInstr *Use : Uses)
1874 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1876 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1877 .addReg(OldR)
1878 .addMBB(BB);
1879 BlockMIs[{NewBB, &MI}] = NI;
1880 CanonicalMIs[NI] = &MI;
1881 }
1882 BB->replaceSuccessor(Exit, NewBB);
1883 Exit->replacePhiUsesWith(BB, NewBB);
1884 NewBB->addSuccessor(Exit);
1885
1886 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1888 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1889 (void)CanAnalyzeBr;
1890 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1891 TII->removeBranch(*BB);
1892 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1893 Cond, DebugLoc());
1894 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1895 return NewBB;
1896}
1897
1900 MachineBasicBlock *BB) {
1902 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
1903 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1904}
1905
1907 if (MI->isPHI()) {
1908 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1909 // and it is produced by this block.
1910 Register PhiR = MI->getOperand(0).getReg();
1911 Register R = MI->getOperand(3).getReg();
1912 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1913 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1914 R = MI->getOperand(1).getReg();
1915 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1916 MRI.replaceRegWith(PhiR, R);
1917 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1918 // later to figure out how to remap registers.
1919 MI->getOperand(0).setReg(PhiR);
1921 return;
1922 }
1923
1924 int Stage = getStage(MI);
1925 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1926 LiveStages[MI->getParent()].test(Stage))
1927 // Instruction is live, no rewriting to do.
1928 return;
1929
1930 for (MachineOperand &DefMO : MI->defs()) {
1932 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1933 // Only PHIs can use values from this block by construction.
1934 // Match with the equivalent PHI in B.
1935 assert(UseMI.isPHI());
1936 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1937 MI->getParent());
1938 Subs.emplace_back(&UseMI, Reg);
1939 }
1940 for (auto &Sub : Subs)
1941 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1943 }
1944 if (LIS)
1946 MI->eraseFromParent();
1947}
1948
1950 // Work outwards from the kernel.
1951 bool KernelDisposed = false;
1952 int TC = Schedule.getNumStages() - 1;
1953 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1954 ++PI, ++EI, --TC) {
1956 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1960 std::optional<bool> StaticallyGreater =
1961 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1962 if (!StaticallyGreater) {
1963 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1964 // Dynamically branch based on Cond.
1965 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1966 } else if (*StaticallyGreater == false) {
1967 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1968 // Prolog never falls through; branch to epilog and orphan interior
1969 // blocks. Leave it to unreachable-block-elim to clean up.
1970 Prolog->removeSuccessor(Fallthrough);
1971 for (MachineInstr &P : Fallthrough->phis()) {
1972 P.removeOperand(2);
1973 P.removeOperand(1);
1974 }
1976 KernelDisposed = true;
1977 } else {
1978 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1979 // Prolog always falls through; remove incoming values in epilog.
1980 Prolog->removeSuccessor(Epilog);
1981 for (MachineInstr &P : Epilog->phis()) {
1982 P.removeOperand(4);
1983 P.removeOperand(3);
1984 }
1985 }
1986 }
1987
1988 if (!KernelDisposed) {
1989 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1990 LoopInfo->setPreheader(Prologs.back());
1991 } else {
1992 LoopInfo->disposed();
1993 }
1994}
1995
1997 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
1998 KR.rewrite();
1999}
2000
2007
2008 rewriteKernel();
2010 fixupBranches();
2011}
2012
2016
2017 // Dump the schedule before we invalidate and remap all its instructions.
2018 // Stash it in a string so we can print it if we found an error.
2019 std::string ScheduleDump;
2020 raw_string_ostream OS(ScheduleDump);
2021 Schedule.print(OS);
2022 OS.flush();
2023
2024 // First, run the normal ModuleScheduleExpander. We don't support any
2025 // InstrChanges.
2026 assert(LIS && "Requires LiveIntervals!");
2029 MSE.expand();
2030 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2031 if (!ExpandedKernel) {
2032 // The expander optimized away the kernel. We can't do any useful checking.
2033 MSE.cleanup();
2034 return;
2035 }
2036 // Before running the KernelRewriter, re-add BB into the CFG.
2038
2039 // Now run the new expansion algorithm.
2040 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2041 KR.rewrite();
2043
2044 // Collect all illegal phis that the new algorithm created. We'll give these
2045 // to KernelOperandInfo.
2047 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2048 if (NI->isPHI())
2049 IllegalPhis.insert(&*NI);
2050 }
2051
2052 // Co-iterate across both kernels. We expect them to be identical apart from
2053 // phis and full COPYs (we look through both).
2055 auto OI = ExpandedKernel->begin();
2056 auto NI = BB->begin();
2057 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2058 while (OI->isPHI() || OI->isFullCopy())
2059 ++OI;
2060 while (NI->isPHI() || NI->isFullCopy())
2061 ++NI;
2062 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2063 // Analyze every operand separately.
2064 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2065 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2066 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2067 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2068 }
2069
2070 bool Failed = false;
2071 for (auto &OldAndNew : KOIs) {
2072 if (OldAndNew.first == OldAndNew.second)
2073 continue;
2074 Failed = true;
2075 errs() << "Modulo kernel validation error: [\n";
2076 errs() << " [golden] ";
2077 OldAndNew.first.print(errs());
2078 errs() << " ";
2079 OldAndNew.second.print(errs());
2080 errs() << "]\n";
2081 }
2082
2083 if (Failed) {
2084 errs() << "Golden reference kernel:\n";
2085 ExpandedKernel->print(errs());
2086 errs() << "New kernel:\n";
2087 BB->print(errs());
2088 errs() << ScheduleDump;
2090 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2091 }
2092
2093 // Cleanup by removing BB from the CFG again as the original
2094 // ModuloScheduleExpander intended.
2096 MSE.cleanup();
2097}
2098
2099//===----------------------------------------------------------------------===//
2100// ModuloScheduleTestPass implementation
2101//===----------------------------------------------------------------------===//
2102// This pass constructs a ModuloSchedule from its module and runs
2103// ModuloScheduleExpander.
2104//
2105// The module is expected to contain a single-block analyzable loop.
2106// The total order of instructions is taken from the loop as-is.
2107// Instructions are expected to be annotated with a PostInstrSymbol.
2108// This PostInstrSymbol must have the following format:
2109// "Stage=%d Cycle=%d".
2110//===----------------------------------------------------------------------===//
2111
2112namespace {
2113class ModuloScheduleTest : public MachineFunctionPass {
2114public:
2115 static char ID;
2116
2117 ModuloScheduleTest() : MachineFunctionPass(ID) {
2119 }
2120
2121 bool runOnMachineFunction(MachineFunction &MF) override;
2122 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2123
2124 void getAnalysisUsage(AnalysisUsage &AU) const override {
2128 }
2129};
2130} // namespace
2131
2132char ModuloScheduleTest::ID = 0;
2133
2134INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2135 "Modulo Schedule test pass", false, false)
2138INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2139 "Modulo Schedule test pass", false, false)
2140
2141bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2142 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
2143 for (auto *L : MLI) {
2144 if (L->getTopBlock() != L->getBottomBlock())
2145 continue;
2146 runOnLoop(MF, *L);
2147 return false;
2148 }
2149 return false;
2150}
2151
2152static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2153 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2154 std::pair<StringRef, StringRef> StageTokenAndValue =
2155 getToken(StageAndCycle.first, "-");
2156 std::pair<StringRef, StringRef> CycleTokenAndValue =
2157 getToken(StageAndCycle.second, "-");
2158 if (StageTokenAndValue.first != "Stage" ||
2159 CycleTokenAndValue.first != "_Cycle") {
2161 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2162 return;
2163 }
2164
2165 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2166 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2167
2168 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2169}
2170
2171void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2172 LiveIntervals &LIS = getAnalysis<LiveIntervals>();
2173 MachineBasicBlock *BB = L.getTopBlock();
2174 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2175
2177 std::vector<MachineInstr *> Instrs;
2178 for (MachineInstr &MI : *BB) {
2179 if (MI.isTerminator())
2180 continue;
2181 Instrs.push_back(&MI);
2182 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2183 dbgs() << "Parsing post-instr symbol for " << MI;
2184 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2185 }
2186 }
2187
2188 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2189 std::move(Stage));
2191 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2192 MSE.expand();
2193 MSE.cleanup();
2194}
2195
2196//===----------------------------------------------------------------------===//
2197// ModuloScheduleTestAnnotater implementation
2198//===----------------------------------------------------------------------===//
2199
2201 for (MachineInstr *MI : S.getInstructions()) {
2204 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2206 MI->setPostInstrSymbol(MF, Sym);
2207 }
2208}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
Rewrite undef for PHI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
@ Default
Definition: DwarfDebug.cpp:86
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1269
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:491
Symbol * Sym
Definition: ELF_riscv.cpp:463
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
unsigned const TargetRegisterInfo * TRI
unsigned Reg
This file provides utility analysis objects describing memory locations.
modulo schedule test
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
modulo schedule Modulo Schedule test pass
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static bool rewrite(Function &F)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file contains some functions that are useful when dealing with strings.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
A possibly irreducible generalization of a Loop.
bool hasInterval(Register Reg) const
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & createEmptyInterval(Register Reg)
Interval creation.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
instr_iterator instr_begin()
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:320
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:526
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:775
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:648
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:745
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:452
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:533
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
static use_instr_iterator use_instr_end()
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
use_iterator use_begin(Register RegNo) const
static use_iterator use_end()
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled out p...
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
constexpr double e
Definition: MathExtras.h:31
constexpr double phi
Definition: MathExtras.h:45
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:2011
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1921
void initializeModuloScheduleTestPass(PassRegistry &)
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860