LLVM  10.0.0svn
ModuloSchedule.h
Go to the documentation of this file.
1 //===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Software pipelining (SWP) is an instruction scheduling technique for loops
10 // that overlaps loop iterations and exploits ILP via compiler transformations.
11 //
12 // There are multiple methods for analyzing a loop and creating a schedule.
13 // An example algorithm is Swing Modulo Scheduling (implemented by the
14 // MachinePipeliner). The details of how a schedule is arrived at are irrelevant
15 // for the task of actually rewriting a loop to adhere to the schedule, which
16 // is what this file does.
17 //
18 // A schedule is, for every instruction in a block, a Cycle and a Stage. Note
19 // that we only support single-block loops, so "block" and "loop" can be used
20 // interchangably.
21 //
22 // The Cycle of an instruction defines a partial order of the instructions in
23 // the remapped loop. Instructions within a cycle must not consume the output
24 // of any instruction in the same cycle. Cycle information is assumed to have
25 // been calculated such that the processor will execute instructions in
26 // lock-step (for example in a VLIW ISA).
27 //
28 // The Stage of an instruction defines the mapping between logical loop
29 // iterations and pipelined loop iterations. An example (unrolled) pipeline
30 // may look something like:
31 //
32 // I0[0] Execute instruction I0 of iteration 0
33 // I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1
34 // I1[1], I0[2]
35 // I1[2], I0[3]
36 //
37 // In the schedule for this unrolled sequence we would say that I0 was scheduled
38 // in stage 0 and I1 in stage 1:
39 //
40 // loop:
41 // [stage 0] x = I0
42 // [stage 1] I1 x (from stage 0)
43 //
44 // And to actually generate valid code we must insert a phi:
45 //
46 // loop:
47 // x' = phi(x)
48 // x = I0
49 // I1 x'
50 //
51 // This is a simple example; the rules for how to generate correct code given
52 // an arbitrary schedule containing loop-carried values are complex.
53 //
54 // Note that these examples only mention the steady-state kernel of the
55 // generated loop; prologs and epilogs must be generated also that prime and
56 // flush the pipeline. Doing so is nontrivial.
57 //
58 //===----------------------------------------------------------------------===//
59 
60 #ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
61 #define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
62 
68 #include <deque>
69 #include <vector>
70 
71 namespace llvm {
72 class MachineBasicBlock;
73 class MachineInstr;
74 class LiveIntervals;
75 
76 /// Represents a schedule for a single-block loop. For every instruction we
77 /// maintain a Cycle and Stage.
79 private:
80  /// The block containing the loop instructions.
82 
83  /// The instructions to be generated, in total order. Cycle provides a partial
84  /// order; the total order within cycles has been decided by the schedule
85  /// producer.
86  std::vector<MachineInstr *> ScheduledInstrs;
87 
88  /// The cycle for each instruction.
90 
91  /// The stage for each instruction.
93 
94  /// The number of stages in this schedule (Max(Stage) + 1).
95  int NumStages;
96 
97 public:
98  /// Create a new ModuloSchedule.
99  /// \arg ScheduledInstrs The new loop instructions, in total resequenced
100  /// order.
101  /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
102  /// not need to start at zero. ScheduledInstrs must be partially ordered by
103  /// Cycle.
104  /// \arg Stage Stage index for all instructions in ScheduleInstrs.
106  std::vector<MachineInstr *> ScheduledInstrs,
109  : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
110  Stage(std::move(Stage)) {
111  NumStages = 0;
112  for (auto &KV : this->Stage)
113  NumStages = std::max(NumStages, KV.second);
114  ++NumStages;
115  }
116 
117  /// Return the single-block loop being scheduled.
118  MachineLoop *getLoop() const { return Loop; }
119 
120  /// Return the number of stages contained in this schedule, which is the
121  /// largest stage index + 1.
122  int getNumStages() const { return NumStages; }
123 
124  /// Return the first cycle in the schedule, which is the cycle index of the
125  /// first instruction.
126  int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
127 
128  /// Return the final cycle in the schedule, which is the cycle index of the
129  /// last instruction.
130  int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
131 
132  /// Return the stage that MI is scheduled in, or -1.
134  auto I = Stage.find(MI);
135  return I == Stage.end() ? -1 : I->second;
136  }
137 
138  /// Return the cycle that MI is scheduled at, or -1.
140  auto I = Cycle.find(MI);
141  return I == Cycle.end() ? -1 : I->second;
142  }
143 
144  /// Return the rescheduled instructions in order.
145  ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
146 
147  void dump() { print(dbgs()); }
148  void print(raw_ostream &OS);
149 };
150 
151 /// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
152 /// rewriting the old loop and inserting prologs and epilogs as required.
154 public:
156 
157 private:
161 
162  ModuloSchedule &Schedule;
163  MachineFunction &MF;
164  const TargetSubtargetInfo &ST;
166  const TargetInstrInfo *TII;
167  LiveIntervals &LIS;
168 
169  MachineBasicBlock *BB;
170  MachineBasicBlock *Preheader;
171  MachineBasicBlock *NewKernel = nullptr;
172  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
173 
174  /// Map for each register and the max difference between its uses and def.
175  /// The first element in the pair is the max difference in stages. The
176  /// second is true if the register defines a Phi value and loop value is
177  /// scheduled before the Phi.
178  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
179 
180  /// Instructions to change when emitting the final schedule.
181  InstrChangesTy InstrChanges;
182 
183  void generatePipelinedLoop();
184  void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
185  ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
186  void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
187  ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
188  MBBVectorTy &PrologBBs);
189  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
190  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
191  ValueMapTy *VRMap, InstrMapTy &InstrMap,
192  unsigned LastStageNum, unsigned CurStageNum,
193  bool IsLast);
194  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
195  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
196  ValueMapTy *VRMap, InstrMapTy &InstrMap,
197  unsigned LastStageNum, unsigned CurStageNum, bool IsLast);
198  void removeDeadInstructions(MachineBasicBlock *KernelBB,
199  MBBVectorTy &EpilogBBs);
200  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
201  void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
202  MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
203  ValueMapTy *VRMap);
204  bool computeDelta(MachineInstr &MI, unsigned &Delta);
205  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
206  unsigned Num);
207  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
208  unsigned InstStageNum);
209  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
210  unsigned InstStageNum);
211  void updateInstruction(MachineInstr *NewMI, bool LastDef,
212  unsigned CurStageNum, unsigned InstrStageNum,
213  ValueMapTy *VRMap);
214  MachineInstr *findDefInLoop(unsigned Reg);
215  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
216  unsigned LoopStage, ValueMapTy *VRMap,
217  MachineBasicBlock *BB);
218  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
219  ValueMapTy *VRMap, InstrMapTy &InstrMap);
220  void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
221  unsigned CurStageNum, unsigned PhiNum,
222  MachineInstr *Phi, unsigned OldReg,
223  unsigned NewReg, unsigned PrevReg = 0);
224  bool isLoopCarried(MachineInstr &Phi);
225 
226  /// Return the max. number of stages/iterations that can occur between a
227  /// register definition and its uses.
228  unsigned getStagesForReg(int Reg, unsigned CurStage) {
229  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
230  if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
231  Stages.second)
232  return 1;
233  return Stages.first;
234  }
235 
236  /// The number of stages for a Phi is a little different than other
237  /// instructions. The minimum value computed in RegToStageDiff is 1
238  /// because we assume the Phi is needed for at least 1 iteration.
239  /// This is not the case if the loop value is scheduled prior to the
240  /// Phi in the same stage. This function returns the number of stages
241  /// or iterations needed between the Phi definition and any uses.
242  unsigned getStagesForPhi(int Reg) {
243  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
244  if (Stages.second)
245  return Stages.first;
246  return Stages.first - 1;
247  }
248 
249 public:
250  /// Create a new ModuloScheduleExpander.
251  /// \arg InstrChanges Modifications to make to instructions with memory
252  /// operands.
253  /// FIXME: InstrChanges is opaque and is an implementation detail of an
254  /// optimization in MachinePipeliner that crosses abstraction boundaries.
256  LiveIntervals &LIS, InstrChangesTy InstrChanges)
257  : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
258  TII(ST.getInstrInfo()), LIS(LIS),
259  InstrChanges(std::move(InstrChanges)) {}
260 
261  /// Performs the actual expansion.
262  void expand();
263  /// Performs final cleanup after expansion.
264  void cleanup();
265 
266  /// Returns the newly rewritten kernel block, or nullptr if this was
267  /// optimized away.
268  MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
269 };
270 
271 /// A reimplementation of ModuloScheduleExpander. It works by generating a
272 /// standalone kernel loop and peeling out the prologs and epilogs.
274  ModuloSchedule &Schedule;
275  MachineFunction &MF;
276  const TargetSubtargetInfo &ST;
278  const TargetInstrInfo *TII;
279  LiveIntervals *LIS;
280 
281  /// The original loop block that gets rewritten in-place.
282  MachineBasicBlock *BB;
283  /// The original loop preheader.
284  MachineBasicBlock *Preheader;
285  /// All prolog and epilog blocks.
286  SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
287  /// For every block, the stages that are produced.
289  /// For every block, the stages that are available. A stage can be available
290  /// but not produced (in the epilog) or produced but not available (in the
291  /// prolog).
293 
294  /// CanonicalMIs and BlockMIs form a bidirectional map between any of the
295  /// loop kernel clones.
298  BlockMIs;
299 
300  /// State passed from peelKernel to peelPrologAndEpilogs().
301  std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
302 
303 public:
305  LiveIntervals *LIS)
306  : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
307  TII(ST.getInstrInfo()), LIS(LIS) {}
308 
309  void expand();
310 
311  /// Runs ModuloScheduleExpander and treats it as a golden input to validate
312  /// aspects of the code generated by PeelingModuloScheduleExpander.
313  void validateAgainstModuloScheduleExpander();
314 
315 protected:
316  /// Converts BB from the original loop body to the rewritten, pipelined
317  /// steady-state.
318  void rewriteKernel();
319 
320 private:
321  /// Peels one iteration of the rewritten kernel (BB) in the specified
322  /// direction.
323  MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
324  /// Peel the kernel forwards and backwards to produce prologs and epilogs,
325  /// and stitch them together.
326  void peelPrologAndEpilogs();
327  /// All prolog and epilog blocks are clones of the kernel, so any produced
328  /// register in one block has an corollary in all other blocks.
329  Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
330  /// Change all users of MI, if MI is predicated out
331  /// (LiveStages[MI->getParent()] == false).
332  void rewriteUsesOf(MachineInstr *MI);
333  /// Insert branches between prologs, kernel and epilogs.
334  void fixupBranches();
335  /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
336  /// to a block dominated by all prologs and epilogs. This allows us to treat
337  /// the loop exiting block as any other kernel clone.
338  MachineBasicBlock *CreateLCSSAExitingBlock();
339  /// Helper to get the stage of an instruction in the schedule.
340  unsigned getStage(MachineInstr *MI) {
341  if (CanonicalMIs.count(MI))
342  MI = CanonicalMIs[MI];
343  return Schedule.getStage(MI);
344  }
345 };
346 
347 /// Expander that simply annotates each scheduled instruction with a post-instr
348 /// symbol that can be consumed by the ModuloScheduleTest pass.
349 ///
350 /// The post-instr symbol is a way of annotating an instruction that can be
351 /// roundtripped in MIR. The syntax is:
352 /// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
354  MachineFunction &MF;
355  ModuloSchedule &S;
356 
357 public:
359  : MF(MF), S(S) {}
360 
361  /// Performs the annotation.
362  void annotate();
363 };
364 
365 } // end namespace llvm
366 
367 #endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
MachineLoop * getLoop() const
Return the single-block loop being scheduled.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A reimplementation of ModuloScheduleExpander.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Represents a schedule for a single-block loop.
unsigned Reg
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
Definition: BitVector.h:937
const HexagonInstrInfo * TII
int getFirstCycle()
Return the first cycle in the schedule, which is the cycle index of the first instruction.
void print(raw_ostream &OS)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
TargetInstrInfo - Interface to description of machine instruction set.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1...
unsigned const MachineRegisterInfo * MRI
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
static Expected< BitVector > expand(StringRef S, StringRef Original)
Definition: GlobPattern.cpp:27
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, LiveIntervals &LIS, InstrChangesTy InstrChanges)
Create a new ModuloScheduleExpander.
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int getFinalCycle()
Return the final cycle in the schedule, which is the cycle index of the last instruction.
ModuloSchedule(MachineFunction &MF, MachineLoop *Loop, std::vector< MachineInstr *> ScheduledInstrs, DenseMap< MachineInstr *, int > Cycle, DenseMap< MachineInstr *, int > Stage)
Create a new ModuloSchedule.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
Representation of each machine instruction.
Definition: MachineInstr.h:64
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
#define I(x, y, z)
Definition: MD5.cpp:58
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S, LiveIntervals *LIS)
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
IRTranslator LLVM IR MI
Wrapper class representing virtual and physical registers.
Definition: Register.h:19