LLVM 23.0.0git
PPCCTRLoops.cpp
Go to the documentation of this file.
1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16// its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21// a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation for two reasons: if a CTR
24// loop can be generated, we don't want the register allocator to allocate a
25// register for DecreaseCTRloop; and if a CTR loop can not be generated, the
26// newly added "cmplwi/cmpldi" still needs a condition register to be allocated.
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
43#include "llvm/Pass.h"
45#include <cassert>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "ppc-ctrloops"
50
51STATISTIC(NumCTRLoops, "Number of CTR loops generated");
52STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
53
54namespace {
55class PPCCTRLoops : public MachineFunctionPass {
56public:
57 static char ID;
58
59 PPCCTRLoops() : MachineFunctionPass(ID) {}
60
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
62 AU.addRequired<MachineLoopInfoWrapperPass>();
64 }
65
66 bool runOnMachineFunction(MachineFunction &MF) override;
67
68private:
69 const PPCInstrInfo *TII = nullptr;
70 MachineRegisterInfo *MRI = nullptr;
71
72 bool processLoop(MachineLoop *ML);
73 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
74 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
75 MachineInstr *Dec);
76 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
77};
78} // namespace
79
80char PPCCTRLoops::ID = 0;
81
82INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
83 false, false)
85INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
87
88FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
89
90bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
91 bool Changed = false;
92
93 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
94 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
95 MRI = &MF.getRegInfo();
96
97 for (auto *ML : MLI) {
98 if (ML->isOutermost())
99 Changed |= processLoop(ML);
100 }
101
102#ifndef NDEBUG
103 for (const MachineBasicBlock &BB : MF) {
104 for (const MachineInstr &I : BB)
105 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
106 I.getOpcode() != PPC::DecreaseCTR8loop) &&
107 "CTR loop pseudo is not expanded!");
108 }
109#endif
110
111 return Changed;
112}
113
114bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
115 if (!CheckReads) {
116 // If we are only checking for defs, that is we are going to find
117 // definitions before MTCTRloop, for this case:
118 // CTR defination inside the callee of a call instruction will not impact
119 // the defination of MTCTRloop, so we can use definesRegister() for the
120 // check, no need to check the regmask.
121 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
122 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
123 }
124
125 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
126 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
127 return true;
128
129 if (MI->getDesc().isCall())
130 return true;
131
132 // We define the CTR in the loop preheader, so if there is any CTR reader in
133 // the loop, we also can not use CTR loop form.
134 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
135 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
136 return true;
137
138 return false;
139}
140
141bool PPCCTRLoops::processLoop(MachineLoop *ML) {
142 bool Changed = false;
143
144 // Align with HardwareLoop pass, process inner loops first.
145 for (MachineLoop *I : *ML)
146 Changed |= processLoop(I);
147
148 // If any inner loop is changed, outter loop must be without hardware loop
149 // intrinsics.
150 if (Changed)
151 return true;
152
153 auto IsLoopStart = [](MachineInstr &MI) {
154 return MI.getOpcode() == PPC::MTCTRloop ||
155 MI.getOpcode() == PPC::MTCTR8loop;
156 };
157
158 auto SearchForStart =
159 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
160 for (auto &MI : *MBB) {
161 if (IsLoopStart(MI))
162 return &MI;
163 }
164 return nullptr;
165 };
166
167 MachineInstr *Start = nullptr;
168 MachineInstr *Dec = nullptr;
169 bool InvalidCTRLoop = false;
170
171 MachineBasicBlock *Preheader = ML->getLoopPreheader();
172 // If there is no preheader for this loop, there must be no MTCTRloop
173 // either.
174 if (!Preheader)
175 return false;
176
177 Start = SearchForStart(Preheader);
178 // This is not a CTR loop candidate.
179 if (!Start)
180 return false;
181
182 // If CTR is live to the preheader, we can not redefine the CTR register.
183 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
184 InvalidCTRLoop = true;
185
186 // Make sure there is also no CTR clobber in the block preheader between the
187 // begin and MTCTR.
189 std::next(Start->getReverseIterator());
190 I != Preheader->instr_rend(); ++I)
191 // Only check the definitions of CTR. If there is non-dead definition for
192 // the CTR, we conservatively don't generate a CTR loop.
193 if (isCTRClobber(&*I, /* CheckReads */ false)) {
194 InvalidCTRLoop = true;
195 break;
196 }
197
198 // Make sure there is also no CTR clobber/user in the block preheader between
199 // MTCTR and the end.
200 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
201 I != Preheader->instr_end(); ++I)
202 if (isCTRClobber(&*I, /* CheckReads */ true)) {
203 InvalidCTRLoop = true;
204 break;
205 }
206
207 // Find the CTR loop components and decide whether or not to fall back to a
208 // normal loop.
209 for (auto *MBB : reverse(ML->getBlocks())) {
210 for (auto &MI : *MBB) {
211 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
212 MI.getOpcode() == PPC::DecreaseCTR8loop)
213 Dec = &MI;
214 else if (!InvalidCTRLoop)
215 // If any instruction clobber CTR, then we can not generate a CTR loop.
216 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
217 }
218 if (Dec && InvalidCTRLoop)
219 break;
220 }
221
222 assert(Dec && "CTR loop is not complete!");
223
224 if (InvalidCTRLoop) {
225 expandNormalLoops(ML, Start, Dec);
226 ++NumNormalLoops;
227 }
228 else {
229 expandCTRLoops(ML, Start, Dec);
230 ++NumCTRLoops;
231 }
232 return true;
233}
234
235void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
236 MachineInstr *Dec) {
237 bool Is64Bit =
238 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
239
240 MachineBasicBlock *Preheader = Start->getParent();
241 MachineBasicBlock *Exiting = Dec->getParent();
242 assert((Preheader && Exiting) &&
243 "Preheader and exiting should exist for CTR loop!");
244
245 assert(Dec->getOperand(1).getImm() == 1 &&
246 "Loop decrement stride must be 1");
247
248 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
249 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
250
251 Register PHIDef =
252 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
253 : &PPC::GPRC_and_GPRC_NOR0RegClass);
254
255 Start->getParent()->getParent()->getProperties().resetNoPHIs();
256
257 // Generate "PHI" in the header block.
258 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
259 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
260 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
261
262 Register ADDIDef =
263 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
264 : &PPC::GPRC_and_GPRC_NOR0RegClass);
265 // Generate "addi -1" in the exiting block.
266 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
267 .addReg(PHIDef)
268 .addImm(-1);
269
270 // Add other inputs for the PHI node.
271 if (ML->isLoopLatch(Exiting)) {
272 // There must be only two predecessors for the loop header, one is the
273 // Preheader and the other one is loop latch Exiting. In hardware loop
274 // insertion pass, the block containing DecreaseCTRloop must dominate all
275 // loop latches. So there must be only one latch.
276 assert(ML->getHeader()->pred_size() == 2 &&
277 "Loop header predecessor is not right!");
278 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
279 } else {
280 // If the block containing DecreaseCTRloop is not a loop latch, we can use
281 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
282 // insertion pass, the block containing DecreaseCTRloop must dominate all
283 // loop latches.
284 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
285 if (ML->contains(P)) {
286 assert(ML->isLoopLatch(P) &&
287 "Loop's header in-loop predecessor is not loop latch!");
288 PHIMIB.addReg(ADDIDef).addMBB(P);
289 } else
290 assert(P == Preheader &&
291 "CTR loop should not be generated for irreducible loop!");
292 }
293 }
294
295 // Generate the compare in the exiting block.
296 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
297 auto CMPMIB =
298 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
299 .addReg(ADDIDef)
300 .addImm(0);
301
302 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
303 Dec->getOperand(0).getReg())
304 .addReg(CMPMIB->getOperand(0).getReg(), {}, PPC::sub_gt);
305
306 // Remove the pseudo instructions.
307 Start->eraseFromParent();
308 Dec->eraseFromParent();
309}
310
311void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
312 MachineInstr *Dec) {
313 bool Is64Bit =
314 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
315
316 MachineBasicBlock *Preheader = Start->getParent();
317 MachineBasicBlock *Exiting = Dec->getParent();
318
319 (void)Preheader;
320 assert((Preheader && Exiting) &&
321 "Preheader and exiting should exist for CTR loop!");
322
323 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
324
325 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
326 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
327 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
328 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
329 "There should be only one user for loop decrement pseudo!");
330
331 unsigned Opcode = 0;
332 switch (BrInstr->getOpcode()) {
333 case PPC::BC:
334 Opcode = BDNZOpcode;
335 (void) ML;
336 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
337 "Invalid ctr loop!");
338 break;
339 case PPC::BCn:
340 Opcode = BDZOpcode;
341 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
342 "Invalid ctr loop!");
343 break;
344 default:
345 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
346 }
347
348 // Generate "bdnz/bdz" in the exiting block just before the terminator.
349 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
350 .addMBB(BrInstr->getOperand(1).getMBB());
351
352 // Remove the pseudo instructions.
353 BrInstr->eraseFromParent();
354 Dec->eraseFromParent();
355}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
AnalysisUsage & addRequired()
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createPPCCTRLoopsPass()
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406