LLVM 20.0.0git
PPCMIPeephole.cpp
Go to the documentation of this file.
1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrBuilder.h"
34#include "PPCInstrInfo.h"
36#include "PPCTargetMachine.h"
37#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
54STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
55STATISTIC(MultiTOCSaves,
56 "Number of functions with multiple TOC saves that must be kept");
57STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
58STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
59STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
60STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
61STATISTIC(NumConvertedToImmediateForm,
62 "Number of instructions converted to their immediate form");
63STATISTIC(NumFunctionsEnteredInMIPeephole,
64 "Number of functions entered in PPC MI Peepholes");
65STATISTIC(NumFixedPointIterations,
66 "Number of fixed-point iterations converting reg-reg instructions "
67 "to reg-imm ones");
68STATISTIC(NumRotatesCollapsed,
69 "Number of pairs of rotate left, clear left/right collapsed");
70STATISTIC(NumEXTSWAndSLDICombined,
71 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
72STATISTIC(NumLoadImmZeroFoldedAndRemoved,
73 "Number of LI(8) reg, 0 that are folded to r0 and removed");
74
75static cl::opt<bool>
76FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
77 cl::desc("Iterate to a fixed point when attempting to "
78 "convert reg-reg instructions to reg-imm"));
79
80static cl::opt<bool>
81ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
82 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
83
84static cl::opt<bool>
85 EnableSExtElimination("ppc-eliminate-signext",
86 cl::desc("enable elimination of sign-extensions"),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool>
90 EnableZExtElimination("ppc-eliminate-zeroext",
91 cl::desc("enable elimination of zero-extensions"),
92 cl::init(true), cl::Hidden);
93
94static cl::opt<bool>
95 EnableTrapOptimization("ppc-opt-conditional-trap",
96 cl::desc("enable optimization of conditional traps"),
97 cl::init(false), cl::Hidden);
98
100 PeepholeXToICounter, "ppc-xtoi-peephole",
101 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
102
103DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
104 "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
108struct PPCMIPeephole : public MachineFunctionPass {
109
110 static char ID;
111 const PPCInstrInfo *TII;
112 MachineFunction *MF;
114 LiveVariables *LV;
115
116 PPCMIPeephole() : MachineFunctionPass(ID) {
118 }
119
120private:
124 BlockFrequency EntryFreq;
125 SmallSet<Register, 16> RegsToUpdate;
126
127 // Initialize class variables.
128 void initialize(MachineFunction &MFParm);
129
130 // Perform peepholes.
131 bool simplifyCode();
132
133 // Perform peepholes.
134 bool eliminateRedundantCompare();
135 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
136 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
137 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
138 MachineInstr *&ToErase);
139 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
141
142 // A number of transformations will eliminate the definition of a register
143 // as all of its uses will be removed. However, this leaves a register
144 // without a definition for LiveVariables. Such transformations should
145 // use this function to provide a dummy definition of the register that
146 // will simply be removed by DCE.
147 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
148 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
149 }
150 void addRegToUpdateWithLine(Register Reg, int Line);
151 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
153 Register Dst);
154
155public:
156
157 void getAnalysisUsage(AnalysisUsage &AU) const override {
167 }
168
169 // Main entry point for this pass.
170 bool runOnMachineFunction(MachineFunction &MF) override {
171 initialize(MF);
172 // At this point, TOC pointer should not be used in a function that uses
173 // PC-Relative addressing.
174 assert((MF.getRegInfo().use_empty(PPC::X2) ||
176 "TOC pointer used in a function using PC-Relative addressing!");
177 if (skipFunction(MF.getFunction()))
178 return false;
179 bool Changed = simplifyCode();
180#ifndef NDEBUG
181 if (Changed)
182 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
183 "-mllvm -disable-ppc-peephole");
184#endif
185 return Changed;
186 }
187};
188
189#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
190void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
192 return;
193 if (RegsToUpdate.insert(Reg).second)
194 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
195 << " on line " << Line
196 << " for re-computation of kill flags\n");
197}
198
199// Initialize class variables.
200void PPCMIPeephole::initialize(MachineFunction &MFParm) {
201 MF = &MFParm;
202 MRI = &MF->getRegInfo();
203 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
204 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
205 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
206 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
207 EntryFreq = MBFI->getEntryFreq();
208 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
209 RegsToUpdate.clear();
210 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
211 LLVM_DEBUG(MF->dump());
212}
213
214static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
216 assert(Op && "Invalid Operand!");
217 if (!Op->isReg())
218 return nullptr;
219
220 Register Reg = Op->getReg();
221 if (!Reg.isVirtual())
222 return nullptr;
223
224 return MRI->getVRegDef(Reg);
225}
226
227// This function returns number of known zero bits in output of MI
228// starting from the most significant bit.
229static unsigned getKnownLeadingZeroCount(const unsigned Reg,
230 const PPCInstrInfo *TII,
231 const MachineRegisterInfo *MRI) {
232 MachineInstr *MI = MRI->getVRegDef(Reg);
233 unsigned Opcode = MI->getOpcode();
234 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
235 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
236 return MI->getOperand(3).getImm();
237
238 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
239 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
240 return MI->getOperand(3).getImm();
241
242 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
243 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
244 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
245 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
246 return 32 + MI->getOperand(3).getImm();
247
248 if (Opcode == PPC::ANDI_rec) {
249 uint16_t Imm = MI->getOperand(2).getImm();
250 return 48 + llvm::countl_zero(Imm);
251 }
252
253 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
254 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
255 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
256 // The result ranges from 0 to 32.
257 return 58;
258
259 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
260 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
261 // The result ranges from 0 to 64.
262 return 57;
263
264 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
265 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
266 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
267 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
268 return 48;
269
270 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
271 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
272 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
273 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
274 return 56;
275
276 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
277 Opcode == PPC::AND8_rec)
278 return std::max(
279 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
280 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
281
282 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
283 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
284 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
285 Opcode == PPC::XOR8_rec)
286 return std::min(
287 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
288 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
289
290 if (TII->isZeroExtended(Reg, MRI))
291 return 32;
292
293 return 0;
294}
295
296// This function maintains a map for the pairs <TOC Save Instr, Keep>
297// Each time a new TOC save is encountered, it checks if any of the existing
298// ones are dominated by the new one. If so, it marks the existing one as
299// redundant by setting it's entry in the map as false. It then adds the new
300// instruction to the map with either true or false depending on if any
301// existing instructions dominated the new one.
302void PPCMIPeephole::UpdateTOCSaves(
303 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
304 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
305 // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
306 // here only support it under ELFv2.
307 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
309
310 MachineBasicBlock *Entry = &MF->front();
311 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
312
313 // If the block in which the TOC save resides is in a block that
314 // post-dominates Entry, or a block that is hotter than entry (keep in mind
315 // that early MachineLICM has already run so the TOC save won't be hoisted)
316 // we can just do the save in the prologue.
317 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
318 FI->setMustSaveTOC(true);
319
320 // If we are saving the TOC in the prologue, all the TOC saves can be
321 // removed from the code.
322 if (FI->mustSaveTOC()) {
323 for (auto &TOCSave : TOCSaves)
324 TOCSave.second = false;
325 // Add new instruction to map.
326 TOCSaves[MI] = false;
327 return;
328 }
329 }
330
331 bool Keep = true;
332 for (auto &I : TOCSaves) {
333 MachineInstr *CurrInst = I.first;
334 // If new instruction dominates an existing one, mark existing one as
335 // redundant.
336 if (I.second && MDT->dominates(MI, CurrInst))
337 I.second = false;
338 // Check if the new instruction is redundant.
339 if (MDT->dominates(CurrInst, MI)) {
340 Keep = false;
341 break;
342 }
343 }
344 // Add new instruction to map.
345 TOCSaves[MI] = Keep;
346}
347
348// This function returns a list of all PHI nodes in the tree starting from
349// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
350// The list initially only contains the root PHI. When we visit a PHI node, we
351// add it to the list. We continue to look for other PHI node operands while
352// there are nodes to visit in the list. The function returns false if the
353// optimization cannot be applied on this tree.
354static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
355 MachineInstr *RootPHI,
357 PHIs.push_back(RootPHI);
358 unsigned VisitedIndex = 0;
359 while (VisitedIndex < PHIs.size()) {
360 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
361 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
362 PHIOp != NumOps; PHIOp += 2) {
363 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
364 if (!RegOp.isVirtual())
365 return false;
366 MachineInstr *Instr = MRI->getVRegDef(RegOp);
367 // While collecting the PHI nodes, we check if they can be converted (i.e.
368 // all the operands are either copies, implicit defs or PHI nodes).
369 unsigned Opcode = Instr->getOpcode();
370 if (Opcode == PPC::COPY) {
371 Register Reg = Instr->getOperand(1).getReg();
372 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
373 return false;
374 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
375 return false;
376 // If we detect a cycle in the PHI nodes, we exit. It would be
377 // possible to change cycles as well, but that would add a lot
378 // of complexity for a case that is unlikely to occur with MMA
379 // code.
380 if (Opcode != PPC::PHI)
381 continue;
382 if (llvm::is_contained(PHIs, Instr))
383 return false;
384 PHIs.push_back(Instr);
385 }
386 VisitedIndex++;
387 }
388 return true;
389}
390
391// This function changes the unprimed accumulator PHI nodes in the PHIs list to
392// primed accumulator PHI nodes. The list is traversed in reverse order to
393// change all the PHI operands of a PHI node before changing the node itself.
394// We keep a map to associate each changed PHI node to its non-changed form.
395void PPCMIPeephole::convertUnprimedAccPHIs(
399 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
401 // We check if the current PHI node can be changed by looking at its
402 // operands. If all the operands are either copies from primed
403 // accumulators, implicit definitions or other unprimed accumulator
404 // PHI nodes, we change it.
405 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
406 PHIOp += 2) {
407 Register RegOp = PHI->getOperand(PHIOp).getReg();
408 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
409 unsigned Opcode = PHIInput->getOpcode();
410 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
411 Opcode == PPC::PHI) &&
412 "Unexpected instruction");
413 if (Opcode == PPC::COPY) {
414 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
415 &PPC::ACCRCRegClass &&
416 "Unexpected register class");
417 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
418 } else if (Opcode == PPC::IMPLICIT_DEF) {
419 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
420 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
421 TII->get(PPC::IMPLICIT_DEF), AccReg);
422 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
423 PHI->getOperand(PHIOp + 1)});
424 } else if (Opcode == PPC::PHI) {
425 // We found a PHI operand. At this point we know this operand
426 // has already been changed so we get its associated changed form
427 // from the map.
428 assert(ChangedPHIMap.count(PHIInput) == 1 &&
429 "This PHI node should have already been changed.");
430 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
432 PrimedAccPHI->getOperand(0).getReg(), false),
433 PHI->getOperand(PHIOp + 1)});
434 }
435 }
436 Register AccReg = Dst;
437 // If the PHI node we are changing is the root node, the register it defines
438 // will be the destination register of the original copy (of the PHI def).
439 // For all other PHI's in the list, we need to create another primed
440 // accumulator virtual register as the PHI will no longer define the
441 // unprimed accumulator.
442 if (PHI != PHIs[0])
443 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
445 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
446 for (auto RegMBB : PHIOps) {
447 NewPHI.add(RegMBB.first).add(RegMBB.second);
448 if (MRI->isSSA())
449 addRegToUpdate(RegMBB.first.getReg());
450 }
451 // The liveness of old PHI and new PHI have to be updated.
452 addRegToUpdate(PHI->getOperand(0).getReg());
453 addRegToUpdate(AccReg);
454 ChangedPHIMap[PHI] = NewPHI.getInstr();
455 LLVM_DEBUG(dbgs() << "Converting PHI: ");
456 LLVM_DEBUG(PHI->dump());
457 LLVM_DEBUG(dbgs() << "To: ");
458 LLVM_DEBUG(NewPHI.getInstr()->dump());
459 }
460}
461
462// Perform peephole optimizations.
463bool PPCMIPeephole::simplifyCode() {
464 bool Simplified = false;
465 bool TrapOpt = false;
466 MachineInstr* ToErase = nullptr;
467 std::map<MachineInstr *, bool> TOCSaves;
468 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
469 NumFunctionsEnteredInMIPeephole++;
470 if (ConvertRegReg) {
471 // Fixed-point conversion of reg/reg instructions fed by load-immediate
472 // into reg/imm instructions. FIXME: This is expensive, control it with
473 // an option.
474 bool SomethingChanged = false;
475 do {
476 NumFixedPointIterations++;
477 SomethingChanged = false;
478 for (MachineBasicBlock &MBB : *MF) {
479 for (MachineInstr &MI : MBB) {
480 if (MI.isDebugInstr())
481 continue;
482
483 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
484 continue;
485
486 SmallSet<Register, 4> RRToRIRegsToUpdate;
487 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
488 continue;
489 for (Register R : RRToRIRegsToUpdate)
491 // The updated instruction may now have new register operands.
492 // Conservatively add them to recompute the flags as well.
493 for (const MachineOperand &MO : MI.operands())
494 if (MO.isReg())
495 addRegToUpdate(MO.getReg());
496 // We don't erase anything in case the def has other uses. Let DCE
497 // remove it if it can be removed.
498 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
499 LLVM_DEBUG(MI.dump());
500 NumConvertedToImmediateForm++;
501 SomethingChanged = true;
502 Simplified = true;
503 continue;
504 }
505 }
506 } while (SomethingChanged && FixedPointRegToImm);
507 }
508
509 // Since we are deleting this instruction, we need to run LiveVariables
510 // on any of its definitions that are marked as needing an update since
511 // we can't run LiveVariables on a deleted register. This only needs
512 // to be done for defs since uses will have their own defining
513 // instructions so we won't be running LiveVariables on a deleted reg.
514 auto recomputeLVForDyingInstr = [&]() {
515 if (RegsToUpdate.empty())
516 return;
517 for (MachineOperand &MO : ToErase->operands()) {
518 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
519 continue;
520 Register RegToUpdate = MO.getReg();
521 RegsToUpdate.erase(RegToUpdate);
522 // If some transformation has introduced an additional definition of
523 // this register (breaking SSA), we can safely convert this def to
524 // a def of an invalid register as the instruction is going away.
525 if (!MRI->getUniqueVRegDef(RegToUpdate))
526 MO.setReg(PPC::NoRegister);
527 LV->recomputeForSingleDefVirtReg(RegToUpdate);
528 }
529 };
530
531 for (MachineBasicBlock &MBB : *MF) {
532 for (MachineInstr &MI : MBB) {
533
534 // If the previous instruction was marked for elimination,
535 // remove it now.
536 if (ToErase) {
537 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
538 LLVM_DEBUG(ToErase->dump());
539 recomputeLVForDyingInstr();
540 ToErase->eraseFromParent();
541 ToErase = nullptr;
542 }
543 // If a conditional trap instruction got optimized to an
544 // unconditional trap, eliminate all the instructions after
545 // the trap.
546 if (EnableTrapOptimization && TrapOpt) {
547 ToErase = &MI;
548 continue;
549 }
550
551 // Ignore debug instructions.
552 if (MI.isDebugInstr())
553 continue;
554
555 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
556 continue;
557
558 // Per-opcode peepholes.
559 switch (MI.getOpcode()) {
560
561 default:
562 break;
563 case PPC::COPY: {
564 Register Src = MI.getOperand(1).getReg();
565 Register Dst = MI.getOperand(0).getReg();
566 if (!Src.isVirtual() || !Dst.isVirtual())
567 break;
568 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
569 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
570 break;
571
572 // We are copying an unprimed accumulator to a primed accumulator.
573 // If the input to the copy is a PHI that is fed only by (i) copies in
574 // the other direction (ii) implicitly defined unprimed accumulators or
575 // (iii) other PHI nodes satisfying (i) and (ii), we can change
576 // the PHI to a PHI on primed accumulators (as long as we also change
577 // its operands). To detect and change such copies, we first get a list
578 // of all the PHI nodes starting from the root PHI node in BFS order.
579 // We then visit all these PHI nodes to check if they can be changed to
580 // primed accumulator PHI nodes and if so, we change them.
581 MachineInstr *RootPHI = MRI->getVRegDef(Src);
582 if (RootPHI->getOpcode() != PPC::PHI)
583 break;
584
586 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
587 break;
588
589 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
590
591 ToErase = &MI;
592 break;
593 }
594 case PPC::LI:
595 case PPC::LI8: {
596 // If we are materializing a zero, look for any use operands for which
597 // zero means immediate zero. All such operands can be replaced with
598 // PPC::ZERO.
599 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
600 break;
601 Register MIDestReg = MI.getOperand(0).getReg();
602 bool Folded = false;
603 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
604 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
605 if (MRI->use_nodbg_empty(MIDestReg)) {
606 ++NumLoadImmZeroFoldedAndRemoved;
607 ToErase = &MI;
608 }
609 if (Folded)
610 addRegToUpdate(MIDestReg);
611 Simplified |= Folded;
612 break;
613 }
614 case PPC::STW:
615 case PPC::STD: {
616 MachineFrameInfo &MFI = MF->getFrameInfo();
617 if (MFI.hasVarSizedObjects() ||
618 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
619 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
620 break;
621 // When encountering a TOC save instruction, call UpdateTOCSaves
622 // to add it to the TOCSaves map and mark any existing TOC saves
623 // it dominates as redundant.
624 if (TII->isTOCSaveMI(MI))
625 UpdateTOCSaves(TOCSaves, &MI);
626 break;
627 }
628 case PPC::XXPERMDI: {
629 // Perform simplifications of 2x64 vector swaps and splats.
630 // A swap is identified by an immediate value of 2, and a splat
631 // is identified by an immediate value of 0 or 3.
632 int Immed = MI.getOperand(3).getImm();
633
634 if (Immed == 1)
635 break;
636
637 // For each of these simplifications, we need the two source
638 // regs to match. Unfortunately, MachineCSE ignores COPY and
639 // SUBREG_TO_REG, so for example we can see
640 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
641 // We have to look through chains of COPY and SUBREG_TO_REG
642 // to find the real source values for comparison.
643 Register TrueReg1 =
644 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
645 Register TrueReg2 =
646 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
647
648 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
649 break;
650
651 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
652
653 if (!DefMI)
654 break;
655
656 unsigned DefOpc = DefMI->getOpcode();
657
658 // If this is a splat fed by a splatting load, the splat is
659 // redundant. Replace with a copy. This doesn't happen directly due
660 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
661 // a load of a double to a vector of 64-bit integers.
662 auto isConversionOfLoadAndSplat = [=]() -> bool {
663 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
664 return false;
665 Register FeedReg1 =
666 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
667 if (FeedReg1.isVirtual()) {
668 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
669 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
670 return true;
671 }
672 return false;
673 };
674 if ((Immed == 0 || Immed == 3) &&
675 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
676 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
677 "to load-and-splat/copy: ");
678 LLVM_DEBUG(MI.dump());
679 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
680 MI.getOperand(0).getReg())
681 .add(MI.getOperand(1));
682 addRegToUpdate(MI.getOperand(1).getReg());
683 ToErase = &MI;
684 Simplified = true;
685 }
686
687 // If this is a splat or a swap fed by another splat, we
688 // can replace it with a copy.
689 if (DefOpc == PPC::XXPERMDI) {
690 Register DefReg1 = DefMI->getOperand(1).getReg();
691 Register DefReg2 = DefMI->getOperand(2).getReg();
692 unsigned DefImmed = DefMI->getOperand(3).getImm();
693
694 // If the two inputs are not the same register, check to see if
695 // they originate from the same virtual register after only
696 // copy-like instructions.
697 if (DefReg1 != DefReg2) {
698 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
699 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
700
701 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
702 break;
703 }
704
705 if (DefImmed == 0 || DefImmed == 3) {
706 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
707 "to splat/copy: ");
708 LLVM_DEBUG(MI.dump());
709 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
710 MI.getOperand(0).getReg())
711 .add(MI.getOperand(1));
712 addRegToUpdate(MI.getOperand(1).getReg());
713 ToErase = &MI;
714 Simplified = true;
715 }
716
717 // If this is a splat fed by a swap, we can simplify modify
718 // the splat to splat the other value from the swap's input
719 // parameter.
720 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
721 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
722 LLVM_DEBUG(MI.dump());
723 addRegToUpdate(MI.getOperand(1).getReg());
724 addRegToUpdate(MI.getOperand(2).getReg());
725 MI.getOperand(1).setReg(DefReg1);
726 MI.getOperand(2).setReg(DefReg2);
727 MI.getOperand(3).setImm(3 - Immed);
728 addRegToUpdate(DefReg1);
729 addRegToUpdate(DefReg2);
730 Simplified = true;
731 }
732
733 // If this is a swap fed by a swap, we can replace it
734 // with a copy from the first swap's input.
735 else if (Immed == 2 && DefImmed == 2) {
736 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
737 LLVM_DEBUG(MI.dump());
738 addRegToUpdate(MI.getOperand(1).getReg());
739 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
740 MI.getOperand(0).getReg())
741 .add(DefMI->getOperand(1));
744 ToErase = &MI;
745 Simplified = true;
746 }
747 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
748 DefOpc == PPC::XXPERMDIs &&
749 (DefMI->getOperand(2).getImm() == 0 ||
750 DefMI->getOperand(2).getImm() == 3)) {
751 ToErase = &MI;
752 Simplified = true;
753 // Swap of a splat, convert to copy.
754 if (Immed == 2) {
755 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
756 LLVM_DEBUG(MI.dump());
757 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
758 MI.getOperand(0).getReg())
759 .add(MI.getOperand(1));
760 addRegToUpdate(MI.getOperand(1).getReg());
761 break;
762 }
763 // Splat fed by another splat - switch the output of the first
764 // and remove the second.
765 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
766 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
767 LLVM_DEBUG(MI.dump());
768 } else if (Immed == 2 &&
769 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
770 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
771 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
772 DefOpc == PPC::VSPLTISW)) {
773 // Swap of various vector splats, convert to copy.
774 ToErase = &MI;
775 Simplified = true;
776 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
777 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
778 LLVM_DEBUG(MI.dump());
779 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
780 MI.getOperand(0).getReg())
781 .add(MI.getOperand(1));
782 addRegToUpdate(MI.getOperand(1).getReg());
783 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
784 TII->isLoadFromConstantPool(DefMI)) {
785 const Constant *C = TII->getConstantFromConstantPool(DefMI);
786 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
787 ToErase = &MI;
788 Simplified = true;
790 << "Optimizing swap(splat pattern from constant-pool) "
791 "=> copy(splat pattern from constant-pool): ");
792 LLVM_DEBUG(MI.dump());
793 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
794 MI.getOperand(0).getReg())
795 .add(MI.getOperand(1));
796 addRegToUpdate(MI.getOperand(1).getReg());
797 }
798 }
799 break;
800 }
801 case PPC::VSPLTB:
802 case PPC::VSPLTH:
803 case PPC::XXSPLTW: {
804 unsigned MyOpcode = MI.getOpcode();
805 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
806 Register TrueReg =
807 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
808 if (!TrueReg.isVirtual())
809 break;
810 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
811 if (!DefMI)
812 break;
813 unsigned DefOpcode = DefMI->getOpcode();
814 auto isConvertOfSplat = [=]() -> bool {
815 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
816 return false;
817 Register ConvReg = DefMI->getOperand(1).getReg();
818 if (!ConvReg.isVirtual())
819 return false;
820 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
821 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
822 Splt->getOpcode() == PPC::XXSPLTW);
823 };
824 bool AlreadySplat = (MyOpcode == DefOpcode) ||
825 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
826 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
827 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
828 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
829 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
830 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
831 // If the instruction[s] that feed this splat have already splat
832 // the value, this splat is redundant.
833 if (AlreadySplat) {
834 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
835 LLVM_DEBUG(MI.dump());
836 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
837 MI.getOperand(0).getReg())
838 .add(MI.getOperand(OpNo));
839 addRegToUpdate(MI.getOperand(OpNo).getReg());
840 ToErase = &MI;
841 Simplified = true;
842 }
843 // Splat fed by a shift. Usually when we align value to splat into
844 // vector element zero.
845 if (DefOpcode == PPC::XXSLDWI) {
846 Register ShiftRes = DefMI->getOperand(0).getReg();
847 Register ShiftOp1 = DefMI->getOperand(1).getReg();
848 Register ShiftOp2 = DefMI->getOperand(2).getReg();
849 unsigned ShiftImm = DefMI->getOperand(3).getImm();
850 unsigned SplatImm =
851 MI.getOperand(MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
852 if (ShiftOp1 == ShiftOp2) {
853 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
854 if (MRI->hasOneNonDBGUse(ShiftRes)) {
855 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
857 ToErase = DefMI;
858 }
859 Simplified = true;
860 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
861 << " to " << NewElem << " in instruction: ");
862 LLVM_DEBUG(MI.dump());
863 addRegToUpdate(MI.getOperand(OpNo).getReg());
864 addRegToUpdate(ShiftOp1);
865 MI.getOperand(OpNo).setReg(ShiftOp1);
866 MI.getOperand(2).setImm(NewElem);
867 }
868 }
869 break;
870 }
871 case PPC::XVCVDPSP: {
872 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
873 Register TrueReg =
874 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
875 if (!TrueReg.isVirtual())
876 break;
877 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
878
879 // This can occur when building a vector of single precision or integer
880 // values.
881 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
882 Register DefsReg1 =
883 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
884 Register DefsReg2 =
885 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
886 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
887 break;
888 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
889 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
890
891 if (!P1 || !P2)
892 break;
893
894 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
895 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
896 // the FRSP/XSRSP.
897 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
898 unsigned Opc = RoundInstr->getOpcode();
899 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
900 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
901 Simplified = true;
902 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
903 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
904 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
905 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
906 if (Use.getOperand(i).isReg() &&
907 Use.getOperand(i).getReg() == FRSPDefines)
908 Use.getOperand(i).setReg(ConvReg1);
909 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
910 LLVM_DEBUG(RoundInstr->dump());
911 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
912 LLVM_DEBUG(MI.dump());
913 LLVM_DEBUG(dbgs() << "Through instruction:\n");
915 addRegToUpdate(ConvReg1);
916 addRegToUpdate(FRSPDefines);
917 ToErase = RoundInstr;
918 }
919 };
920
921 // If the input to XVCVDPSP is a vector that was built (even
922 // partially) out of FRSP's, the FRSP(s) can safely be removed
923 // since this instruction performs the same operation.
924 if (P1 != P2) {
925 removeFRSPIfPossible(P1);
926 removeFRSPIfPossible(P2);
927 break;
928 }
929 removeFRSPIfPossible(P1);
930 }
931 break;
932 }
933 case PPC::EXTSH:
934 case PPC::EXTSH8:
935 case PPC::EXTSH8_32_64: {
936 if (!EnableSExtElimination) break;
937 Register NarrowReg = MI.getOperand(1).getReg();
938 if (!NarrowReg.isVirtual())
939 break;
940
941 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
942 unsigned SrcOpcode = SrcMI->getOpcode();
943 // If we've used a zero-extending load that we will sign-extend,
944 // just do a sign-extending load.
945 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
946 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
947 break;
948 // Determine the new opcode. We need to make sure that if the original
949 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
950 // Likewise if the source is X-Form the new opcode should also be
951 // X-Form.
952 unsigned Opc = PPC::LHA;
953 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
954 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
955 MI.getOpcode() == PPC::EXTSH8_32_64;
956
957 if (SourceIsXForm && MIIs64Bit)
958 Opc = PPC::LHAX8;
959 else if (SourceIsXForm && !MIIs64Bit)
960 Opc = PPC::LHAX;
961 else if (MIIs64Bit)
962 Opc = PPC::LHA8;
963
964 addRegToUpdate(NarrowReg);
965 addRegToUpdate(MI.getOperand(0).getReg());
966
967 // We are removing a definition of NarrowReg which will cause
968 // problems in AliveBlocks. Add an implicit def that will be
969 // removed so that AliveBlocks are updated correctly.
970 addDummyDef(MBB, &MI, NarrowReg);
971 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
972 LLVM_DEBUG(SrcMI->dump());
973 LLVM_DEBUG(dbgs() << "and sign-extension\n");
974 LLVM_DEBUG(MI.dump());
975 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
976 SrcMI->setDesc(TII->get(Opc));
977 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
978 ToErase = &MI;
979 Simplified = true;
980 NumEliminatedSExt++;
981 }
982 break;
983 }
984 case PPC::EXTSW:
985 case PPC::EXTSW_32:
986 case PPC::EXTSW_32_64: {
987 if (!EnableSExtElimination) break;
988 Register NarrowReg = MI.getOperand(1).getReg();
989 if (!NarrowReg.isVirtual())
990 break;
991
992 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
993 unsigned SrcOpcode = SrcMI->getOpcode();
994 // If we've used a zero-extending load that we will sign-extend,
995 // just do a sign-extending load.
996 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
997 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
998 break;
999
1000 // The transformation from a zero-extending load to a sign-extending
1001 // load is only legal when the displacement is a multiple of 4.
1002 // If the displacement is not at least 4 byte aligned, don't perform
1003 // the transformation.
1004 bool IsWordAligned = false;
1005 if (SrcMI->getOperand(1).isGlobal()) {
1006 const GlobalObject *GO =
1007 dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
1008 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
1009 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1010 IsWordAligned = true;
1011 } else if (SrcMI->getOperand(1).isImm()) {
1012 int64_t Value = SrcMI->getOperand(1).getImm();
1013 if (Value % 4 == 0)
1014 IsWordAligned = true;
1015 }
1016
1017 // Determine the new opcode. We need to make sure that if the original
1018 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1019 // Likewise if the source is X-Form the new opcode should also be
1020 // X-Form.
1021 unsigned Opc = PPC::LWA_32;
1022 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1023 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1024 MI.getOpcode() == PPC::EXTSW_32_64;
1025
1026 if (SourceIsXForm && MIIs64Bit)
1027 Opc = PPC::LWAX;
1028 else if (SourceIsXForm && !MIIs64Bit)
1029 Opc = PPC::LWAX_32;
1030 else if (MIIs64Bit)
1031 Opc = PPC::LWA;
1032
1033 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1034 break;
1035
1036 addRegToUpdate(NarrowReg);
1037 addRegToUpdate(MI.getOperand(0).getReg());
1038
1039 // We are removing a definition of NarrowReg which will cause
1040 // problems in AliveBlocks. Add an implicit def that will be
1041 // removed so that AliveBlocks are updated correctly.
1042 addDummyDef(MBB, &MI, NarrowReg);
1043 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1044 LLVM_DEBUG(SrcMI->dump());
1045 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1046 LLVM_DEBUG(MI.dump());
1047 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1048 SrcMI->setDesc(TII->get(Opc));
1049 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1050 ToErase = &MI;
1051 Simplified = true;
1052 NumEliminatedSExt++;
1053 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1054 TII->isSignExtended(NarrowReg, MRI)) {
1055 // We can eliminate EXTSW if the input is known to be already
1056 // sign-extended.
1057 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1058 Register TmpReg =
1059 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1060 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1061 TmpReg);
1062 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1063 MI.getOperand(0).getReg())
1064 .addReg(TmpReg)
1065 .addReg(NarrowReg)
1066 .addImm(PPC::sub_32);
1067 ToErase = &MI;
1068 Simplified = true;
1069 NumEliminatedSExt++;
1070 }
1071 break;
1072 }
1073 case PPC::RLDICL: {
1074 // We can eliminate RLDICL (e.g. for zero-extension)
1075 // if all bits to clear are already zero in the input.
1076 // This code assume following code sequence for zero-extension.
1077 // %6 = COPY %5:sub_32; (optional)
1078 // %8 = IMPLICIT_DEF;
1079 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1080 if (!EnableZExtElimination) break;
1081
1082 if (MI.getOperand(2).getImm() != 0)
1083 break;
1084
1085 Register SrcReg = MI.getOperand(1).getReg();
1086 if (!SrcReg.isVirtual())
1087 break;
1088
1089 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1090 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1091 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1092 break;
1093
1094 MachineInstr *ImpDefMI, *SubRegMI;
1095 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1096 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1097 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1098
1099 SrcMI = SubRegMI;
1100 if (SubRegMI->getOpcode() == PPC::COPY) {
1101 Register CopyReg = SubRegMI->getOperand(1).getReg();
1102 if (CopyReg.isVirtual())
1103 SrcMI = MRI->getVRegDef(CopyReg);
1104 }
1105 if (!SrcMI->getOperand(0).isReg())
1106 break;
1107
1108 unsigned KnownZeroCount =
1109 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1110 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1111 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1112 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1113 MI.getOperand(0).getReg())
1114 .addReg(SrcReg);
1115 addRegToUpdate(SrcReg);
1116 ToErase = &MI;
1117 Simplified = true;
1118 NumEliminatedZExt++;
1119 }
1120 break;
1121 }
1122
1123 // TODO: Any instruction that has an immediate form fed only by a PHI
1124 // whose operands are all load immediate can be folded away. We currently
1125 // do this for ADD instructions, but should expand it to arithmetic and
1126 // binary instructions with immediate forms in the future.
1127 case PPC::ADD4:
1128 case PPC::ADD8: {
1129 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1130 assert(PhiOp && "Invalid Operand!");
1131 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1132
1133 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1134 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1135 };
1136
1137 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1138 MachineOperand *PhiOp) {
1139 assert(PhiOp && "Invalid Operand!");
1140 assert(DominatorOp && "Invalid Operand!");
1141 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1142 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1143
1144 // Note: the vregs only show up at odd indices position of PHI Node,
1145 // the even indices position save the BB info.
1146 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1147 MachineInstr *LiMI =
1148 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1149 if (!LiMI ||
1150 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1151 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1152 !MDT->dominates(DefDomMI, LiMI))
1153 return false;
1154 }
1155
1156 return true;
1157 };
1158
1159 MachineOperand Op1 = MI.getOperand(1);
1160 MachineOperand Op2 = MI.getOperand(2);
1161 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1162 std::swap(Op1, Op2);
1163 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1164 break; // We don't have an ADD fed by LI's that can be transformed
1165
1166 // Now we know that Op1 is the PHI node and Op2 is the dominator
1167 Register DominatorReg = Op2.getReg();
1168
1169 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1170 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1171 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1172 MRI->setRegClass(DominatorReg, TRC);
1173
1174 // replace LIs with ADDIs
1175 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1176 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1177 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1178 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1179 LLVM_DEBUG(LiMI->dump());
1180
1181 // There could be repeated registers in the PHI, e.g: %1 =
1182 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1183 // already replaced the def instruction, skip.
1184 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1185 continue;
1186
1187 assert((LiMI->getOpcode() == PPC::LI ||
1188 LiMI->getOpcode() == PPC::LI8) &&
1189 "Invalid Opcode!");
1190 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1191 LiMI->removeOperand(1); // remove the imm of LI
1192 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1193 : PPC::ADDI8));
1194 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1195 .addReg(DominatorReg)
1196 .addImm(LiImm); // restore the imm of LI
1197 LLVM_DEBUG(LiMI->dump());
1198 }
1199
1200 // Replace ADD with COPY
1201 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1202 LLVM_DEBUG(MI.dump());
1203 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1204 MI.getOperand(0).getReg())
1205 .add(Op1);
1206 addRegToUpdate(Op1.getReg());
1207 addRegToUpdate(Op2.getReg());
1208 ToErase = &MI;
1209 Simplified = true;
1210 NumOptADDLIs++;
1211 break;
1212 }
1213 case PPC::RLDICR: {
1214 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1215 combineSEXTAndSHL(MI, ToErase);
1216 break;
1217 }
1218 case PPC::ANDI_rec:
1219 case PPC::ANDI8_rec:
1220 case PPC::ANDIS_rec:
1221 case PPC::ANDIS8_rec: {
1222 Register TrueReg =
1223 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1224 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1225 break;
1226
1227 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1228 if (!SrcMI)
1229 break;
1230
1231 unsigned SrcOpCode = SrcMI->getOpcode();
1232 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1233 break;
1234
1235 Register SrcReg, DstReg;
1236 SrcReg = SrcMI->getOperand(1).getReg();
1237 DstReg = MI.getOperand(1).getReg();
1238 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1239 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1240 if (DstRC != SrcRC)
1241 break;
1242
1243 uint64_t AndImm = MI.getOperand(2).getImm();
1244 if (MI.getOpcode() == PPC::ANDIS_rec ||
1245 MI.getOpcode() == PPC::ANDIS8_rec)
1246 AndImm <<= 16;
1247 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1248 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1249 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1250
1251 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1252 // if all bits to AND are already zero in the input.
1253 bool PatternResultZero =
1254 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1255 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1256
1257 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1258 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1259 bool PatternRemoveRotate =
1260 SrcMI->getOperand(2).getImm() == 0 &&
1261 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1262 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1263
1264 if (!PatternResultZero && !PatternRemoveRotate)
1265 break;
1266
1267 LLVM_DEBUG(dbgs() << "Combining pair: ");
1268 LLVM_DEBUG(SrcMI->dump());
1269 LLVM_DEBUG(MI.dump());
1270 if (PatternResultZero)
1271 MI.getOperand(2).setImm(0);
1272 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1273 LLVM_DEBUG(dbgs() << "To: ");
1274 LLVM_DEBUG(MI.dump());
1275 addRegToUpdate(MI.getOperand(1).getReg());
1276 addRegToUpdate(SrcMI->getOperand(0).getReg());
1277 Simplified = true;
1278 break;
1279 }
1280 case PPC::RLWINM:
1281 case PPC::RLWINM_rec:
1282 case PPC::RLWINM8:
1283 case PPC::RLWINM8_rec: {
1284 // We might replace operand 1 of the instruction which will
1285 // require we recompute kill flags for it.
1286 Register OrigOp1Reg = MI.getOperand(1).isReg()
1287 ? MI.getOperand(1).getReg()
1288 : PPC::NoRegister;
1289 Simplified = TII->combineRLWINM(MI, &ToErase);
1290 if (Simplified) {
1291 addRegToUpdate(OrigOp1Reg);
1292 if (MI.getOperand(1).isReg())
1293 addRegToUpdate(MI.getOperand(1).getReg());
1294 if (ToErase && ToErase->getOperand(1).isReg())
1295 for (auto UseReg : ToErase->explicit_uses())
1296 if (UseReg.isReg())
1297 addRegToUpdate(UseReg.getReg());
1298 ++NumRotatesCollapsed;
1299 }
1300 break;
1301 }
1302 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1303 // always trap, we will delete the node if it will never trap.
1304 case PPC::TDI:
1305 case PPC::TWI:
1306 case PPC::TD:
1307 case PPC::TW: {
1308 if (!EnableTrapOptimization) break;
1309 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1310 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1311 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1312 // We can only do the optimization if we can get immediates
1313 // from both operands
1314 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1315 LiMI1->getOpcode() == PPC::LI8)))
1316 break;
1317 if (!IsOperand2Immediate &&
1318 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1319 LiMI2->getOpcode() == PPC::LI8)))
1320 break;
1321
1322 auto ImmOperand0 = MI.getOperand(0).getImm();
1323 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1324 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1325 : LiMI2->getOperand(1).getImm();
1326
1327 // We will replace the MI with an unconditional trap if it will always
1328 // trap.
1329 if ((ImmOperand0 == 31) ||
1330 ((ImmOperand0 & 0x10) &&
1331 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1332 ((ImmOperand0 & 0x8) &&
1333 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1334 ((ImmOperand0 & 0x2) &&
1335 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1336 ((ImmOperand0 & 0x1) &&
1337 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1338 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1339 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1340 TrapOpt = true;
1341 }
1342 // We will delete the MI if it will never trap.
1343 ToErase = &MI;
1344 Simplified = true;
1345 break;
1346 }
1347 }
1348 }
1349
1350 // If the last instruction was marked for elimination,
1351 // remove it now.
1352 if (ToErase) {
1353 recomputeLVForDyingInstr();
1354 ToErase->eraseFromParent();
1355 ToErase = nullptr;
1356 }
1357 // Reset TrapOpt to false at the end of the basic block.
1359 TrapOpt = false;
1360 }
1361
1362 // Eliminate all the TOC save instructions which are redundant.
1363 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1364 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1365 if (FI->mustSaveTOC())
1366 NumTOCSavesInPrologue++;
1367
1368 // We try to eliminate redundant compare instruction.
1369 Simplified |= eliminateRedundantCompare();
1370
1371 // If we have made any modifications and added any registers to the set of
1372 // registers for which we need to update the kill flags, do so by recomputing
1373 // LiveVariables for those registers.
1374 for (Register Reg : RegsToUpdate) {
1375 if (!MRI->reg_empty(Reg))
1377 }
1378 return Simplified;
1379}
1380
1381// helper functions for eliminateRedundantCompare
1382static bool isEqOrNe(MachineInstr *BI) {
1384 unsigned PredCond = PPC::getPredicateCondition(Pred);
1385 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1386}
1387
1388static bool isSupportedCmpOp(unsigned opCode) {
1389 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1390 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1391 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1392 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1393}
1394
1395static bool is64bitCmpOp(unsigned opCode) {
1396 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1397 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1398}
1399
1400static bool isSignedCmpOp(unsigned opCode) {
1401 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1402 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1403}
1404
1405static unsigned getSignedCmpOpCode(unsigned opCode) {
1406 if (opCode == PPC::CMPLD) return PPC::CMPD;
1407 if (opCode == PPC::CMPLW) return PPC::CMPW;
1408 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1409 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1410 return opCode;
1411}
1412
1413// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1414// (LT x) to (LE x-1)
1415static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1416 uint64_t Imm = CMPI->getOperand(2).getImm();
1417 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1418 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1419 return 0;
1420
1422 unsigned PredCond = PPC::getPredicateCondition(Pred);
1423 unsigned PredHint = PPC::getPredicateHint(Pred);
1424 if (PredCond == PPC::PRED_GE)
1425 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1426 if (PredCond == PPC::PRED_LT)
1427 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1428
1429 return 0;
1430}
1431
1432// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1433// (LE x) to (LT x+1)
1434static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1435 uint64_t Imm = CMPI->getOperand(2).getImm();
1436 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1437 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1438 return 0;
1439
1441 unsigned PredCond = PPC::getPredicateCondition(Pred);
1442 unsigned PredHint = PPC::getPredicateHint(Pred);
1443 if (PredCond == PPC::PRED_GT)
1444 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1445 if (PredCond == PPC::PRED_LE)
1446 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1447
1448 return 0;
1449}
1450
1451// This takes a Phi node and returns a register value for the specified BB.
1452static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1454 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1455 MachineOperand &MO = Phi->getOperand(I);
1456 if (MO.getMBB() == MBB)
1457 return Phi->getOperand(I-1).getReg();
1458 }
1459 llvm_unreachable("invalid src basic block for this Phi node\n");
1460 return 0;
1461}
1462
1463// This function tracks the source of the register through register copy.
1464// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1465// assuming that the control comes from BB1 into BB2.
1466static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1468 unsigned SrcReg = Reg;
1469 while (true) {
1470 unsigned NextReg = SrcReg;
1471 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1472 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1473 NextReg = getIncomingRegForBlock(Inst, BB1);
1474 // We track through PHI only once to avoid infinite loop.
1475 BB1 = nullptr;
1476 }
1477 else if (Inst->isFullCopy())
1478 NextReg = Inst->getOperand(1).getReg();
1479 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1480 break;
1481 SrcReg = NextReg;
1482 }
1483 return SrcReg;
1484}
1485
1486static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1487 MachineBasicBlock *&PredMBB,
1488 MachineBasicBlock *&MBBtoMoveCmp,
1490
1491 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1492 auto BII = BB.getFirstInstrTerminator();
1493 // We optimize BBs ending with a conditional branch.
1494 // We check only for BCC here, not BCCLR, because BCCLR
1495 // will be formed only later in the pipeline.
1496 if (BB.succ_size() == 2 &&
1497 BII != BB.instr_end() &&
1498 (*BII).getOpcode() == PPC::BCC &&
1499 (*BII).getOperand(1).isReg()) {
1500 // We optimize only if the condition code is used only by one BCC.
1501 Register CndReg = (*BII).getOperand(1).getReg();
1502 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1503 return false;
1504
1505 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1506 // We assume compare and branch are in the same BB for ease of analysis.
1507 if (CMPI->getParent() != &BB)
1508 return false;
1509
1510 // We skip this BB if a physical register is used in comparison.
1511 for (MachineOperand &MO : CMPI->operands())
1512 if (MO.isReg() && !MO.getReg().isVirtual())
1513 return false;
1514
1515 return true;
1516 }
1517 return false;
1518 };
1519
1520 // If this BB has more than one successor, we can create a new BB and
1521 // move the compare instruction in the new BB.
1522 // So far, we do not move compare instruction to a BB having multiple
1523 // successors to avoid potentially increasing code size.
1524 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1525 return BB.succ_size() == 1;
1526 };
1527
1528 if (!isEligibleBB(MBB))
1529 return false;
1530
1531 unsigned NumPredBBs = MBB.pred_size();
1532 if (NumPredBBs == 1) {
1533 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1534 if (isEligibleBB(*TmpMBB)) {
1535 PredMBB = TmpMBB;
1536 MBBtoMoveCmp = nullptr;
1537 return true;
1538 }
1539 }
1540 else if (NumPredBBs == 2) {
1541 // We check for partially redundant case.
1542 // So far, we support cases with only two predecessors
1543 // to avoid increasing the number of instructions.
1545 MachineBasicBlock *Pred1MBB = *PI;
1546 MachineBasicBlock *Pred2MBB = *(PI+1);
1547
1548 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1549 // We assume Pred1MBB is the BB containing the compare to be merged and
1550 // Pred2MBB is the BB to which we will append a compare instruction.
1551 // Proceed as is if Pred1MBB is different from MBB.
1552 }
1553 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1554 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1555 std::swap(Pred1MBB, Pred2MBB);
1556 }
1557 else return false;
1558
1559 if (Pred1MBB == &MBB)
1560 return false;
1561
1562 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1563 // We cannot move the compare instruction if operands are not available
1564 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1566 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1567 for (int I = 1; I <= 2; I++)
1568 if (CMPI->getOperand(I).isReg()) {
1569 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1570 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1571 return false;
1572 }
1573
1574 PredMBB = Pred1MBB;
1575 MBBtoMoveCmp = Pred2MBB;
1576 return true;
1577 }
1578
1579 return false;
1580}
1581
1582// This function will iterate over the input map containing a pair of TOC save
1583// instruction and a flag. The flag will be set to false if the TOC save is
1584// proven redundant. This function will erase from the basic block all the TOC
1585// saves marked as redundant.
1586bool PPCMIPeephole::eliminateRedundantTOCSaves(
1587 std::map<MachineInstr *, bool> &TOCSaves) {
1588 bool Simplified = false;
1589 int NumKept = 0;
1590 for (auto TOCSave : TOCSaves) {
1591 if (!TOCSave.second) {
1592 TOCSave.first->eraseFromParent();
1593 RemoveTOCSave++;
1594 Simplified = true;
1595 } else {
1596 NumKept++;
1597 }
1598 }
1599
1600 if (NumKept > 1)
1601 MultiTOCSaves++;
1602
1603 return Simplified;
1604}
1605
1606// If multiple conditional branches are executed based on the (essentially)
1607// same comparison, we merge compare instructions into one and make multiple
1608// conditional branches on this comparison.
1609// For example,
1610// if (a == 0) { ... }
1611// else if (a < 0) { ... }
1612// can be executed by one compare and two conditional branches instead of
1613// two pairs of a compare and a conditional branch.
1614//
1615// This method merges two compare instructions in two MBBs and modifies the
1616// compare and conditional branch instructions if needed.
1617// For the above example, the input for this pass looks like:
1618// cmplwi r3, 0
1619// beq 0, .LBB0_3
1620// cmpwi r3, -1
1621// bgt 0, .LBB0_4
1622// So, before merging two compares, we need to modify these instructions as
1623// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1624// beq 0, .LBB0_3
1625// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1626// bge 0, .LBB0_4
1627
1628bool PPCMIPeephole::eliminateRedundantCompare() {
1629 bool Simplified = false;
1630
1631 for (MachineBasicBlock &MBB2 : *MF) {
1632 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1633
1634 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1635 // as an optimization target if
1636 // - both MBBs end with a conditional branch,
1637 // - MBB1 is the only predecessor of MBB2, and
1638 // - compare does not take a physical register as a operand in both MBBs.
1639 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1640 //
1641 // As partially redundant case, we additionally handle if MBB2 has one
1642 // additional predecessor, which has only one successor (MBB2).
1643 // In this case, we move the compare instruction originally in MBB2 into
1644 // MBBtoMoveCmp. This partially redundant case is typically appear by
1645 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1646 //
1647 // Overview of CFG of related basic blocks
1648 // Fully redundant case Partially redundant case
1649 // -------- ---------------- --------
1650 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1651 // -------- ---------------- --------
1652 // | \ (w/ 1 succ) \ | \
1653 // | \ \ | \
1654 // | \ |
1655 // -------- --------
1656 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1657 // -------- and 2 succ) -------- and 2 succ)
1658 // | \ | \
1659 // | \ | \
1660 //
1661 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1662 continue;
1663
1664 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1665 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1666
1667 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1668 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1669 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1670
1671 // We cannot optimize an unsupported compare opcode or
1672 // a mix of 32-bit and 64-bit comparisons
1673 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1674 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1675 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1676 continue;
1677
1678 unsigned NewOpCode = 0;
1679 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1680 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1681 bool SwapOperands = false;
1682
1683 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1684 // Typically, unsigned comparison is used for equality check, but
1685 // we replace it with a signed comparison if the comparison
1686 // to be merged is a signed comparison.
1687 // In other cases of opcode mismatch, we cannot optimize this.
1688
1689 // We cannot change opcode when comparing against an immediate
1690 // if the most significant bit of the immediate is one
1691 // due to the difference in sign extension.
1692 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1693 if (!I->getOperand(2).isImm())
1694 return false;
1695 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1696 return Imm < 0;
1697 };
1698
1699 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1700 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1701 NewOpCode = CMPI1->getOpcode();
1702 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1703 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1704 NewOpCode = CMPI2->getOpcode();
1705 else continue;
1706 }
1707
1708 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1709 // In case of comparisons between two registers, these two registers
1710 // must be same to merge two comparisons.
1711 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1712 nullptr, nullptr, MRI);
1713 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1714 nullptr, nullptr, MRI);
1715 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1716 MBB1, &MBB2, MRI);
1717 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1718 MBB1, &MBB2, MRI);
1719
1720 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1721 // Same pair of registers in the same order; ready to merge as is.
1722 }
1723 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1724 // Same pair of registers in different order.
1725 // We reverse the predicate to merge compare instructions.
1727 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1728 // In case of partial redundancy, we need to swap operands
1729 // in another compare instruction.
1730 SwapOperands = true;
1731 }
1732 else continue;
1733 }
1734 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1735 // In case of comparisons between a register and an immediate,
1736 // the operand register must be same for two compare instructions.
1737 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1738 nullptr, nullptr, MRI);
1739 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1740 MBB1, &MBB2, MRI);
1741 if (Cmp1Operand1 != Cmp2Operand1)
1742 continue;
1743
1744 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1745 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1746
1747 // If immediate are not same, we try to adjust by changing predicate;
1748 // e.g. GT imm means GE (imm+1).
1749 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1750 int Diff = Imm1 - Imm2;
1751 if (Diff < -2 || Diff > 2)
1752 continue;
1753
1754 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1755 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1756 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1757 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1758 if (Diff == 2) {
1759 if (PredToInc2 && PredToDec1) {
1760 NewPredicate2 = PredToInc2;
1761 NewPredicate1 = PredToDec1;
1762 NewImm2++;
1763 NewImm1--;
1764 }
1765 }
1766 else if (Diff == 1) {
1767 if (PredToInc2) {
1768 NewImm2++;
1769 NewPredicate2 = PredToInc2;
1770 }
1771 else if (PredToDec1) {
1772 NewImm1--;
1773 NewPredicate1 = PredToDec1;
1774 }
1775 }
1776 else if (Diff == -1) {
1777 if (PredToDec2) {
1778 NewImm2--;
1779 NewPredicate2 = PredToDec2;
1780 }
1781 else if (PredToInc1) {
1782 NewImm1++;
1783 NewPredicate1 = PredToInc1;
1784 }
1785 }
1786 else if (Diff == -2) {
1787 if (PredToDec2 && PredToInc1) {
1788 NewPredicate2 = PredToDec2;
1789 NewPredicate1 = PredToInc1;
1790 NewImm2--;
1791 NewImm1++;
1792 }
1793 }
1794 }
1795
1796 // We cannot merge two compares if the immediates are not same.
1797 if (NewImm2 != NewImm1)
1798 continue;
1799 }
1800
1801 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1802 LLVM_DEBUG(CMPI1->dump());
1803 LLVM_DEBUG(BI1->dump());
1804 LLVM_DEBUG(CMPI2->dump());
1805 LLVM_DEBUG(BI2->dump());
1806 for (const MachineOperand &MO : CMPI1->operands())
1807 if (MO.isReg())
1808 addRegToUpdate(MO.getReg());
1809 for (const MachineOperand &MO : CMPI2->operands())
1810 if (MO.isReg())
1811 addRegToUpdate(MO.getReg());
1812
1813 // We adjust opcode, predicates and immediate as we determined above.
1814 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1815 CMPI1->setDesc(TII->get(NewOpCode));
1816 }
1817 if (NewPredicate1) {
1818 BI1->getOperand(0).setImm(NewPredicate1);
1819 }
1820 if (NewPredicate2) {
1821 BI2->getOperand(0).setImm(NewPredicate2);
1822 }
1823 if (NewImm1 != Imm1) {
1824 CMPI1->getOperand(2).setImm(NewImm1);
1825 }
1826
1827 if (IsPartiallyRedundant) {
1828 // We touch up the compare instruction in MBB2 and move it to
1829 // a previous BB to handle partially redundant case.
1830 if (SwapOperands) {
1831 Register Op1 = CMPI2->getOperand(1).getReg();
1832 Register Op2 = CMPI2->getOperand(2).getReg();
1833 CMPI2->getOperand(1).setReg(Op2);
1834 CMPI2->getOperand(2).setReg(Op1);
1835 }
1836 if (NewImm2 != Imm2)
1837 CMPI2->getOperand(2).setImm(NewImm2);
1838
1839 for (int I = 1; I <= 2; I++) {
1840 if (CMPI2->getOperand(I).isReg()) {
1841 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1842 if (Inst->getParent() != &MBB2)
1843 continue;
1844
1845 assert(Inst->getOpcode() == PPC::PHI &&
1846 "We cannot support if an operand comes from this BB.");
1847 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1848 CMPI2->getOperand(I).setReg(SrcReg);
1849 addRegToUpdate(SrcReg);
1850 }
1851 }
1852 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1853 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1854
1855 DebugLoc DL = CMPI2->getDebugLoc();
1856 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1857 BuildMI(MBB2, MBB2.begin(), DL,
1858 TII->get(PPC::PHI), NewVReg)
1859 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1860 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1861 BI2->getOperand(1).setReg(NewVReg);
1862 addRegToUpdate(NewVReg);
1863 }
1864 else {
1865 // We finally eliminate compare instruction in MBB2.
1866 // We do not need to treat CMPI2 specially here in terms of re-computing
1867 // live variables even though it is being deleted because:
1868 // - It defines a register that has a single use (already checked in
1869 // eligibleForCompareElimination())
1870 // - The only user (BI2) is no longer using it so the register is dead (no
1871 // def, no uses)
1872 // - We do not attempt to recompute live variables for dead registers
1873 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1874 CMPI2->eraseFromParent();
1875 }
1876
1877 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1878 LLVM_DEBUG(CMPI1->dump());
1879 LLVM_DEBUG(BI1->dump());
1880 LLVM_DEBUG(BI2->dump());
1881 if (IsPartiallyRedundant) {
1882 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1883 << printMBBReference(*MBBtoMoveCmp)
1884 << " to handle partial redundancy.\n");
1885 LLVM_DEBUG(CMPI2->dump());
1886 }
1887 Simplified = true;
1888 }
1889
1890 return Simplified;
1891}
1892
1893// We miss the opportunity to emit an RLDIC when lowering jump tables
1894// since ISEL sees only a single basic block. When selecting, the clear
1895// and shift left will be in different blocks.
1896bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1897 MachineInstr *&ToErase) {
1898 if (MI.getOpcode() != PPC::RLDICR)
1899 return false;
1900
1901 Register SrcReg = MI.getOperand(1).getReg();
1902 if (!SrcReg.isVirtual())
1903 return false;
1904
1905 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1906 if (SrcMI->getOpcode() != PPC::RLDICL)
1907 return false;
1908
1909 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1910 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1911 MachineOperand MOpSHMI = MI.getOperand(2);
1912 MachineOperand MOpMEMI = MI.getOperand(3);
1913 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1914 MOpMEMI.isImm()))
1915 return false;
1916
1917 uint64_t SHSrc = MOpSHSrc.getImm();
1918 uint64_t MBSrc = MOpMBSrc.getImm();
1919 uint64_t SHMI = MOpSHMI.getImm();
1920 uint64_t MEMI = MOpMEMI.getImm();
1921 uint64_t NewSH = SHSrc + SHMI;
1922 uint64_t NewMB = MBSrc - SHMI;
1923 if (NewMB > 63 || NewSH > 63)
1924 return false;
1925
1926 // The bits cleared with RLDICL are [0, MBSrc).
1927 // The bits cleared with RLDICR are (MEMI, 63].
1928 // After the sequence, the bits cleared are:
1929 // [0, MBSrc-SHMI) and (MEMI, 63).
1930 //
1931 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1932 if ((63 - NewSH) != MEMI)
1933 return false;
1934
1935 LLVM_DEBUG(dbgs() << "Converting pair: ");
1936 LLVM_DEBUG(SrcMI->dump());
1937 LLVM_DEBUG(MI.dump());
1938
1939 MI.setDesc(TII->get(PPC::RLDIC));
1940 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1941 MI.getOperand(2).setImm(NewSH);
1942 MI.getOperand(3).setImm(NewMB);
1943 addRegToUpdate(MI.getOperand(1).getReg());
1944 addRegToUpdate(SrcMI->getOperand(0).getReg());
1945
1946 LLVM_DEBUG(dbgs() << "To: ");
1947 LLVM_DEBUG(MI.dump());
1948 NumRotatesCollapsed++;
1949 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1950 if (MRI->use_nodbg_empty(SrcReg)) {
1951 assert(!SrcMI->hasImplicitDef() &&
1952 "Not expecting an implicit def with this instr.");
1953 ToErase = SrcMI;
1954 }
1955 return true;
1956}
1957
1958// For case in LLVM IR
1959// entry:
1960// %iconv = sext i32 %index to i64
1961// br i1 undef label %true, label %false
1962// true:
1963// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1964// ...
1965// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1966// different BBs when conducting instruction selection. We can do a peephole
1967// optimization to combine these two instructions into extswsli after
1968// instruction selection.
1969bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1970 MachineInstr *&ToErase) {
1971 if (MI.getOpcode() != PPC::RLDICR)
1972 return false;
1973
1974 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1975 return false;
1976
1977 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1978
1979 MachineOperand MOpSHMI = MI.getOperand(2);
1980 MachineOperand MOpMEMI = MI.getOperand(3);
1981 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1982 return false;
1983
1984 uint64_t SHMI = MOpSHMI.getImm();
1985 uint64_t MEMI = MOpMEMI.getImm();
1986 if (SHMI + MEMI != 63)
1987 return false;
1988
1989 Register SrcReg = MI.getOperand(1).getReg();
1990 if (!SrcReg.isVirtual())
1991 return false;
1992
1993 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1994 if (SrcMI->getOpcode() != PPC::EXTSW &&
1995 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1996 return false;
1997
1998 // If the register defined by extsw has more than one use, combination is not
1999 // needed.
2000 if (!MRI->hasOneNonDBGUse(SrcReg))
2001 return false;
2002
2003 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2004 assert(SrcMI->getOperand(1).isReg() &&
2005 "EXTSW's second operand should be a register");
2006 if (!SrcMI->getOperand(1).getReg().isVirtual())
2007 return false;
2008
2009 LLVM_DEBUG(dbgs() << "Combining pair: ");
2010 LLVM_DEBUG(SrcMI->dump());
2011 LLVM_DEBUG(MI.dump());
2012
2013 MachineInstr *NewInstr =
2014 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2015 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2016 : TII->get(PPC::EXTSWSLI_32_64),
2017 MI.getOperand(0).getReg())
2018 .add(SrcMI->getOperand(1))
2019 .add(MOpSHMI);
2020 (void)NewInstr;
2021
2022 LLVM_DEBUG(dbgs() << "TO: ");
2023 LLVM_DEBUG(NewInstr->dump());
2024 ++NumEXTSWAndSLDICombined;
2025 ToErase = &MI;
2026 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2027 // can recompute its kill flags. We run DCE immediately after this pass
2028 // to clean up dead instructions such as this.
2029 addRegToUpdate(NewInstr->getOperand(1).getReg());
2030 addRegToUpdate(SrcMI->getOperand(0).getReg());
2031 return true;
2032}
2033
2034} // end default namespace
2035
2037 "PowerPC MI Peephole Optimization", false, false)
2043 "PowerPC MI Peephole Optimization", false, false)
2044
2045char PPCMIPeephole::ID = 0;
2047llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Rewrite undef for PHI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:190
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define addRegToUpdate(R)
PowerPC MI Peephole Optimization
static cl::opt< bool > EnableZExtElimination("ppc-eliminate-zeroext", cl::desc("enable elimination of zero-extensions"), cl::init(true), cl::Hidden)
static cl::opt< bool > FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), cl::desc("Iterate to a fixed point when attempting to " "convert reg-reg instructions to reg-imm"))
static cl::opt< bool > EnableTrapOptimization("ppc-opt-conditional-trap", cl::desc("enable optimization of conditional traps"), cl::init(false), cl::Hidden)
static cl::opt< bool > ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), cl::desc("Convert eligible reg+reg instructions to reg+imm"))
#define DEBUG_TYPE
static cl::opt< bool > EnableSExtElimination("ppc-eliminate-signext", cl::desc("enable elimination of sign-extensions"), cl::init(true), cl::Hidden)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:87
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:178
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:80
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
unsigned pred_size() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
iterator_range< mop_iterator > explicit_uses()
Definition: MachineInstr.h:740
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:572
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:643
bool isFullCopy() const
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:685
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isAIXABI() const
Definition: PPCSubtarget.h:215
bool isUsingPCRelativeCalls() const
bool isELFv2ABI() const
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void dump() const
Definition: Pass.cpp:136
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializePPCMIPeepholePass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
@ Keep
No function return thunk.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
FunctionPass * createPPCMIPeepholePass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860