1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
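// For example (an illustrative sketch only; the register names are
// hypothetical): a transformation that rewires a use from OldReg to NewReg
// and removes the only definition of OldReg would typically do
//   MI.getOperand(1).setReg(NewReg);
//   addRegToUpdate(OldReg);
//   addRegToUpdate(NewReg);
//   addDummyDef(MBB, &MI, OldReg);
// so that kill flags and LiveVariables stay consistent.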
29
30#include "MCTargetDesc/PPCMCTargetDesc.h"
31#include "MCTargetDesc/PPCPredicates.h"
32#include "PPC.h"
33#include "PPCInstrInfo.h"
34#include "PPCMachineFunctionInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/LiveVariables.h"
38#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
39#include "llvm/CodeGen/MachineDominators.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunctionPass.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachinePostDominators.h"
44#include "llvm/CodeGen/MachineRegisterInfo.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/DebugCounter.h"
48
49using namespace llvm;
50
51#define DEBUG_TYPE "ppc-mi-peepholes"
52
53STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
54STATISTIC(MultiTOCSaves,
55 "Number of functions with multiple TOC saves that must be kept");
56STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
57STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
58STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
59STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");
60STATISTIC(NumConvertedToImmediateForm,
61 "Number of instructions converted to their immediate form");
62STATISTIC(NumFunctionsEnteredInMIPeephole,
63 "Number of functions entered in PPC MI Peepholes");
64STATISTIC(NumFixedPointIterations,
65 "Number of fixed-point iterations converting reg-reg instructions "
66 "to reg-imm ones");
67STATISTIC(NumRotatesCollapsed,
68 "Number of pairs of rotate left, clear left/right collapsed");
69STATISTIC(NumEXTSWAndSLDICombined,
70 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
71STATISTIC(NumLoadImmZeroFoldedAndRemoved,
72 "Number of LI(8) reg, 0 that are folded to r0 and removed");
73
74static cl::opt<bool>
75FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
76 cl::desc("Iterate to a fixed point when attempting to "
77 "convert reg-reg instructions to reg-imm"));
78
79static cl::opt<bool>
80ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
81 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
82
83static cl::opt<bool>
84 EnableSExtElimination("ppc-eliminate-signext",
85 cl::desc("enable elimination of sign-extensions"),
86 cl::init(true), cl::Hidden);
87
88static cl::opt<bool>
89 EnableZExtElimination("ppc-eliminate-zeroext",
90 cl::desc("enable elimination of zero-extensions"),
91 cl::init(true), cl::Hidden);
92
93static cl::opt<bool>
94 EnableTrapOptimization("ppc-opt-conditional-trap",
95 cl::desc("enable optimization of conditional traps"),
96 cl::init(false), cl::Hidden);
97
98DEBUG_COUNTER(
99 PeepholeXToICounter, "ppc-xtoi-peephole",
100 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
101
102DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
103 "Controls whether PPC per opcode peephole is performed on a MI");
104
105namespace {
106
107struct PPCMIPeephole : public MachineFunctionPass {
108
109 static char ID;
110 const PPCInstrInfo *TII;
111 MachineFunction *MF;
112 MachineRegisterInfo *MRI;
113 LiveVariables *LV;
114
115 PPCMIPeephole() : MachineFunctionPass(ID) {}
116
117private:
118 MachineDominatorTree *MDT;
119 MachinePostDominatorTree *MPDT;
120 MachineBlockFrequencyInfo *MBFI;
121 BlockFrequency EntryFreq;
122 SmallSet<Register, 16> RegsToUpdate;
123
124 // Initialize class variables.
125 void initialize(MachineFunction &MFParm);
126
127 // Perform peepholes.
128 bool simplifyCode();
129
130 // Perform peepholes.
131 bool eliminateRedundantCompare();
132 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
133 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
134 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
135 MachineInstr *&ToErase);
136 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
137 MachineInstr *MI);
138
139 // A number of transformations will eliminate the definition of a register
140 // as all of its uses will be removed. However, this leaves a register
141 // without a definition for LiveVariables. Such transformations should
142 // use this function to provide a dummy definition of the register that
143 // will simply be removed by DCE.
144 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
145 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
146 }
147 void addRegToUpdateWithLine(Register Reg, int Line);
148 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
149 SmallVectorImpl<MachineInstr *> &PHIs,
150 Register Dst);
151
152public:
153
154 void getAnalysisUsage(AnalysisUsage &AU) const override {
155 AU.addRequired<LiveVariablesWrapperPass>();
156 AU.addRequired<MachineDominatorTreeWrapperPass>();
157 AU.addRequired<MachinePostDominatorTreeWrapperPass>();
158 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
159 AU.addPreserved<LiveVariablesWrapperPass>();
160 AU.addPreserved<MachineDominatorTreeWrapperPass>();
161 AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
162 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
163 MachineFunctionPass::getAnalysisUsage(AU);
164 }
165
166 // Main entry point for this pass.
167 bool runOnMachineFunction(MachineFunction &MF) override {
168 initialize(MF);
169 // At this point, TOC pointer should not be used in a function that uses
170 // PC-Relative addressing.
171 assert((MF.getRegInfo().use_empty(PPC::X2) ||
172 !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
173 "TOC pointer used in a function using PC-Relative addressing!");
174 if (skipFunction(MF.getFunction()))
175 return false;
176 bool Changed = simplifyCode();
177#ifndef NDEBUG
178 if (Changed)
179 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
180 "-mllvm -disable-ppc-peephole");
181#endif
182 return Changed;
183 }
184};
185
186#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
187void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
188 if (!Reg.isVirtual())
189 return;
190 if (RegsToUpdate.insert(Reg).second)
191 LLVM_DEBUG(dbgs() << "Adding register: " << printReg(Reg) << " on line "
192 << Line << " for re-computation of kill flags\n");
193}
194
195// Initialize class variables.
196void PPCMIPeephole::initialize(MachineFunction &MFParm) {
197 MF = &MFParm;
198 MRI = &MF->getRegInfo();
199 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
200 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
201 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
202 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
203 EntryFreq = MBFI->getEntryFreq();
204 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
205 RegsToUpdate.clear();
206 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
207 LLVM_DEBUG(MF->dump());
208}
209
210static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
211 MachineRegisterInfo *MRI) {
212 assert(Op && "Invalid Operand!");
213 if (!Op->isReg())
214 return nullptr;
215
216 Register Reg = Op->getReg();
217 if (!Reg.isVirtual())
218 return nullptr;
219
220 return MRI->getVRegDef(Reg);
221}
222
223// This function returns the number of known zero bits in the output of MI,
224// starting from the most significant bit.
225static unsigned getKnownLeadingZeroCount(const unsigned Reg,
226 const PPCInstrInfo *TII,
227 const MachineRegisterInfo *MRI) {
228 MachineInstr *MI = MRI->getVRegDef(Reg);
229 unsigned Opcode = MI->getOpcode();
230 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
231 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
232 return MI->getOperand(3).getImm();
233
234 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
235 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
236 return MI->getOperand(3).getImm();
237
238 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
239 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
240 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
241 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
242 return 32 + MI->getOperand(3).getImm();
243
244 if (Opcode == PPC::ANDI_rec) {
245 uint16_t Imm = MI->getOperand(2).getImm();
246 return 48 + llvm::countl_zero(Imm);
247 }
248
249 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
250 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
251 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
252 // The result ranges from 0 to 32.
253 return 58;
254
255 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
256 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
257 // The result ranges from 0 to 64.
258 return 57;
259
260 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
261 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
262 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
263 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
264 return 48;
265
266 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
267 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
268 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
269 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
270 return 56;
271
272 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
273 Opcode == PPC::AND8_rec)
274 return std::max(
275 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
276 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
277
278 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
279 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
280 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
281 Opcode == PPC::XOR8_rec)
282 return std::min(
283 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
284 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
285
286 if (TII->isZeroExtended(Reg, MRI))
287 return 32;
288
289 return 0;
290}
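// Illustrative examples (hypothetical vregs): for `%1 = RLDICL %0, 0, 48`
// this returns 48 since the rotate clears the top 48 bits, and for a byte
// load `%1 = LBZ ...` it returns 56 because the result is zero-extended.
// A return value of 0 only means nothing is known about the leading bits.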
291
292// This function maintains a map of pairs <TOC Save Instr, Keep>.
293// Each time a new TOC save is encountered, it checks if any of the existing
294// ones are dominated by the new one. If so, it marks the existing one as
295// redundant by setting its entry in the map to false. It then adds the new
296// instruction to the map with either true or false depending on whether any
297// existing instruction dominated the new one.
298void PPCMIPeephole::UpdateTOCSaves(
299 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
300 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
301 // FIXME: Saving the TOC in the prologue hasn't been implemented properly
302 // for the AIX ABI yet, so we only support this under ELFv2 here.
303 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
304 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
305
306 MachineBasicBlock *Entry = &MF->front();
307 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
308
309 // If the TOC save resides in a block that post-dominates Entry, or in a
310 // block that is hotter than Entry (keep in mind that early MachineLICM has
311 // already run, so the TOC save won't be hoisted), we can just do the save
312 // in the prologue.
313 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
314 FI->setMustSaveTOC(true);
315
316 // If we are saving the TOC in the prologue, all the TOC saves can be
317 // removed from the code.
318 if (FI->mustSaveTOC()) {
319 for (auto &TOCSave : TOCSaves)
320 TOCSave.second = false;
321 // Add new instruction to map.
322 TOCSaves[MI] = false;
323 return;
324 }
325 }
326
327 bool Keep = true;
328 for (auto &I : TOCSaves) {
329 MachineInstr *CurrInst = I.first;
330 // If new instruction dominates an existing one, mark existing one as
331 // redundant.
332 if (I.second && MDT->dominates(MI, CurrInst))
333 I.second = false;
334 // Check if the new instruction is redundant.
335 if (MDT->dominates(CurrInst, MI)) {
336 Keep = false;
337 break;
338 }
339 }
340 // Add new instruction to map.
341 TOCSaves[MI] = Keep;
342}
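// As an illustrative scenario (not from a specific test case): a TOC save in
// the entry block dominates a TOC save inside a loop, so the latter is added
// to the map with Keep == false and later deleted by
// eliminateRedundantTOCSaves().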
343
344// This function returns a list of all PHI nodes in the tree starting from
345// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
346// The list initially only contains the root PHI. When we visit a PHI node, we
347// add it to the list. We continue to look for other PHI node operands while
348// there are nodes to visit in the list. The function returns false if the
349// optimization cannot be applied on this tree.
350static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
351 MachineInstr *RootPHI,
352 SmallVectorImpl<MachineInstr *> &PHIs) {
353 PHIs.push_back(RootPHI);
354 unsigned VisitedIndex = 0;
355 while (VisitedIndex < PHIs.size()) {
356 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
357 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
358 PHIOp != NumOps; PHIOp += 2) {
359 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
360 if (!RegOp.isVirtual())
361 return false;
362 MachineInstr *Instr = MRI->getVRegDef(RegOp);
363 // While collecting the PHI nodes, we check if they can be converted (i.e.
364 // all the operands are either copies, implicit defs or PHI nodes).
365 unsigned Opcode = Instr->getOpcode();
366 if (Opcode == PPC::COPY) {
367 Register Reg = Instr->getOperand(1).getReg();
368 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
369 return false;
370 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
371 return false;
372 // If we detect a cycle in the PHI nodes, we exit. It would be
373 // possible to change cycles as well, but that would add a lot
374 // of complexity for a case that is unlikely to occur with MMA
375 // code.
376 if (Opcode != PPC::PHI)
377 continue;
378 if (llvm::is_contained(PHIs, Instr))
379 return false;
380 PHIs.push_back(Instr);
381 }
382 VisitedIndex++;
383 }
384 return true;
385}
386
387// This function changes the unprimed accumulator PHI nodes in the PHIs list to
388// primed accumulator PHI nodes. The list is traversed in reverse order to
389// change all the PHI operands of a PHI node before changing the node itself.
390// We keep a map to associate each changed PHI node to its non-changed form.
391void PPCMIPeephole::convertUnprimedAccPHIs(
392 const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
393 SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
394 DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
395 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
396 SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
397 // We check if the current PHI node can be changed by looking at its
398 // operands. If all the operands are either copies from primed
399 // accumulators, implicit definitions or other unprimed accumulator
400 // PHI nodes, we change it.
401 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
402 PHIOp += 2) {
403 Register RegOp = PHI->getOperand(PHIOp).getReg();
404 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
405 unsigned Opcode = PHIInput->getOpcode();
406 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
407 Opcode == PPC::PHI) &&
408 "Unexpected instruction");
409 if (Opcode == PPC::COPY) {
410 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
411 &PPC::ACCRCRegClass &&
412 "Unexpected register class");
413 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
414 } else if (Opcode == PPC::IMPLICIT_DEF) {
415 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
416 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
417 TII->get(PPC::IMPLICIT_DEF), AccReg);
418 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
419 PHI->getOperand(PHIOp + 1)});
420 } else if (Opcode == PPC::PHI) {
421 // We found a PHI operand. At this point we know this operand
422 // has already been changed so we get its associated changed form
423 // from the map.
424 assert(ChangedPHIMap.count(PHIInput) == 1 &&
425 "This PHI node should have already been changed.");
426 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
427 PHIOps.push_back({MachineOperand::CreateReg(
428 PrimedAccPHI->getOperand(0).getReg(), false),
429 PHI->getOperand(PHIOp + 1)});
430 }
431 }
432 Register AccReg = Dst;
433 // If the PHI node we are changing is the root node, the register it defines
434 // will be the destination register of the original copy (of the PHI def).
435 // For all other PHI's in the list, we need to create another primed
436 // accumulator virtual register as the PHI will no longer define the
437 // unprimed accumulator.
438 if (PHI != PHIs[0])
439 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
440 MachineInstrBuilder NewPHI = BuildMI(
441 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
442 for (auto RegMBB : PHIOps) {
443 NewPHI.add(RegMBB.first).add(RegMBB.second);
444 if (MRI->isSSA())
445 addRegToUpdate(RegMBB.first.getReg());
446 }
447 // The liveness of old PHI and new PHI have to be updated.
448 addRegToUpdate(PHI->getOperand(0).getReg());
449 addRegToUpdate(AccReg);
450 ChangedPHIMap[PHI] = NewPHI.getInstr();
451 LLVM_DEBUG(dbgs() << "Converting PHI: ");
452 LLVM_DEBUG(PHI->dump());
453 LLVM_DEBUG(dbgs() << "To: ");
454 LLVM_DEBUG(NewPHI.getInstr()->dump());
455 }
456}
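// A minimal MIR sketch of the conversion (hypothetical vregs):
//   %5:uaccrc = PHI %2, %bb.1, %3, %bb.2   ; %2, %3 are copies from accrc
//   %6:accrc = COPY %5
// becomes a PHI over the primed accumulators that defines %6 directly; the
// original unprimed PHI and the COPY are left for DCE to remove.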
457
458// Perform peephole optimizations.
459bool PPCMIPeephole::simplifyCode() {
460 bool Simplified = false;
461 bool TrapOpt = false;
462 MachineInstr* ToErase = nullptr;
463 std::map<MachineInstr *, bool> TOCSaves;
464 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
465 NumFunctionsEnteredInMIPeephole++;
466 if (ConvertRegReg) {
467 // Fixed-point conversion of reg/reg instructions fed by load-immediate
468 // into reg/imm instructions. FIXME: This is expensive, control it with
469 // an option.
470 bool SomethingChanged = false;
471 do {
472 NumFixedPointIterations++;
473 SomethingChanged = false;
474 for (MachineBasicBlock &MBB : *MF) {
475 for (MachineInstr &MI : MBB) {
476 if (MI.isDebugInstr())
477 continue;
478
479 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
480 continue;
481
482 SmallSet<Register, 4> RRToRIRegsToUpdate;
483 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
484 continue;
485 for (Register R : RRToRIRegsToUpdate)
486 addRegToUpdate(R);
487 // The updated instruction may now have new register operands.
488 // Conservatively add them to recompute the flags as well.
489 for (const MachineOperand &MO : MI.operands())
490 if (MO.isReg())
491 addRegToUpdate(MO.getReg());
492 // We don't erase anything in case the def has other uses. Let DCE
493 // remove it if it can be removed.
494 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
495 LLVM_DEBUG(MI.dump());
496 NumConvertedToImmediateForm++;
497 SomethingChanged = true;
498 Simplified = true;
499 }
500 }
501 } while (SomethingChanged && FixedPointRegToImm);
502 }
503
504 // Since we are deleting this instruction, we need to run LiveVariables
505 // on any of its definitions that are marked as needing an update since
506 // we can't run LiveVariables on a deleted register. This only needs
507 // to be done for defs since uses will have their own defining
508 // instructions so we won't be running LiveVariables on a deleted reg.
509 auto recomputeLVForDyingInstr = [&]() {
510 if (RegsToUpdate.empty())
511 return;
512 for (MachineOperand &MO : ToErase->operands()) {
513 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
514 continue;
515 Register RegToUpdate = MO.getReg();
516 RegsToUpdate.erase(RegToUpdate);
517 // If some transformation has introduced an additional definition of
518 // this register (breaking SSA), we can safely convert this def to
519 // a def of an invalid register as the instruction is going away.
520 if (!MRI->getUniqueVRegDef(RegToUpdate))
521 MO.setReg(PPC::NoRegister);
522 LV->recomputeForSingleDefVirtReg(RegToUpdate);
523 }
524 };
525
526 for (MachineBasicBlock &MBB : *MF) {
527 for (MachineInstr &MI : MBB) {
528
529 // If the previous instruction was marked for elimination,
530 // remove it now.
531 if (ToErase) {
532 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
533 LLVM_DEBUG(ToErase->dump());
534 recomputeLVForDyingInstr();
535 ToErase->eraseFromParent();
536 ToErase = nullptr;
537 }
538 // If a conditional trap instruction got optimized to an
539 // unconditional trap, eliminate all the instructions after
540 // the trap.
541 if (EnableTrapOptimization && TrapOpt) {
542 ToErase = &MI;
543 continue;
544 }
545
546 // Ignore debug instructions.
547 if (MI.isDebugInstr())
548 continue;
549
550 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
551 continue;
552
553 // Per-opcode peepholes.
554 switch (MI.getOpcode()) {
555
556 default:
557 break;
558 case PPC::COPY: {
559 Register Src = MI.getOperand(1).getReg();
560 Register Dst = MI.getOperand(0).getReg();
561 if (!Src.isVirtual() || !Dst.isVirtual())
562 break;
563 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
564 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
565 break;
566
567 // We are copying an unprimed accumulator to a primed accumulator.
568 // If the input to the copy is a PHI that is fed only by (i) copies in
569 // the other direction (ii) implicitly defined unprimed accumulators or
570 // (iii) other PHI nodes satisfying (i) and (ii), we can change
571 // the PHI to a PHI on primed accumulators (as long as we also change
572 // its operands). To detect and change such copies, we first get a list
573 // of all the PHI nodes starting from the root PHI node in BFS order.
574 // We then visit all these PHI nodes to check if they can be changed to
575 // primed accumulator PHI nodes and if so, we change them.
576 MachineInstr *RootPHI = MRI->getVRegDef(Src);
577 if (RootPHI->getOpcode() != PPC::PHI)
578 break;
579
580 SmallVector<MachineInstr *, 4> PHIs;
581 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
582 break;
583
584 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
585
586 ToErase = &MI;
587 break;
588 }
589 case PPC::LI:
590 case PPC::LI8: {
591 // If we are materializing a zero, look for any use operands for which
592 // zero means immediate zero. All such operands can be replaced with
593 // PPC::ZERO.
594 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
595 break;
596 Register MIDestReg = MI.getOperand(0).getReg();
597 bool Folded = false;
598 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
599 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
600 if (MRI->use_nodbg_empty(MIDestReg)) {
601 ++NumLoadImmZeroFoldedAndRemoved;
602 ToErase = &MI;
603 }
604 if (Folded)
605 addRegToUpdate(MIDestReg);
606 Simplified |= Folded;
607 break;
608 }
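// Hypothetical example of the fold above: with `%2 = LI 0` feeding an operand
// for which register 0 is read as a literal zero (e.g. the RA operand of an
// X-form memory access), that use is rewritten to $zero, and once %2 has no
// remaining non-debug uses the LI itself is erased.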
609 case PPC::STW:
610 case PPC::STD: {
611 MachineFrameInfo &MFI = MF->getFrameInfo();
612 if (MFI.hasVarSizedObjects() ||
613 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
614 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
615 break;
616 // When encountering a TOC save instruction, call UpdateTOCSaves
617 // to add it to the TOCSaves map and mark any existing TOC saves
618 // it dominates as redundant.
619 if (TII->isTOCSaveMI(MI))
620 UpdateTOCSaves(TOCSaves, &MI);
621 break;
622 }
623 case PPC::XXPERMDI: {
624 // Perform simplifications of 2x64 vector swaps and splats.
625 // A swap is identified by an immediate value of 2, and a splat
626 // is identified by an immediate value of 0 or 3.
627 int Immed = MI.getOperand(3).getImm();
628
629 if (Immed == 1)
630 break;
631
632 // For each of these simplifications, we need the two source
633 // regs to match. Unfortunately, MachineCSE ignores COPY and
634 // SUBREG_TO_REG, so for example we can see
635 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
636 // We have to look through chains of COPY and SUBREG_TO_REG
637 // to find the real source values for comparison.
638 Register TrueReg1 =
639 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
640 Register TrueReg2 =
641 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
642
643 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
644 break;
645
646 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
647
648 if (!DefMI)
649 break;
650
651 unsigned DefOpc = DefMI->getOpcode();
652
653 // If this is a splat fed by a splatting load, the splat is
654 // redundant. Replace with a copy. This doesn't happen directly due
655 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
656 // a load of a double to a vector of 64-bit integers.
657 auto isConversionOfLoadAndSplat = [=]() -> bool {
658 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
659 return false;
660 Register FeedReg1 =
661 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
662 if (FeedReg1.isVirtual()) {
663 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
664 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
665 return true;
666 }
667 return false;
668 };
669 if ((Immed == 0 || Immed == 3) &&
670 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
671 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
672 "to load-and-splat/copy: ");
673 LLVM_DEBUG(MI.dump());
674 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
675 MI.getOperand(0).getReg())
676 .add(MI.getOperand(1));
677 addRegToUpdate(MI.getOperand(1).getReg());
678 ToErase = &MI;
679 Simplified = true;
680 }
681
682 // If this is a splat or a swap fed by another splat, we
683 // can replace it with a copy.
684 if (DefOpc == PPC::XXPERMDI) {
685 Register DefReg1 = DefMI->getOperand(1).getReg();
686 Register DefReg2 = DefMI->getOperand(2).getReg();
687 unsigned DefImmed = DefMI->getOperand(3).getImm();
688
689 // If the two inputs are not the same register, check to see if
690 // they originate from the same virtual register after only
691 // copy-like instructions.
692 if (DefReg1 != DefReg2) {
693 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
694 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
695
696 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
697 break;
698 }
699
700 if (DefImmed == 0 || DefImmed == 3) {
701 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
702 "to splat/copy: ");
703 LLVM_DEBUG(MI.dump());
704 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
705 MI.getOperand(0).getReg())
706 .add(MI.getOperand(1));
707 addRegToUpdate(MI.getOperand(1).getReg());
708 ToErase = &MI;
709 Simplified = true;
710 }
711
712 // If this is a splat fed by a swap, we can modify the splat
713 // to splat the other value from the swap's input
714 // parameter.
715 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
716 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
717 LLVM_DEBUG(MI.dump());
718 addRegToUpdate(MI.getOperand(1).getReg());
719 addRegToUpdate(MI.getOperand(2).getReg());
720 MI.getOperand(1).setReg(DefReg1);
721 MI.getOperand(2).setReg(DefReg2);
722 MI.getOperand(3).setImm(3 - Immed);
723 addRegToUpdate(DefReg1);
724 addRegToUpdate(DefReg2);
725 Simplified = true;
726 }
727
728 // If this is a swap fed by a swap, we can replace it
729 // with a copy from the first swap's input.
730 else if (Immed == 2 && DefImmed == 2) {
731 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
732 LLVM_DEBUG(MI.dump());
733 addRegToUpdate(MI.getOperand(1).getReg());
734 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
735 MI.getOperand(0).getReg())
736 .add(DefMI->getOperand(1));
737 addRegToUpdate(DefMI->getOperand(0).getReg());
738 addRegToUpdate(DefMI->getOperand(1).getReg());
739 ToErase = &MI;
740 Simplified = true;
741 }
742 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
743 DefOpc == PPC::XXPERMDIs &&
744 (DefMI->getOperand(2).getImm() == 0 ||
745 DefMI->getOperand(2).getImm() == 3)) {
746 ToErase = &MI;
747 Simplified = true;
748 // Swap of a splat, convert to copy.
749 if (Immed == 2) {
750 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
751 LLVM_DEBUG(MI.dump());
752 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
753 MI.getOperand(0).getReg())
754 .add(MI.getOperand(1));
755 addRegToUpdate(MI.getOperand(1).getReg());
756 break;
757 }
758 // Splat fed by another splat - switch the output of the first
759 // and remove the second.
760 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
761 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
762 LLVM_DEBUG(MI.dump());
763 } else if (Immed == 2 &&
764 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
765 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
766 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
767 DefOpc == PPC::VSPLTISW)) {
768 // Swap of various vector splats, convert to copy.
769 ToErase = &MI;
770 Simplified = true;
771 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
772 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
773 LLVM_DEBUG(MI.dump());
774 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
775 MI.getOperand(0).getReg())
776 .add(MI.getOperand(1));
777 addRegToUpdate(MI.getOperand(1).getReg());
778 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
779 TII->isLoadFromConstantPool(DefMI)) {
780 const Constant *C = TII->getConstantFromConstantPool(DefMI);
781 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
782 ToErase = &MI;
783 Simplified = true;
785 << "Optimizing swap(splat pattern from constant-pool) "
786 "=> copy(splat pattern from constant-pool): ");
787 LLVM_DEBUG(MI.dump());
788 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
789 MI.getOperand(0).getReg())
790 .add(MI.getOperand(1));
791 addRegToUpdate(MI.getOperand(1).getReg());
792 }
793 }
794 break;
795 }
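// A minimal sketch of the swap/swap case above (hypothetical vregs):
//   %2:vsrc = XXPERMDI %1, %1, 2    ; swap
//   %3:vsrc = XXPERMDI %2, %2, 2    ; swap of the swap
// The second XXPERMDI is replaced by `%3 = COPY %1`, and the first one is
// left for dead-code elimination if it has no other uses.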
796 case PPC::VSPLTB:
797 case PPC::VSPLTH:
798 case PPC::XXSPLTW: {
799 unsigned MyOpcode = MI.getOpcode();
800 // The operand number of the source register in the splat instruction.
801 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
802 Register TrueReg =
803 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
804 if (!TrueReg.isVirtual())
805 break;
806 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
807 if (!DefMI)
808 break;
809 unsigned DefOpcode = DefMI->getOpcode();
810 auto isConvertOfSplat = [=]() -> bool {
811 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
812 return false;
813 Register ConvReg = DefMI->getOperand(1).getReg();
814 if (!ConvReg.isVirtual())
815 return false;
816 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
817 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
818 Splt->getOpcode() == PPC::XXSPLTW);
819 };
820 bool AlreadySplat = (MyOpcode == DefOpcode) ||
821 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
822 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
823 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
824 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
825 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
826 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
827
828 // If the instruction[s] that feed this splat have already splat
829 // the value, this splat is redundant.
830 if (AlreadySplat) {
831 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
832 LLVM_DEBUG(MI.dump());
833 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
834 MI.getOperand(0).getReg())
835 .add(MI.getOperand(OpNo));
836 addRegToUpdate(MI.getOperand(OpNo).getReg());
837 ToErase = &MI;
838 Simplified = true;
839 }
840
841 // Splat fed by a shift. This usually occurs when we align a value to
842 // splat into vector element zero.
843 if (DefOpcode == PPC::XXSLDWI) {
844 Register ShiftOp1 = DefMI->getOperand(1).getReg();
845
846 if (ShiftOp1 == DefMI->getOperand(2).getReg()) {
847 // For example, we can erase the XXSLDWI in the following:
848 // %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
849 // %6:vrrc = VSPLTB 15, killed %2:vrrc
850 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
851 //
852 // --->
853 //
854 // %6:vrrc = VSPLTB 3, killed %1:vrrc
855 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
856
857 if (MRI->hasOneNonDBGUse(DefMI->getOperand(0).getReg())) {
858 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
859 LLVM_DEBUG(DefMI->dump());
860 ToErase = DefMI;
861 }
862 Simplified = true;
863 unsigned ShiftImm = DefMI->getOperand(3).getImm();
864 // The operand number of the splat Imm in the instruction.
865 unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
866 unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
867
868 // Calculate the new splat-element immediate. We need to convert the
869 // element index into the proper unit (bytes for VSPLTB, halfwords for
870 // VSPLTH, words for XXSPLTW) because PPC::XXSLDWI expresses its
871 // ShiftImm in 32-bit word units.
872 auto CalculateNewElementIdx = [&](unsigned Opcode) {
873 if (Opcode == PPC::VSPLTB)
874 return (SplatImm + ShiftImm * 4) & 0xF;
875 else if (Opcode == PPC::VSPLTH)
876 return (SplatImm + ShiftImm * 2) & 0x7;
877 else
878 return (SplatImm + ShiftImm) & 0x3;
879 };
880
881 unsigned NewElem = CalculateNewElementIdx(MyOpcode);
882
883 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
884 << " to " << NewElem << " in instruction: ");
885 LLVM_DEBUG(MI.dump());
886 addRegToUpdate(MI.getOperand(OpNo).getReg());
887 addRegToUpdate(ShiftOp1);
888 MI.getOperand(OpNo).setReg(ShiftOp1);
889 MI.getOperand(SplatImmNo).setImm(NewElem);
890 }
891 }
892 break;
893 }
894 case PPC::XVCVDPSP: {
895 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
896 Register TrueReg =
897 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
898 if (!TrueReg.isVirtual())
899 break;
900 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
901
902 // This can occur when building a vector of single precision or integer
903 // values.
904 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
905 Register DefsReg1 =
906 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
907 Register DefsReg2 =
908 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
909 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
910 break;
911 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
912 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
913
914 if (!P1 || !P2)
915 break;
916
917 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
918 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
919 // the FRSP/XSRSP.
920 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
921 unsigned Opc = RoundInstr->getOpcode();
922 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
923 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
924 Simplified = true;
925 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
926 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
927 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
928 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
929 if (Use.getOperand(i).isReg() &&
930 Use.getOperand(i).getReg() == FRSPDefines)
931 Use.getOperand(i).setReg(ConvReg1);
932 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
933 LLVM_DEBUG(RoundInstr->dump());
934 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
935 LLVM_DEBUG(MI.dump());
936 LLVM_DEBUG(dbgs() << "Through instruction:\n");
937 LLVM_DEBUG(Use.dump());
938 addRegToUpdate(ConvReg1);
939 addRegToUpdate(FRSPDefines);
940 ToErase = RoundInstr;
941 }
942 };
943
944 // If the input to XVCVDPSP is a vector that was built (even
945 // partially) out of FRSP's, the FRSP(s) can safely be removed
946 // since this instruction performs the same operation.
947 if (P1 != P2) {
948 removeFRSPIfPossible(P1);
949 removeFRSPIfPossible(P2);
950 break;
951 }
952 removeFRSPIfPossible(P1);
953 }
954 break;
955 }
956 case PPC::EXTSH:
957 case PPC::EXTSH8:
958 case PPC::EXTSH8_32_64: {
959 if (!EnableSExtElimination) break;
960 Register NarrowReg = MI.getOperand(1).getReg();
961 if (!NarrowReg.isVirtual())
962 break;
963
964 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
965 unsigned SrcOpcode = SrcMI->getOpcode();
966 // If we've used a zero-extending load that we will sign-extend,
967 // just do a sign-extending load.
968 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
969 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
970 break;
971 // Determine the new opcode. We need to make sure that if the original
972 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
973 // Likewise if the source is X-Form the new opcode should also be
974 // X-Form.
975 unsigned Opc = PPC::LHA;
976 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
977 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
978 MI.getOpcode() == PPC::EXTSH8_32_64;
979
980 if (SourceIsXForm && MIIs64Bit)
981 Opc = PPC::LHAX8;
982 else if (SourceIsXForm && !MIIs64Bit)
983 Opc = PPC::LHAX;
984 else if (MIIs64Bit)
985 Opc = PPC::LHA8;
986
987 addRegToUpdate(NarrowReg);
988 addRegToUpdate(MI.getOperand(0).getReg());
989
990 // We are removing a definition of NarrowReg which will cause
991 // problems in AliveBlocks. Add an implicit def that will be
992 // removed so that AliveBlocks are updated correctly.
993 addDummyDef(MBB, &MI, NarrowReg);
994 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
995 LLVM_DEBUG(SrcMI->dump());
996 LLVM_DEBUG(dbgs() << "and sign-extension\n");
997 LLVM_DEBUG(MI.dump());
998 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
999 SrcMI->setDesc(TII->get(Opc));
1000 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1001 ToErase = &MI;
1002 Simplified = true;
1003 NumEliminatedSExt++;
1004 }
1005 break;
1006 }
1007 case PPC::EXTSW:
1008 case PPC::EXTSW_32:
1009 case PPC::EXTSW_32_64: {
1010 if (!EnableSExtElimination) break;
1011 Register NarrowReg = MI.getOperand(1).getReg();
1012 if (!NarrowReg.isVirtual())
1013 break;
1014
1015 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
1016 unsigned SrcOpcode = SrcMI->getOpcode();
1017 // If we've used a zero-extending load that we will sign-extend,
1018 // just do a sign-extending load.
1019 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
1020 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
1021 break;
1022
1023 // The transformation from a zero-extending load to a sign-extending
1024 // load is only legal when the displacement is a multiple of 4.
1025 // If the displacement is not at least 4 byte aligned, don't perform
1026 // the transformation.
1027 bool IsWordAligned = false;
1028 if (SrcMI->getOperand(1).isGlobal()) {
1029 const GlobalVariable *GV =
1030 dyn_cast<GlobalVariable>(SrcMI->getOperand(1).getGlobal());
1031 if (GV && GV->getAlign() && *GV->getAlign() >= 4 &&
1032 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1033 IsWordAligned = true;
1034 } else if (SrcMI->getOperand(1).isImm()) {
1035 int64_t Value = SrcMI->getOperand(1).getImm();
1036 if (Value % 4 == 0)
1037 IsWordAligned = true;
1038 }
1039
1040 // Determine the new opcode. We need to make sure that if the original
1041 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1042 // Likewise if the source is X-Form the new opcode should also be
1043 // X-Form.
1044 unsigned Opc = PPC::LWA_32;
1045 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1046 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1047 MI.getOpcode() == PPC::EXTSW_32_64;
1048
1049 if (SourceIsXForm && MIIs64Bit)
1050 Opc = PPC::LWAX;
1051 else if (SourceIsXForm && !MIIs64Bit)
1052 Opc = PPC::LWAX_32;
1053 else if (MIIs64Bit)
1054 Opc = PPC::LWA;
1055
1056 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1057 break;
1058
1059 addRegToUpdate(NarrowReg);
1060 addRegToUpdate(MI.getOperand(0).getReg());
1061
1062 // We are removing a definition of NarrowReg which will cause
1063 // problems in AliveBlocks. Add an implicit def that will be
1064 // removed so that AliveBlocks are updated correctly.
1065 addDummyDef(MBB, &MI, NarrowReg);
1066 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1067 LLVM_DEBUG(SrcMI->dump());
1068 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1069 LLVM_DEBUG(MI.dump());
1070 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1071 SrcMI->setDesc(TII->get(Opc));
1072 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1073 ToErase = &MI;
1074 Simplified = true;
1075 NumEliminatedSExt++;
1076 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1077 TII->isSignExtended(NarrowReg, MRI)) {
1078 // We can eliminate EXTSW if the input is known to be already
1079 // sign-extended. However, we are not sure whether a spill will occur
1080 // during register allocation. If there is no promotion, it will use
1081 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1082 // since the register class is 32-bits. Consequently, the high 32-bit
1083 // information will be lost. Therefore, all these instructions in the
1084 // chain used to deduce sign extension to eliminate the 'extsw' will
1085 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1086 // is eliminated.
1087 TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1088
1089 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1090 Register TmpReg =
1091 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1092 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1093 TmpReg);
1094 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1095 MI.getOperand(0).getReg())
1096 .addReg(TmpReg)
1097 .addReg(NarrowReg)
1098 .addImm(PPC::sub_32);
1099 ToErase = &MI;
1100 Simplified = true;
1101 NumEliminatedSExt++;
1102 }
1103 break;
1104 }
1105 case PPC::RLDICL: {
1106 // We can eliminate RLDICL (e.g. for zero-extension)
1107 // if all bits to clear are already zero in the input.
1108 // This code assumes the following code sequence for zero-extension.
1109 // %6 = COPY %5:sub_32; (optional)
1110 // %8 = IMPLICIT_DEF;
1111 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1112 if (!EnableZExtElimination) break;
1113
1114 if (MI.getOperand(2).getImm() != 0)
1115 break;
1116
1117 Register SrcReg = MI.getOperand(1).getReg();
1118 if (!SrcReg.isVirtual())
1119 break;
1120
1121 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1122 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1123 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1124 break;
1125
1126 MachineInstr *ImpDefMI, *SubRegMI;
1127 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1128 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1129 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1130
1131 SrcMI = SubRegMI;
1132 if (SubRegMI->getOpcode() == PPC::COPY) {
1133 Register CopyReg = SubRegMI->getOperand(1).getReg();
1134 if (CopyReg.isVirtual())
1135 SrcMI = MRI->getVRegDef(CopyReg);
1136 }
1137 if (!SrcMI->getOperand(0).isReg())
1138 break;
1139
1140 unsigned KnownZeroCount =
1141 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1142 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1143 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1144 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1145 MI.getOperand(0).getReg())
1146 .addReg(SrcReg);
1147 addRegToUpdate(SrcReg);
1148 ToErase = &MI;
1149 Simplified = true;
1150 NumEliminatedZExt++;
1151 }
1152 break;
1153 }
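// A sketch of the zero-extension pattern above (hypothetical vregs):
//   %6:gprc = LBZ ...                       ; 56 known leading zeros
//   %8:g8rc = IMPLICIT_DEF
//   %7:g8rc = INSERT_SUBREG %8, %6, sub_32
//   %9:g8rc = RLDICL %7, 0, 32
// The RLDICL only clears bits that are already known to be zero, so it is
// replaced with a plain COPY of %7.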
1154
1155 // TODO: Any instruction that has an immediate form fed only by a PHI
1156 // whose operands are all load immediate can be folded away. We currently
1157 // do this for ADD instructions, but should expand it to arithmetic and
1158 // binary instructions with immediate forms in the future.
1159 case PPC::ADD4:
1160 case PPC::ADD8: {
1161 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1162 assert(PhiOp && "Invalid Operand!");
1163 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1164
1165 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1166 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1167 };
1168
1169 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1170 MachineOperand *PhiOp) {
1171 assert(PhiOp && "Invalid Operand!");
1172 assert(DominatorOp && "Invalid Operand!");
1173 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1174 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1175
1176 // Note: the vregs only show up at the odd-index positions of the PHI
1177 // node; the even-index positions hold the predecessor BB info.
1178 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1179 MachineInstr *LiMI =
1180 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1181 if (!LiMI ||
1182 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1183 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1184 !MDT->dominates(DefDomMI, LiMI))
1185 return false;
1186 }
1187
1188 return true;
1189 };
1190
1191 MachineOperand Op1 = MI.getOperand(1);
1192 MachineOperand Op2 = MI.getOperand(2);
1193 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1194 std::swap(Op1, Op2);
1195 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1196 break; // We don't have an ADD fed by LI's that can be transformed
1197
1198 // Now we know that Op1 is the PHI node and Op2 is the dominator
1199 Register DominatorReg = Op2.getReg();
1200
1201 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1202 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1203 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1204 MRI->setRegClass(DominatorReg, TRC);
1205
1206 // replace LIs with ADDIs
1207 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1208 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1209 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1210 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1211 LLVM_DEBUG(LiMI->dump());
1212
1213 // There could be repeated registers in the PHI, e.g: %1 =
1214 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1215 // already replaced the def instruction, skip.
1216 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1217 continue;
1218
1219 assert((LiMI->getOpcode() == PPC::LI ||
1220 LiMI->getOpcode() == PPC::LI8) &&
1221 "Invalid Opcode!");
1222 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1223 LiMI->removeOperand(1); // remove the imm of LI
1224 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1225 : PPC::ADDI8));
1226 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1227 .addReg(DominatorReg)
1228 .addImm(LiImm); // restore the imm of LI
1229 LLVM_DEBUG(LiMI->dump());
1230 }
1231
1232 // Replace ADD with COPY
1233 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1234 LLVM_DEBUG(MI.dump());
1235 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1236 MI.getOperand(0).getReg())
1237 .add(Op1);
1238 addRegToUpdate(Op1.getReg());
1239 addRegToUpdate(Op2.getReg());
1240 ToErase = &MI;
1241 Simplified = true;
1242 NumOptADDLIs++;
1243 break;
1244 }
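// Rough sketch of the transformation above (hypothetical vregs/immediates):
//   %4 = LI 10 (in %bb.1)      %5 = LI 20 (in %bb.2)
//   %6 = PHI %4, %bb.1, %5, %bb.2
//   %7 = ADD4 %6, %3
// becomes
//   %4 = ADDI %3, 10           %5 = ADDI %3, 20
//   %6 = PHI %4, %bb.1, %5, %bb.2
//   %7 = COPY %6
// provided the definition of %3 dominates both LIs and the LI and PHI
// results each have a single use.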
1245 case PPC::RLDICR: {
1246 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1247 combineSEXTAndSHL(MI, ToErase);
1248 break;
1249 }
1250 case PPC::ANDI_rec:
1251 case PPC::ANDI8_rec:
1252 case PPC::ANDIS_rec:
1253 case PPC::ANDIS8_rec: {
1254 Register TrueReg =
1255 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1256 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1257 break;
1258
1259 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1260 if (!SrcMI)
1261 break;
1262
1263 unsigned SrcOpCode = SrcMI->getOpcode();
1264 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1265 break;
1266
1267 Register SrcReg, DstReg;
1268 SrcReg = SrcMI->getOperand(1).getReg();
1269 DstReg = MI.getOperand(1).getReg();
1270 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1271 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1272 if (DstRC != SrcRC)
1273 break;
1274
1275 uint64_t AndImm = MI.getOperand(2).getImm();
1276 if (MI.getOpcode() == PPC::ANDIS_rec ||
1277 MI.getOpcode() == PPC::ANDIS8_rec)
1278 AndImm <<= 16;
1279 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1280 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1281 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1282
1283 // We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` into `ANDI_rec 0`
1284 // if all bits to AND are already zero in the input.
1285 bool PatternResultZero =
1286 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1287 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1288
1289 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1290 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1291 bool PatternRemoveRotate =
1292 SrcMI->getOperand(2).getImm() == 0 &&
1293 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1294 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1295
1296 if (!PatternResultZero && !PatternRemoveRotate)
1297 break;
1298
1299 LLVM_DEBUG(dbgs() << "Combining pair: ");
1300 LLVM_DEBUG(SrcMI->dump());
1301 LLVM_DEBUG(MI.dump());
1302 if (PatternResultZero)
1303 MI.getOperand(2).setImm(0);
1304 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1305 LLVM_DEBUG(dbgs() << "To: ");
1306 LLVM_DEBUG(MI.dump());
1307 addRegToUpdate(MI.getOperand(1).getReg());
1308 addRegToUpdate(SrcMI->getOperand(0).getReg());
1309 Simplified = true;
1310 break;
1311 }
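// Example of the PatternRemoveRotate case above (hypothetical vregs):
//   %2 = RLDICL %1, 0, 48     ; clears the top 48 bits
//   %3 = ANDI_rec %2, 255     ; mask has 56 leading zeros
// Every bit cleared by the RLDICL is also cleared by the AND mask, so the
// ANDI_rec can read %1 directly and the RLDICL is left to die.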
1312 case PPC::RLWINM:
1313 case PPC::RLWINM_rec:
1314 case PPC::RLWINM8:
1315 case PPC::RLWINM8_rec: {
1316 // We might replace operand 1 of the instruction which will
1317 // require we recompute kill flags for it.
1318 Register OrigOp1Reg = MI.getOperand(1).isReg()
1319 ? MI.getOperand(1).getReg()
1320 : PPC::NoRegister;
1321 Simplified = TII->combineRLWINM(MI, &ToErase);
1322 if (Simplified) {
1323 addRegToUpdate(OrigOp1Reg);
1324 if (MI.getOperand(1).isReg())
1325 addRegToUpdate(MI.getOperand(1).getReg());
1326 if (ToErase && ToErase->getOperand(1).isReg())
1327 for (auto UseReg : ToErase->explicit_uses())
1328 if (UseReg.isReg())
1329 addRegToUpdate(UseReg.getReg());
1330 ++NumRotatesCollapsed;
1331 }
1332 break;
1333 }
1334 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1335 // always trap, and we will delete the node if it will never trap.
1336 case PPC::TDI:
1337 case PPC::TWI:
1338 case PPC::TD:
1339 case PPC::TW: {
1340 if (!EnableTrapOptimization) break;
1341 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1342 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1343 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1344 // We can only do the optimization if we can get immediates
1345 // from both operands
1346 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1347 LiMI1->getOpcode() == PPC::LI8)))
1348 break;
1349 if (!IsOperand2Immediate &&
1350 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1351 LiMI2->getOpcode() == PPC::LI8)))
1352 break;
1353
1354 auto ImmOperand0 = MI.getOperand(0).getImm();
1355 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1356 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1357 : LiMI2->getOperand(1).getImm();
1358
1359 // We will replace the MI with an unconditional trap if it will always
1360 // trap.
1361 if ((ImmOperand0 == 31) ||
1362 ((ImmOperand0 & 0x10) &&
1363 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1364 ((ImmOperand0 & 0x8) &&
1365 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1366 ((ImmOperand0 & 0x2) &&
1367 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1368 ((ImmOperand0 & 0x1) &&
1369 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1370 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1371 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1372 TrapOpt = true;
1373 }
1374 // We will delete the MI if it will never trap.
1375 ToErase = &MI;
1376 Simplified = true;
1377 break;
1378 }
1379 }
1380 }
1381
1382 // If the last instruction was marked for elimination,
1383 // remove it now.
1384 if (ToErase) {
1385 recomputeLVForDyingInstr();
1386 ToErase->eraseFromParent();
1387 ToErase = nullptr;
1388 }
1389 // Reset TrapOpt to false at the end of the basic block.
1390 if (EnableTrapOptimization)
1391 TrapOpt = false;
1392 }
1393
1394 // Eliminate all the TOC save instructions which are redundant.
1395 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1396 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1397 if (FI->mustSaveTOC())
1398 NumTOCSavesInPrologue++;
1399
1400 // We try to eliminate redundant compare instructions.
1401 Simplified |= eliminateRedundantCompare();
1402
1403 // If we have made any modifications and added any registers to the set of
1404 // registers for which we need to update the kill flags, do so by recomputing
1405 // LiveVariables for those registers.
1406 for (Register Reg : RegsToUpdate) {
1407 if (!MRI->reg_empty(Reg))
1408 LV->recomputeForSingleDefVirtReg(Reg);
1409 }
1410 return Simplified;
1411}
1412
1413// helper functions for eliminateRedundantCompare
1414static bool isEqOrNe(MachineInstr *BI) {
1415 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1416 unsigned PredCond = PPC::getPredicateCondition(Pred);
1417 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1418}
1419
1420static bool isSupportedCmpOp(unsigned opCode) {
1421 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1422 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1423 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1424 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1425}
1426
1427static bool is64bitCmpOp(unsigned opCode) {
1428 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1429 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1430}
1431
1432static bool isSignedCmpOp(unsigned opCode) {
1433 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1434 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1435}
1436
1437static unsigned getSignedCmpOpCode(unsigned opCode) {
1438 if (opCode == PPC::CMPLD) return PPC::CMPD;
1439 if (opCode == PPC::CMPLW) return PPC::CMPW;
1440 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1441 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1442 return opCode;
1443}
1444
1445// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1446// (LT x) to (LE x-1)
1447static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1448 uint64_t Imm = CMPI->getOperand(2).getImm();
1449 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1450 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1451 return 0;
1452
1453 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1454 unsigned PredCond = PPC::getPredicateCondition(Pred);
1455 unsigned PredHint = PPC::getPredicateHint(Pred);
1456 if (PredCond == PPC::PRED_GE)
1457 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1458 if (PredCond == PPC::PRED_LT)
1459 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1460
1461 return 0;
1462}
1463
1464// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1465// (LE x) to (LT x+1)
1466static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1467 uint64_t Imm = CMPI->getOperand(2).getImm();
1468 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1469 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1470 return 0;
1471
1472 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1473 unsigned PredCond = PPC::getPredicateCondition(Pred);
1474 unsigned PredHint = PPC::getPredicateHint(Pred);
1475 if (PredCond == PPC::PRED_GT)
1476 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1477 if (PredCond == PPC::PRED_LE)
1478 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1479
1480 return 0;
1481}
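// For instance, a branch on (GT 5) for a signed compare can instead branch on
// (GE 6) once the caller bumps the immediate. Both helpers return 0 at
// boundary immediates (e.g. 0x7FFF for a signed increment or 0 for an
// unsigned decrement), in which case the compare is left untouched.
// (Illustrative values only.)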
1482
1483// This takes a Phi node and returns a register value for the specified BB.
1484static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1485 MachineBasicBlock *MBB) {
1486 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1487 MachineOperand &MO = Phi->getOperand(I);
1488 if (MO.getMBB() == MBB)
1489 return Phi->getOperand(I-1).getReg();
1490 }
1491 llvm_unreachable("invalid src basic block for this Phi node\n");
1492 return 0;
1493}
1494
1495// This function tracks the source of the register through register copies.
1496// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
1497// assuming that control comes from BB1 into BB2.
1498static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1499 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1500 unsigned SrcReg = Reg;
1501 while (true) {
1502 unsigned NextReg = SrcReg;
1503 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1504 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1505 NextReg = getIncomingRegForBlock(Inst, BB1);
1506 // We track through a PHI only once to avoid an infinite loop.
1507 BB1 = nullptr;
1508 }
1509 else if (Inst->isFullCopy())
1510 NextReg = Inst->getOperand(1).getReg();
1511 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1512 break;
1513 SrcReg = NextReg;
1514 }
1515 return SrcReg;
1516}
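// [Editor's illustration, not in the upstream source; register numbers are
// invented] getSrcVReg() peels full COPYs and, at most once, a PHI in BB2
// reached from BB1. For example:
//   %2 = PHI %0, %bb.1, %1, %bb.3     ; in BB2, with BB1 == %bb.1
//   %3 = COPY %2
// tracing %3 with (BB1, BB2) yields %0, so a compare of %3 in BB2 and a
// compare of %0 in BB1 are recognized as reading the same value.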
1517
1518static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1519 MachineBasicBlock *&PredMBB,
1520 MachineBasicBlock *&MBBtoMoveCmp,
1521 MachineRegisterInfo *MRI) {
1522
1523 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1524 auto BII = BB.getFirstInstrTerminator();
1525 // We optimize BBs ending with a conditional branch.
1526 // We check only for BCC here, not BCCLR, because BCCLR
1527 // will be formed only later in the pipeline.
1528 if (BB.succ_size() == 2 &&
1529 BII != BB.instr_end() &&
1530 (*BII).getOpcode() == PPC::BCC &&
1531 (*BII).getOperand(1).isReg()) {
1532 // We optimize only if the condition code is used only by one BCC.
1533 Register CndReg = (*BII).getOperand(1).getReg();
1534 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1535 return false;
1536
1537 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1538 // We assume compare and branch are in the same BB for ease of analysis.
1539 if (CMPI->getParent() != &BB)
1540 return false;
1541
1542 // We skip this BB if a physical register is used in comparison.
1543 for (MachineOperand &MO : CMPI->operands())
1544 if (MO.isReg() && !MO.getReg().isVirtual())
1545 return false;
1546
1547 return true;
1548 }
1549 return false;
1550 };
1551
1552 // If this BB has more than one successor, we can create a new BB and
1553 // move the compare instruction into the new BB.
1554 // So far, we do not move a compare instruction to a BB having multiple
1555 // successors, to avoid potentially increasing code size.
1556 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1557 return BB.succ_size() == 1;
1558 };
1559
1560 if (!isEligibleBB(MBB))
1561 return false;
1562
1563 unsigned NumPredBBs = MBB.pred_size();
1564 if (NumPredBBs == 1) {
1565 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1566 if (isEligibleBB(*TmpMBB)) {
1567 PredMBB = TmpMBB;
1568 MBBtoMoveCmp = nullptr;
1569 return true;
1570 }
1571 }
1572 else if (NumPredBBs == 2) {
1573 // We check for the partially redundant case.
1574 // So far, we support cases with only two predecessors
1575 // to avoid increasing the number of instructions.
1576 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
1577 MachineBasicBlock *Pred1MBB = *PI;
1578 MachineBasicBlock *Pred2MBB = *(PI+1);
1579
1580 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1581 // We assume Pred1MBB is the BB containing the compare to be merged and
1582 // Pred2MBB is the BB to which we will append a compare instruction.
1583 // Proceed as is if Pred1MBB is different from MBB.
1584 }
1585 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1586 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1587 std::swap(Pred1MBB, Pred2MBB);
1588 }
1589 else return false;
1590
1591 if (Pred1MBB == &MBB)
1592 return false;
1593
1594 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1595 // We cannot move the compare instruction if operands are not available
1596 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1597 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1598 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1599 for (int I = 1; I <= 2; I++)
1600 if (CMPI->getOperand(I).isReg()) {
1601 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1602 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1603 return false;
1604 }
1605
1606 PredMBB = Pred1MBB;
1607 MBBtoMoveCmp = Pred2MBB;
1608 return true;
1609 }
1610
1611 return false;
1612}
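// [Editor's illustration, not in the upstream source; register names are
// invented] In the partially redundant case accepted above, the compare in
// MBB may only read values that are also available in Pred2MBB, which in
// practice means PHI results:
//   %a = PHI %x, <Pred1MBB>, %y, <Pred2MBB>   ; in MBB
//   CMPW %a, %b ; BCC ...                     ; movable: once the compare is
//                                             ; moved, %a is rewritten to %y
// A compare whose operand is defined in MBB by a non-PHI instruction is
// rejected by the loop above.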
1613
1614 // This function iterates over the input map, which pairs each TOC-save
1615 // instruction with a flag. The flag is false if the TOC save has been
1616 // proven redundant. All TOC saves marked as redundant are erased from
1617 // the basic block.
1618bool PPCMIPeephole::eliminateRedundantTOCSaves(
1619 std::map<MachineInstr *, bool> &TOCSaves) {
1620 bool Simplified = false;
1621 int NumKept = 0;
1622 for (auto TOCSave : TOCSaves) {
1623 if (!TOCSave.second) {
1624 TOCSave.first->eraseFromParent();
1625 RemoveTOCSave++;
1626 Simplified = true;
1627 } else {
1628 NumKept++;
1629 }
1630 }
1631
1632 if (NumKept > 1)
1633 MultiTOCSaves++;
1634
1635 return Simplified;
1636}
1637
1638// If multiple conditional branches are executed based on the (essentially)
1639// same comparison, we merge compare instructions into one and make multiple
1640// conditional branches on this comparison.
1641// For example,
1642// if (a == 0) { ... }
1643// else if (a < 0) { ... }
1644// can be executed by one compare and two conditional branches instead of
1645// two pairs of a compare and a conditional branch.
1646//
1647// This method merges two compare instructions in two MBBs and modifies the
1648// compare and conditional branch instructions if needed.
1649// For the above example, the input for this pass looks like:
1650// cmplwi r3, 0
1651// beq 0, .LBB0_3
1652// cmpwi r3, -1
1653// bgt 0, .LBB0_4
1654// So, before merging two compares, we need to modify these instructions as
1655 // cmpwi r3, 0 ; cmplwi and cmpwi yield the same result for beq
1656 // beq 0, .LBB0_3
1657 // cmpwi r3, 0 ; greater than -1 means greater than or equal to 0
1658// bge 0, .LBB0_4
1659
1660bool PPCMIPeephole::eliminateRedundantCompare() {
1661 bool Simplified = false;
1662
1663 for (MachineBasicBlock &MBB2 : *MF) {
1664 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1665
1666 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1667 // as an optimization target if
1668 // - both MBBs end with a conditional branch,
1669 // - MBB1 is the only predecessor of MBB2, and
1670 // - the compare does not take a physical register as an operand in either MBB.
1671 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1672 //
1673 // For the partially redundant case, we additionally handle MBB2 having one
1674 // additional predecessor, which has only one successor (MBB2).
1675 // In this case, we move the compare instruction originally in MBB2 into
1676 // MBBtoMoveCmp. This partially redundant case typically appears when
1677 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1678 //
1679 // Overview of CFG of related basic blocks
1680 // Fully redundant case Partially redundant case
1681 // -------- ---------------- --------
1682 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1683 // -------- ---------------- --------
1684 // | \ (w/ 1 succ) \ | \
1685 // | \ \ | \
1686 // | \ |
1687 // -------- --------
1688 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1689 // -------- and 2 succ) -------- and 2 succ)
1690 // | \ | \
1691 // | \ | \
1692 //
1693 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1694 continue;
1695
1696 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1697 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1698
1699 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1700 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1701 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1702
1703 // We cannot optimize an unsupported compare opcode or
1704 // a mix of 32-bit and 64-bit comparisons
1705 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1706 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1707 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1708 continue;
1709
1710 unsigned NewOpCode = 0;
1711 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1712 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1713 bool SwapOperands = false;
1714
1715 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1716 // Typically, unsigned comparison is used for equality check, but
1717 // we replace it with a signed comparison if the comparison
1718 // to be merged is a signed comparison.
1719 // In other cases of opcode mismatch, we cannot optimize this.
1720
1721 // We cannot change opcode when comparing against an immediate
1722 // if the most significant bit of the immediate is one
1723 // due to the difference in sign extension.
1724 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1725 if (!I->getOperand(2).isImm())
1726 return false;
1727 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1728 return Imm < 0;
1729 };
1730
1731 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1732 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1733 NewOpCode = CMPI1->getOpcode();
1734 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1735 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1736 NewOpCode = CMPI2->getOpcode();
1737 else continue;
1738 }
1739
1740 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1741 // In case of comparisons between two registers, these two registers
1742 // must be the same to merge the two comparisons.
1743 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1744 nullptr, nullptr, MRI);
1745 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1746 nullptr, nullptr, MRI);
1747 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1748 MBB1, &MBB2, MRI);
1749 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1750 MBB1, &MBB2, MRI);
1751
1752 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1753 // Same pair of registers in the same order; ready to merge as is.
1754 }
1755 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1756 // Same pair of registers in different order.
1757 // We reverse the predicate to merge compare instructions.
1758 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1759 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1760 // In case of partial redundancy, we need to swap operands
1761 // in another compare instruction.
1762 SwapOperands = true;
1763 }
1764 else continue;
1765 }
1766 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1767 // In case of comparisons between a register and an immediate,
1768 // the operand register must be the same in the two compare instructions.
1769 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1770 nullptr, nullptr, MRI);
1771 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1772 MBB1, &MBB2, MRI);
1773 if (Cmp1Operand1 != Cmp2Operand1)
1774 continue;
1775
1776 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1777 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1778
1779 // If the immediates are not the same, we try to adjust by changing the predicate;
1780 // e.g. GT imm means GE (imm+1).
1781 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1782 int Diff = Imm1 - Imm2;
1783 if (Diff < -2 || Diff > 2)
1784 continue;
1785
1786 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1787 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1788 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1789 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1790 if (Diff == 2) {
1791 if (PredToInc2 && PredToDec1) {
1792 NewPredicate2 = PredToInc2;
1793 NewPredicate1 = PredToDec1;
1794 NewImm2++;
1795 NewImm1--;
1796 }
1797 }
1798 else if (Diff == 1) {
1799 if (PredToInc2) {
1800 NewImm2++;
1801 NewPredicate2 = PredToInc2;
1802 }
1803 else if (PredToDec1) {
1804 NewImm1--;
1805 NewPredicate1 = PredToDec1;
1806 }
1807 }
1808 else if (Diff == -1) {
1809 if (PredToDec2) {
1810 NewImm2--;
1811 NewPredicate2 = PredToDec2;
1812 }
1813 else if (PredToInc1) {
1814 NewImm1++;
1815 NewPredicate1 = PredToInc1;
1816 }
1817 }
1818 else if (Diff == -2) {
1819 if (PredToDec2 && PredToInc1) {
1820 NewPredicate2 = PredToDec2;
1821 NewPredicate1 = PredToInc1;
1822 NewImm2--;
1823 NewImm1++;
1824 }
1825 }
1826 }
1827
1828 // We cannot merge the two compares if the immediates are not the same.
1829 if (NewImm2 != NewImm1)
1830 continue;
1831 }
1832
1833 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1834 LLVM_DEBUG(CMPI1->dump());
1835 LLVM_DEBUG(BI1->dump());
1836 LLVM_DEBUG(CMPI2->dump());
1837 LLVM_DEBUG(BI2->dump());
1838 for (const MachineOperand &MO : CMPI1->operands())
1839 if (MO.isReg())
1840 addRegToUpdate(MO.getReg());
1841 for (const MachineOperand &MO : CMPI2->operands())
1842 if (MO.isReg())
1843 addRegToUpdate(MO.getReg());
1844
1845 // We adjust the opcode, predicates, and immediates as determined above.
1846 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1847 CMPI1->setDesc(TII->get(NewOpCode));
1848 }
1849 if (NewPredicate1) {
1850 BI1->getOperand(0).setImm(NewPredicate1);
1851 }
1852 if (NewPredicate2) {
1853 BI2->getOperand(0).setImm(NewPredicate2);
1854 }
1855 if (NewImm1 != Imm1) {
1856 CMPI1->getOperand(2).setImm(NewImm1);
1857 }
1858
1859 if (IsPartiallyRedundant) {
1860 // We touch up the compare instruction in MBB2 and move it to
1861 // a previous BB to handle the partially redundant case.
1862 if (SwapOperands) {
1863 Register Op1 = CMPI2->getOperand(1).getReg();
1864 Register Op2 = CMPI2->getOperand(2).getReg();
1865 CMPI2->getOperand(1).setReg(Op2);
1866 CMPI2->getOperand(2).setReg(Op1);
1867 }
1868 if (NewImm2 != Imm2)
1869 CMPI2->getOperand(2).setImm(NewImm2);
1870
1871 for (int I = 1; I <= 2; I++) {
1872 if (CMPI2->getOperand(I).isReg()) {
1873 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1874 if (Inst->getParent() != &MBB2)
1875 continue;
1876
1877 assert(Inst->getOpcode() == PPC::PHI &&
1878 "We cannot support if an operand comes from this BB.");
1879 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1880 CMPI2->getOperand(I).setReg(SrcReg);
1881 addRegToUpdate(SrcReg);
1882 }
1883 }
1884 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1885 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1886
1887 DebugLoc DL = CMPI2->getDebugLoc();
1888 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1889 BuildMI(MBB2, MBB2.begin(), DL,
1890 TII->get(PPC::PHI), NewVReg)
1891 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1892 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1893 BI2->getOperand(1).setReg(NewVReg);
1894 addRegToUpdate(NewVReg);
1895 }
1896 else {
1897 // We finally eliminate compare instruction in MBB2.
1898 // We do not need to treat CMPI2 specially here in terms of re-computing
1899 // live variables even though it is being deleted because:
1900 // - It defines a register that has a single use (already checked in
1901 // eligibleForCompareElimination())
1902 // - The only user (BI2) is no longer using it so the register is dead (no
1903 // def, no uses)
1904 // - We do not attempt to recompute live variables for dead registers
1905 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1906 CMPI2->eraseFromParent();
1907 }
1908
1909 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1910 LLVM_DEBUG(CMPI1->dump());
1911 LLVM_DEBUG(BI1->dump());
1912 LLVM_DEBUG(BI2->dump());
1913 if (IsPartiallyRedundant) {
1914 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1915 << printMBBReference(*MBBtoMoveCmp)
1916 << " to handle partial redundancy.\n");
1917 LLVM_DEBUG(CMPI2->dump());
1918 }
1919 Simplified = true;
1920 }
1921
1922 return Simplified;
1923}
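// [Editor's note, not in the upstream source] The partially redundant path
// above typically fires for a rotated while loop: the guard block (MBB1) and
// the loop block (MBB2) test essentially the same condition, with the
// preheader (MBBtoMoveCmp) as MBB2's second predecessor. The compare in MBB2
// is spliced into the preheader, a PHI of the two CR results is inserted at
// the top of MBB2, and MBB2's branch is rewired to that PHI, so each path
// into MBB2 executes only one compare.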
1924
1925// We miss the opportunity to emit an RLDIC when lowering jump tables
1926// since ISEL sees only a single basic block. When selecting, the clear
1927// and shift left will be in different blocks.
1928bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1929 MachineInstr *&ToErase) {
1930 if (MI.getOpcode() != PPC::RLDICR)
1931 return false;
1932
1933 Register SrcReg = MI.getOperand(1).getReg();
1934 if (!SrcReg.isVirtual())
1935 return false;
1936
1937 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1938 if (SrcMI->getOpcode() != PPC::RLDICL)
1939 return false;
1940
1941 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1942 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1943 MachineOperand MOpSHMI = MI.getOperand(2);
1944 MachineOperand MOpMEMI = MI.getOperand(3);
1945 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1946 MOpMEMI.isImm()))
1947 return false;
1948
1949 uint64_t SHSrc = MOpSHSrc.getImm();
1950 uint64_t MBSrc = MOpMBSrc.getImm();
1951 uint64_t SHMI = MOpSHMI.getImm();
1952 uint64_t MEMI = MOpMEMI.getImm();
1953 uint64_t NewSH = SHSrc + SHMI;
1954 uint64_t NewMB = MBSrc - SHMI;
1955 if (NewMB > 63 || NewSH > 63)
1956 return false;
1957
1958 // The bits cleared with RLDICL are [0, MBSrc).
1959 // The bits cleared with RLDICR are (MEMI, 63].
1960 // After the sequence, the bits cleared are:
1961 // [0, MBSrc-SHMI) and (MEMI, 63].
1962 //
1963 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1964 if ((63 - NewSH) != MEMI)
1965 return false;
1966
1967 LLVM_DEBUG(dbgs() << "Converting pair: ");
1968 LLVM_DEBUG(SrcMI->dump());
1969 LLVM_DEBUG(MI.dump());
1970
1971 MI.setDesc(TII->get(PPC::RLDIC));
1972 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1973 MI.getOperand(2).setImm(NewSH);
1974 MI.getOperand(3).setImm(NewMB);
1975 addRegToUpdate(MI.getOperand(1).getReg());
1976 addRegToUpdate(SrcMI->getOperand(0).getReg());
1977
1978 LLVM_DEBUG(dbgs() << "To: ");
1979 LLVM_DEBUG(MI.dump());
1980 NumRotatesCollapsed++;
1981 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1982 if (MRI->use_nodbg_empty(SrcReg)) {
1983 assert(!SrcMI->hasImplicitDef() &&
1984 "Not expecting an implicit def with this instr.");
1985 ToErase = SrcMI;
1986 }
1987 return true;
1988}
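// [Editor's illustration, not in the upstream source; values chosen for the
// example] A typical jump-table index sequence
//   %1 = RLDICL %0, 0, 32      ; clear bits [0, 32), i.e. zero-extend word
//   %2 = RLDICR %1, 2, 61      ; rotate left 2, clear bits (61, 63]
// has SHSrc = 0, MBSrc = 32, SHMI = 2, MEMI = 61, giving NewSH = 2 and
// NewMB = 30 with 63 - NewSH == MEMI, so the pair collapses to
//   %2 = RLDIC %0, 2, 30       ; clear [0, 30) and (61, 63]
// which computes (zext32(%0) << 2) in a single rotate-and-mask.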
1989
1990 // For a case like the following in LLVM IR:
1991// entry:
1992// %iconv = sext i32 %index to i64
1993 // br i1 undef, label %true, label %false
1994// true:
1995// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1996// ...
1997// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1998// different BBs when conducting instruction selection. We can do a peephole
1999// optimization to combine these two instructions into extswsli after
2000// instruction selection.
2001bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
2002 MachineInstr *&ToErase) {
2003 if (MI.getOpcode() != PPC::RLDICR)
2004 return false;
2005
2006 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
2007 return false;
2008
2009 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
2010
2011 MachineOperand MOpSHMI = MI.getOperand(2);
2012 MachineOperand MOpMEMI = MI.getOperand(3);
2013 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
2014 return false;
2015
2016 uint64_t SHMI = MOpSHMI.getImm();
2017 uint64_t MEMI = MOpMEMI.getImm();
2018 if (SHMI + MEMI != 63)
2019 return false;
2020
2021 Register SrcReg = MI.getOperand(1).getReg();
2022 if (!SrcReg.isVirtual())
2023 return false;
2024
2025 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2026 if (SrcMI->getOpcode() != PPC::EXTSW &&
2027 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2028 return false;
2029
2030 // If the register defined by extsw has more than one use, combination is not
2031 // needed.
2032 if (!MRI->hasOneNonDBGUse(SrcReg))
2033 return false;
2034
2035 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2036 assert(SrcMI->getOperand(1).isReg() &&
2037 "EXTSW's second operand should be a register");
2038 if (!SrcMI->getOperand(1).getReg().isVirtual())
2039 return false;
2040
2041 LLVM_DEBUG(dbgs() << "Combining pair: ");
2042 LLVM_DEBUG(SrcMI->dump());
2043 LLVM_DEBUG(MI.dump());
2044
2045 MachineInstr *NewInstr =
2046 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2047 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2048 : TII->get(PPC::EXTSWSLI_32_64),
2049 MI.getOperand(0).getReg())
2050 .add(SrcMI->getOperand(1))
2051 .add(MOpSHMI);
2052 (void)NewInstr;
2053
2054 LLVM_DEBUG(dbgs() << "TO: ");
2055 LLVM_DEBUG(NewInstr->dump());
2056 ++NumEXTSWAndSLDICombined;
2057 ToErase = &MI;
2058 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2059 // can recompute its kill flags. We run DCE immediately after this pass
2060 // to clean up dead instructions such as this.
2061 addRegToUpdate(NewInstr->getOperand(1).getReg());
2062 addRegToUpdate(SrcMI->getOperand(0).getReg());
2063 return true;
2064}
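// [Editor's illustration, not in the upstream source] With SHMI + MEMI == 63
// the RLDICR is just a left shift by SHMI, so a pattern such as
//   %1 = EXTSW %0              ; sign-extend word to doubleword
//   %2 = RLDICR %1, 3, 60      ; shift left by 3
// becomes, on ISA 3.0 and later and when %1 has no other non-debug use,
//   %2 = EXTSWSLI %0, 3        ; sign-extend word and shift left immediate
// which is what the BuildMI call above emits.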
2065
2066} // end default namespace
2067
2068 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
2069 "PowerPC MI Peephole Optimization", false, false)
2074 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
2075 "PowerPC MI Peephole Optimization", false, false)
2076
2077char PPCMIPeephole::ID = 0;
2078 FunctionPass *
2079 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }