1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
32#include "PPC.h"
33#include "PPCInstrInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
47#include "llvm/Support/Debug.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
54STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
55STATISTIC(MultiTOCSaves,
56 "Number of functions with multiple TOC saves that must be kept");
57STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
58STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
59STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
60STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");
61STATISTIC(NumConvertedToImmediateForm,
62 "Number of instructions converted to their immediate form");
63STATISTIC(NumFunctionsEnteredInMIPeephole,
64 "Number of functions entered in PPC MI Peepholes");
65STATISTIC(NumFixedPointIterations,
66 "Number of fixed-point iterations converting reg-reg instructions "
67 "to reg-imm ones");
68STATISTIC(NumRotatesCollapsed,
69 "Number of pairs of rotate left, clear left/right collapsed");
70STATISTIC(NumEXTSWAndSLDICombined,
71 "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
72STATISTIC(NumLoadImmZeroFoldedAndRemoved,
73 "Number of LI(8) reg, 0 that are folded to r0 and removed");
74
75static cl::opt<bool>
76FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
77 cl::desc("Iterate to a fixed point when attempting to "
78 "convert reg-reg instructions to reg-imm"));
79
80static cl::opt<bool>
81ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
82 cl::desc("Convert eligible reg+reg instructions to reg+imm"));
83
84static cl::opt<bool>
85 EnableSExtElimination("ppc-eliminate-signext",
86 cl::desc("enable elimination of sign-extensions"),
87 cl::init(true), cl::Hidden);
88
89static cl::opt<bool>
90 EnableZExtElimination("ppc-eliminate-zeroext",
91 cl::desc("enable elimination of zero-extensions"),
92 cl::init(true), cl::Hidden);
93
94static cl::opt<bool>
95 EnableTrapOptimization("ppc-opt-conditional-trap",
96 cl::desc("enable optimization of conditional traps"),
97 cl::init(false), cl::Hidden);
98
99DEBUG_COUNTER(
100 PeepholeXToICounter, "ppc-xtoi-peephole",
101 "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");
102
103DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
104 "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
108struct PPCMIPeephole : public MachineFunctionPass {
109
110 static char ID;
111 const PPCInstrInfo *TII;
112 MachineFunction *MF;
113 MachineRegisterInfo *MRI;
114 LiveVariables *LV;
115
116 PPCMIPeephole() : MachineFunctionPass(ID) {}
117
118private:
119 MachineDominatorTree *MDT;
120 MachinePostDominatorTree *MPDT;
121 MachineBlockFrequencyInfo *MBFI;
122 BlockFrequency EntryFreq;
123 SmallSet<Register, 16> RegsToUpdate;
124
125 // Initialize class variables.
126 void initialize(MachineFunction &MFParm);
127
128 // Perform peepholes.
129 bool simplifyCode();
130
131 // Perform peepholes.
132 bool eliminateRedundantCompare();
133 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
134 bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
135 bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
136 MachineInstr *&ToErase);
137 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
138 MachineInstr *MI);
139
140 // A number of transformations will eliminate the definition of a register
141 // as all of its uses will be removed. However, this leaves a register
142 // without a definition for LiveVariables. Such transformations should
143 // use this function to provide a dummy definition of the register that
144 // will simply be removed by DCE.
145 void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
146 BuildMI(MBB, At, At->getDebugLoc(), TII->get(PPC::IMPLICIT_DEF), Reg);
147 }
148 void addRegToUpdateWithLine(Register Reg, int Line);
149 void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
150 SmallVectorImpl<MachineInstr *> &PHIs,
151 Register Dst);
152
153public:
154
155 void getAnalysisUsage(AnalysisUsage &AU) const override {
156 AU.addRequired<LiveVariablesWrapperPass>();
157 AU.addRequired<MachineDominatorTreeWrapperPass>();
158 AU.addRequired<MachinePostDominatorTreeWrapperPass>();
159 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
160 AU.addPreserved<LiveVariablesWrapperPass>();
161 AU.addPreserved<MachineDominatorTreeWrapperPass>();
162 AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
163 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
164 MachineFunctionPass::getAnalysisUsage(AU);
165 }
166
167 // Main entry point for this pass.
168 bool runOnMachineFunction(MachineFunction &MF) override {
169 initialize(MF);
170 // At this point, TOC pointer should not be used in a function that uses
171 // PC-Relative addressing.
172 assert((MF.getRegInfo().use_empty(PPC::X2) ||
173 !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
174 "TOC pointer used in a function using PC-Relative addressing!");
175 if (skipFunction(MF.getFunction()))
176 return false;
177 bool Changed = simplifyCode();
178#ifndef NDEBUG
179 if (Changed)
180 MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
181 "-mllvm -disable-ppc-peephole");
182#endif
183 return Changed;
184 }
185};
186
187#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
188void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
189 if (!Reg.isVirtual())
190 return;
191 if (RegsToUpdate.insert(Reg).second)
192 LLVM_DEBUG(dbgs() << "Adding register: " << printReg(Reg) << " on line "
193 << Line << " for re-computation of kill flags\n");
194}
195
196// Initialize class variables.
197void PPCMIPeephole::initialize(MachineFunction &MFParm) {
198 MF = &MFParm;
199 MRI = &MF->getRegInfo();
200 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
201 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
202 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
203 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
204 EntryFreq = MBFI->getEntryFreq();
205 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
206 RegsToUpdate.clear();
207 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
208 LLVM_DEBUG(MF->dump());
209}
210
211static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
212 MachineRegisterInfo *MRI) {
213 assert(Op && "Invalid Operand!");
214 if (!Op->isReg())
215 return nullptr;
216
217 Register Reg = Op->getReg();
218 if (!Reg.isVirtual())
219 return nullptr;
220
221 return MRI->getVRegDef(Reg);
222}
223
224// This function returns the number of known zero bits in the output of the
225// instruction that defines Reg, starting from the most significant bit.
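// For example (illustrative values): an ANDI_rec with immediate 0x00FF can
// produce at most an 8-bit value, so 48 + countl_zero<uint16_t>(0x00FF) =
// 48 + 8 = 56 leading zero bits are known in the 64-bit result.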
226static unsigned getKnownLeadingZeroCount(const unsigned Reg,
227 const PPCInstrInfo *TII,
228 const MachineRegisterInfo *MRI) {
229 MachineInstr *MI = MRI->getVRegDef(Reg);
230 unsigned Opcode = MI->getOpcode();
231 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
232 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
233 return MI->getOperand(3).getImm();
234
235 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
236 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
237 return MI->getOperand(3).getImm();
238
239 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
240 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
241 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
242 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
243 return 32 + MI->getOperand(3).getImm();
244
245 if (Opcode == PPC::ANDI_rec) {
246 uint16_t Imm = MI->getOperand(2).getImm();
247 return 48 + llvm::countl_zero(Imm);
248 }
249
250 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
251 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
252 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
253 // The result ranges from 0 to 32.
254 return 58;
255
256 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
257 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
258 // The result ranges from 0 to 64.
259 return 57;
260
261 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
262 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
263 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
264 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
265 return 48;
266
267 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
268 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
269 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
270 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
271 return 56;
272
273 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
274 Opcode == PPC::AND8_rec)
275 return std::max(
276 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
277 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
278
279 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
280 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
281 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
282 Opcode == PPC::XOR8_rec)
283 return std::min(
284 getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
285 getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
286
287 if (TII->isZeroExtended(Reg, MRI))
288 return 32;
289
290 return 0;
291}
292
293// This function maintains a map for the pairs <TOC Save Instr, Keep>
294// Each time a new TOC save is encountered, it checks if any of the existing
295// ones are dominated by the new one. If so, it marks the existing one as
296// redundant by setting its entry in the map to false. It then adds the new
297// instruction to the map with either true or false depending on whether any
298// existing instruction dominates the new one.
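// For example, a TOC save in the entry block dominates every later TOC save,
// so each later save is added to the map with its Keep flag already false and
// only the dominating save is kept.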
299void PPCMIPeephole::UpdateTOCSaves(
300 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
301 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
302 // FIXME: Saving the TOC in the prologue hasn't been implemented for the
303 // AIX ABI yet, so we only support this under ELFv2.
304 if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
305 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
306
307 MachineBasicBlock *Entry = &MF->front();
308 BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MI->getParent());
309
310 // If the block in which the TOC save resides post-dominates Entry, or is
311 // hotter than the entry block (keep in mind that early MachineLICM has
312 // already run so the TOC save won't be hoisted), we can just do the save
313 // in the prologue.
314 if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
315 FI->setMustSaveTOC(true);
316
317 // If we are saving the TOC in the prologue, all the TOC saves can be
318 // removed from the code.
319 if (FI->mustSaveTOC()) {
320 for (auto &TOCSave : TOCSaves)
321 TOCSave.second = false;
322 // Add new instruction to map.
323 TOCSaves[MI] = false;
324 return;
325 }
326 }
327
328 bool Keep = true;
329 for (auto &I : TOCSaves) {
330 MachineInstr *CurrInst = I.first;
331 // If new instruction dominates an existing one, mark existing one as
332 // redundant.
333 if (I.second && MDT->dominates(MI, CurrInst))
334 I.second = false;
335 // Check if the new instruction is redundant.
336 if (MDT->dominates(CurrInst, MI)) {
337 Keep = false;
338 break;
339 }
340 }
341 // Add new instruction to map.
342 TOCSaves[MI] = Keep;
343}
344
345// This function collects into PHIs all PHI nodes in the tree starting from
346// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
347// The list initially only contains the root PHI. When we visit a PHI node, we
348// add it to the list. We continue to look for other PHI node operands while
349// there are nodes to visit in the list. The function returns false if the
350// optimization cannot be applied on this tree.
351static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
352 MachineInstr *RootPHI,
353 SmallVectorImpl<MachineInstr *> &PHIs) {
354 PHIs.push_back(RootPHI);
355 unsigned VisitedIndex = 0;
356 while (VisitedIndex < PHIs.size()) {
357 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
358 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
359 PHIOp != NumOps; PHIOp += 2) {
360 Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
361 if (!RegOp.isVirtual())
362 return false;
363 MachineInstr *Instr = MRI->getVRegDef(RegOp);
364 // While collecting the PHI nodes, we check if they can be converted (i.e.
365 // all the operands are either copies, implicit defs or PHI nodes).
366 unsigned Opcode = Instr->getOpcode();
367 if (Opcode == PPC::COPY) {
368 Register Reg = Instr->getOperand(1).getReg();
369 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
370 return false;
371 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
372 return false;
373 // If we detect a cycle in the PHI nodes, we exit. It would be
374 // possible to change cycles as well, but that would add a lot
375 // of complexity for a case that is unlikely to occur with MMA
376 // code.
377 if (Opcode != PPC::PHI)
378 continue;
379 if (llvm::is_contained(PHIs, Instr))
380 return false;
381 PHIs.push_back(Instr);
382 }
383 VisitedIndex++;
384 }
385 return true;
386}
387
388// This function changes the unprimed accumulator PHI nodes in the PHIs list to
389// primed accumulator PHI nodes. The list is traversed in reverse order to
390// change all the PHI operands of a PHI node before changing the node itself.
391// We keep a map to associate each changed PHI node to its non-changed form.
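// A minimal sketch of the rewrite (vreg numbers and blocks are illustrative):
//   %1:uaccrc = COPY %0:accrc
//   %2:uaccrc = PHI %1:uaccrc, %bb.1, %3:uaccrc, %bb.2
//   %4:accrc  = COPY %2:uaccrc
// becomes a PHI directly over primed accumulators,
//   %4:accrc = PHI %0:accrc, %bb.1, ..., %bb.2
// leaving the original unprimed PHI and copies to be cleaned up by DCE.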
392void PPCMIPeephole::convertUnprimedAccPHIs(
393 const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
394 SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
395 DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
396 for (MachineInstr *PHI : llvm::reverse(PHIs)) {
397 SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
398 // We check if the current PHI node can be changed by looking at its
399 // operands. If all the operands are either copies from primed
400 // accumulators, implicit definitions or other unprimed accumulator
401 // PHI nodes, we change it.
402 for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
403 PHIOp += 2) {
404 Register RegOp = PHI->getOperand(PHIOp).getReg();
405 MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
406 unsigned Opcode = PHIInput->getOpcode();
407 assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
408 Opcode == PPC::PHI) &&
409 "Unexpected instruction");
410 if (Opcode == PPC::COPY) {
411 assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
412 &PPC::ACCRCRegClass &&
413 "Unexpected register class");
414 PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
415 } else if (Opcode == PPC::IMPLICIT_DEF) {
416 Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
417 BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
418 TII->get(PPC::IMPLICIT_DEF), AccReg);
419 PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
420 PHI->getOperand(PHIOp + 1)});
421 } else if (Opcode == PPC::PHI) {
422 // We found a PHI operand. At this point we know this operand
423 // has already been changed so we get its associated changed form
424 // from the map.
425 assert(ChangedPHIMap.count(PHIInput) == 1 &&
426 "This PHI node should have already been changed.");
427 MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
428 PHIOps.push_back({MachineOperand::CreateReg(
429 PrimedAccPHI->getOperand(0).getReg(), false),
430 PHI->getOperand(PHIOp + 1)});
431 }
432 }
433 Register AccReg = Dst;
434 // If the PHI node we are changing is the root node, the register it defines
435 // will be the destination register of the original copy (of the PHI def).
436 // For all other PHI's in the list, we need to create another primed
437 // accumulator virtual register as the PHI will no longer define the
438 // unprimed accumulator.
439 if (PHI != PHIs[0])
440 AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
441 MachineInstrBuilder NewPHI = BuildMI(
442 *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
443 for (auto RegMBB : PHIOps) {
444 NewPHI.add(RegMBB.first).add(RegMBB.second);
445 if (MRI->isSSA())
446 addRegToUpdate(RegMBB.first.getReg());
447 }
448 // The liveness of old PHI and new PHI have to be updated.
449 addRegToUpdate(PHI->getOperand(0).getReg());
450 addRegToUpdate(AccReg);
451 ChangedPHIMap[PHI] = NewPHI.getInstr();
452 LLVM_DEBUG(dbgs() << "Converting PHI: ");
453 LLVM_DEBUG(PHI->dump());
454 LLVM_DEBUG(dbgs() << "To: ");
455 LLVM_DEBUG(NewPHI.getInstr()->dump());
456 }
457}
458
459// Perform peephole optimizations.
460bool PPCMIPeephole::simplifyCode() {
461 bool Simplified = false;
462 bool TrapOpt = false;
463 MachineInstr* ToErase = nullptr;
464 std::map<MachineInstr *, bool> TOCSaves;
465 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
466 NumFunctionsEnteredInMIPeephole++;
467 if (ConvertRegReg) {
468 // Fixed-point conversion of reg/reg instructions fed by load-immediate
469 // into reg/imm instructions. FIXME: This is expensive, control it with
470 // an option.
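// For instance (a sketch; the actual candidates are selected by
// PPCInstrInfo::convertToImmediateForm):
//   %1:gprc = LI 100
//   %2:gprc = ADD4 killed %1, %0
// --->
//   %2:gprc = ADDI %0, 100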
471 bool SomethingChanged = false;
472 do {
473 NumFixedPointIterations++;
474 SomethingChanged = false;
475 for (MachineBasicBlock &MBB : *MF) {
476 for (MachineInstr &MI : MBB) {
477 if (MI.isDebugInstr())
478 continue;
479
480 if (!DebugCounter::shouldExecute(PeepholeXToICounter))
481 continue;
482
483 SmallSet<Register, 4> RRToRIRegsToUpdate;
484 if (!TII->convertToImmediateForm(MI, RRToRIRegsToUpdate))
485 continue;
486 for (Register R : RRToRIRegsToUpdate)
487 addRegToUpdate(R);
488 // The updated instruction may now have new register operands.
489 // Conservatively add them to recompute the flags as well.
490 for (const MachineOperand &MO : MI.operands())
491 if (MO.isReg())
492 addRegToUpdate(MO.getReg());
493 // We don't erase anything in case the def has other uses. Let DCE
494 // remove it if it can be removed.
495 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
496 LLVM_DEBUG(MI.dump());
497 NumConvertedToImmediateForm++;
498 SomethingChanged = true;
499 Simplified = true;
500 }
501 }
502 } while (SomethingChanged && FixedPointRegToImm);
503 }
504
505 // Since we are deleting this instruction, we need to run LiveVariables
506 // on any of its definitions that are marked as needing an update since
507 // we can't run LiveVariables on a deleted register. This only needs
508 // to be done for defs since uses will have their own defining
509 // instructions so we won't be running LiveVariables on a deleted reg.
510 auto recomputeLVForDyingInstr = [&]() {
511 if (RegsToUpdate.empty())
512 return;
513 for (MachineOperand &MO : ToErase->operands()) {
514 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(MO.getReg()))
515 continue;
516 Register RegToUpdate = MO.getReg();
517 RegsToUpdate.erase(RegToUpdate);
518 // If some transformation has introduced an additional definition of
519 // this register (breaking SSA), we can safely convert this def to
520 // a def of an invalid register as the instruction is going away.
521 if (!MRI->getUniqueVRegDef(RegToUpdate))
522 MO.setReg(PPC::NoRegister);
523 LV->recomputeForSingleDefVirtReg(RegToUpdate);
524 }
525 };
526
527 for (MachineBasicBlock &MBB : *MF) {
528 for (MachineInstr &MI : MBB) {
529
530 // If the previous instruction was marked for elimination,
531 // remove it now.
532 if (ToErase) {
533 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
534 LLVM_DEBUG(ToErase->dump());
535 recomputeLVForDyingInstr();
536 ToErase->eraseFromParent();
537 ToErase = nullptr;
538 }
539 // If a conditional trap instruction got optimized to an
540 // unconditional trap, eliminate all the instructions after
541 // the trap.
542 if (EnableTrapOptimization && TrapOpt) {
543 ToErase = &MI;
544 continue;
545 }
546
547 // Ignore debug instructions.
548 if (MI.isDebugInstr())
549 continue;
550
551 if (!DebugCounter::shouldExecute(PeepholePerOpCounter))
552 continue;
553
554 // Per-opcode peepholes.
555 switch (MI.getOpcode()) {
556
557 default:
558 break;
559 case PPC::COPY: {
560 Register Src = MI.getOperand(1).getReg();
561 Register Dst = MI.getOperand(0).getReg();
562 if (!Src.isVirtual() || !Dst.isVirtual())
563 break;
564 if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
565 MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
566 break;
567
568 // We are copying an unprimed accumulator to a primed accumulator.
569 // If the input to the copy is a PHI that is fed only by (i) copies in
570 // the other direction (ii) implicitly defined unprimed accumulators or
571 // (iii) other PHI nodes satisfying (i) and (ii), we can change
572 // the PHI to a PHI on primed accumulators (as long as we also change
573 // its operands). To detect and change such copies, we first get a list
574 // of all the PHI nodes starting from the root PHI node in BFS order.
575 // We then visit all these PHI nodes to check if they can be changed to
576 // primed accumulator PHI nodes and if so, we change them.
577 MachineInstr *RootPHI = MRI->getVRegDef(Src);
578 if (RootPHI->getOpcode() != PPC::PHI)
579 break;
580
581 SmallVector<MachineInstr *, 4> PHIs;
582 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
583 break;
584
585 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
586
587 ToErase = &MI;
588 break;
589 }
590 case PPC::LI:
591 case PPC::LI8: {
592 // If we are materializing a zero, look for any use operands for which
593 // zero means immediate zero. All such operands can be replaced with
594 // PPC::ZERO.
595 if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
596 break;
597 Register MIDestReg = MI.getOperand(0).getReg();
598 bool Folded = false;
599 for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
600 Folded |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
601 if (MRI->use_nodbg_empty(MIDestReg)) {
602 ++NumLoadImmZeroFoldedAndRemoved;
603 ToErase = &MI;
604 }
605 if (Folded)
606 addRegToUpdate(MIDestReg);
607 Simplified |= Folded;
608 break;
609 }
610 case PPC::STW:
611 case PPC::STD: {
612 MachineFrameInfo &MFI = MF->getFrameInfo();
613 if (MFI.hasVarSizedObjects() ||
614 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
615 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
616 break;
617 // When encountering a TOC save instruction, call UpdateTOCSaves
618 // to add it to the TOCSaves map and mark any existing TOC saves
619 // it dominates as redundant.
620 if (TII->isTOCSaveMI(MI))
621 UpdateTOCSaves(TOCSaves, &MI);
622 break;
623 }
624 case PPC::XXPERMDI: {
625 // Perform simplifications of 2x64 vector swaps and splats.
626 // A swap is identified by an immediate value of 2, and a splat
627 // is identified by an immediate value of 0 or 3.
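// For example (with identical inputs %1), XXPERMDI %1, %1, 2 swaps the two
// doublewords of %1, while XXPERMDI %1, %1, 0 (or 3) splats doubleword 0
// (or 1) into both halves of the result.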
628 int Immed = MI.getOperand(3).getImm();
629
630 if (Immed == 1)
631 break;
632
633 // For each of these simplifications, we need the two source
634 // regs to match. Unfortunately, MachineCSE ignores COPY and
635 // SUBREG_TO_REG, so for example we can see
636 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
637 // We have to look through chains of COPY and SUBREG_TO_REG
638 // to find the real source values for comparison.
639 Register TrueReg1 =
640 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
641 Register TrueReg2 =
642 TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
643
644 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
645 break;
646
647 MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
648
649 if (!DefMI)
650 break;
651
652 unsigned DefOpc = DefMI->getOpcode();
653
654 // If this is a splat fed by a splatting load, the splat is
655 // redundant. Replace with a copy. This doesn't happen directly due
656 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
657 // a load of a double to a vector of 64-bit integers.
658 auto isConversionOfLoadAndSplat = [=]() -> bool {
659 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
660 return false;
661 Register FeedReg1 =
662 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
663 if (FeedReg1.isVirtual()) {
664 MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
665 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
666 return true;
667 }
668 return false;
669 };
670 if ((Immed == 0 || Immed == 3) &&
671 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
672 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
673 "to load-and-splat/copy: ");
674 LLVM_DEBUG(MI.dump());
675 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
676 MI.getOperand(0).getReg())
677 .add(MI.getOperand(1));
678 addRegToUpdate(MI.getOperand(1).getReg());
679 ToErase = &MI;
680 Simplified = true;
681 }
682
683 // If this is a splat or a swap fed by another splat, we
684 // can replace it with a copy.
685 if (DefOpc == PPC::XXPERMDI) {
686 Register DefReg1 = DefMI->getOperand(1).getReg();
687 Register DefReg2 = DefMI->getOperand(2).getReg();
688 unsigned DefImmed = DefMI->getOperand(3).getImm();
689
690 // If the two inputs are not the same register, check to see if
691 // they originate from the same virtual register after only
692 // copy-like instructions.
693 if (DefReg1 != DefReg2) {
694 Register FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
695 Register FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
696
697 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
698 break;
699 }
700
701 if (DefImmed == 0 || DefImmed == 3) {
702 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
703 "to splat/copy: ");
704 LLVM_DEBUG(MI.dump());
705 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
706 MI.getOperand(0).getReg())
707 .add(MI.getOperand(1));
708 addRegToUpdate(MI.getOperand(1).getReg());
709 ToErase = &MI;
710 Simplified = true;
711 }
712
713 // If this is a splat fed by a swap, we can simply modify
714 // the splat to splat the other value from the swap's input
715 // parameter.
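// For example (illustrative vregs):
//   %2:vsrc = XXPERMDI %1, %1, 2   ; swap
//   %3:vsrc = XXPERMDI %2, %2, 0   ; splat doubleword 0 of the swap
// --->
//   %3:vsrc = XXPERMDI %1, %1, 3   ; splat doubleword 1 of the original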
716 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
717 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
718 LLVM_DEBUG(MI.dump());
719 addRegToUpdate(MI.getOperand(1).getReg());
720 addRegToUpdate(MI.getOperand(2).getReg());
721 MI.getOperand(1).setReg(DefReg1);
722 MI.getOperand(2).setReg(DefReg2);
723 MI.getOperand(3).setImm(3 - Immed);
724 addRegToUpdate(DefReg1);
725 addRegToUpdate(DefReg2);
726 Simplified = true;
727 }
728
729 // If this is a swap fed by a swap, we can replace it
730 // with a copy from the first swap's input.
731 else if (Immed == 2 && DefImmed == 2) {
732 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
733 LLVM_DEBUG(MI.dump());
734 addRegToUpdate(MI.getOperand(1).getReg());
735
736 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
737 MI.getOperand(0).getReg())
738 .add(DefMI->getOperand(1));
739 addRegToUpdate(DefMI->getOperand(1).getReg());
741 ToErase = &MI;
742 Simplified = true;
743 }
744 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
745 DefOpc == PPC::XXPERMDIs &&
746 (DefMI->getOperand(2).getImm() == 0 ||
747 DefMI->getOperand(2).getImm() == 3)) {
748
749 if (!MRI->hasOneNonDBGUser(DefMI->getOperand(0).getReg()))
750 break;
751 Simplified = true;
752 // Swap of a splat, convert to copy.
753 if (Immed == 2) {
754 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
755 LLVM_DEBUG(MI.dump());
756 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
757 MI.getOperand(0).getReg())
758 .add(MI.getOperand(1));
759 addRegToUpdate(MI.getOperand(1).getReg());
760 ToErase = &MI;
761 break;
762 }
763 // Splat fed by another splat - switch the output of the first
764 // and remove the second.
765 ToErase = &MI;
766 DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
767 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
768 LLVM_DEBUG(MI.dump());
769 } else if (Immed == 2 &&
770 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
771 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
772 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
773 DefOpc == PPC::VSPLTISW)) {
774 // Swap of various vector splats, convert to copy.
775 ToErase = &MI;
776 Simplified = true;
777 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
778 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
779 LLVM_DEBUG(MI.dump());
780 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
781 MI.getOperand(0).getReg())
782 .add(MI.getOperand(1));
783 addRegToUpdate(MI.getOperand(1).getReg());
784 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
785 TII->isLoadFromConstantPool(DefMI)) {
786 const Constant *C = TII->getConstantFromConstantPool(DefMI);
787 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
788 ToErase = &MI;
789 Simplified = true;
790 LLVM_DEBUG(dbgs()
791 << "Optimizing swap(splat pattern from constant-pool) "
792 "=> copy(splat pattern from constant-pool): ");
793 LLVM_DEBUG(MI.dump());
794 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
795 MI.getOperand(0).getReg())
796 .add(MI.getOperand(1));
797 addRegToUpdate(MI.getOperand(1).getReg());
798 }
799 }
800 break;
801 }
802 case PPC::VSPLTB:
803 case PPC::VSPLTH:
804 case PPC::XXSPLTW: {
805 unsigned MyOpcode = MI.getOpcode();
806 // The operand number of the source register in the splat instruction.
807 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
808 Register TrueReg =
809 TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
810 if (!TrueReg.isVirtual())
811 break;
812 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
813 if (!DefMI)
814 break;
815 unsigned DefOpcode = DefMI->getOpcode();
816 auto isConvertOfSplat = [=]() -> bool {
817 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
818 return false;
819 Register ConvReg = DefMI->getOperand(1).getReg();
820 if (!ConvReg.isVirtual())
821 return false;
822 MachineInstr *Splt = MRI->getVRegDef(ConvReg);
823 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
824 Splt->getOpcode() == PPC::XXSPLTW);
825 };
826 bool AlreadySplat = (MyOpcode == DefOpcode) ||
827 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
828 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
829 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
830 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
831 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
832 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
833
834 // If the instruction[s] that feed this splat have already splat
835 // the value, this splat is redundant.
836 if (AlreadySplat) {
837 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
838 LLVM_DEBUG(MI.dump());
839 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
840 MI.getOperand(0).getReg())
841 .add(MI.getOperand(OpNo));
842 addRegToUpdate(MI.getOperand(OpNo).getReg());
843 ToErase = &MI;
844 Simplified = true;
845 }
846
847 // Splat fed by a shift. This usually happens when we align the value
848 // to splat into vector element zero.
849 if (DefOpcode == PPC::XXSLDWI) {
850 Register ShiftOp1 = DefMI->getOperand(1).getReg();
851
852 if (ShiftOp1 == DefMI->getOperand(2).getReg()) {
853 // For example, we can erase the XXSLDWI in the following:
854 // %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
855 // %6:vrrc = VSPLTB 15, killed %2:vrrc
856 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
857 //
858 // --->
859 //
860 // %6:vrrc = VSPLTB 3, killed %1:vrrc
861 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
862
863 if (MRI->hasOneNonDBGUse(DefMI->getOperand(0).getReg())) {
864 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
865 LLVM_DEBUG(DefMI->dump());
866 ToErase = DefMI;
867 }
868 Simplified = true;
869 unsigned ShiftImm = DefMI->getOperand(3).getImm();
870 // The operand number of the splat Imm in the instruction.
871 unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
872 unsigned SplatImm = MI.getOperand(SplatImmNo).getImm();
873
874 // Calculate the new splat-element immediate. We need to convert the
875 // element index into the proper unit (byte for VSPLTB, halfword for
876 // VSPLTH, word for XXSPLTW) because PPC::XXSLDWI interprets its
877 // ShiftImm in 32-bit word units.
878 auto CalculateNewElementIdx = [&](unsigned Opcode) {
879 if (Opcode == PPC::VSPLTB)
880 return (SplatImm + ShiftImm * 4) & 0xF;
881 else if (Opcode == PPC::VSPLTH)
882 return (SplatImm + ShiftImm * 2) & 0x7;
883 else
884 return (SplatImm + ShiftImm) & 0x3;
885 };
886
887 unsigned NewElem = CalculateNewElementIdx(MyOpcode);
888
889 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
890 << " to " << NewElem << " in instruction: ");
891 LLVM_DEBUG(MI.dump());
892 addRegToUpdate(MI.getOperand(OpNo).getReg());
893 addRegToUpdate(ShiftOp1);
894 MI.getOperand(OpNo).setReg(ShiftOp1);
895 MI.getOperand(SplatImmNo).setImm(NewElem);
896 }
897 }
898 break;
899 }
900 case PPC::XVCVDPSP: {
901 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
902 Register TrueReg =
903 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
904 if (!TrueReg.isVirtual())
905 break;
906 MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
907
908 // This can occur when building a vector of single precision or integer
909 // values.
910 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
911 Register DefsReg1 =
912 TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
913 Register DefsReg2 =
914 TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
915 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
916 break;
917 MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
918 MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
919
920 if (!P1 || !P2)
921 break;
922
923 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
924 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
925 // the FRSP/XSRSP.
926 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
927 unsigned Opc = RoundInstr->getOpcode();
928 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
929 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
930 Simplified = true;
931 Register ConvReg1 = RoundInstr->getOperand(1).getReg();
932 Register FRSPDefines = RoundInstr->getOperand(0).getReg();
933 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
934 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
935 if (Use.getOperand(i).isReg() &&
936 Use.getOperand(i).getReg() == FRSPDefines)
937 Use.getOperand(i).setReg(ConvReg1);
938 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
939 LLVM_DEBUG(RoundInstr->dump());
940 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
941 LLVM_DEBUG(MI.dump());
942 LLVM_DEBUG(dbgs() << "Through instruction:\n");
943 LLVM_DEBUG(Use.dump());
944 addRegToUpdate(ConvReg1);
945 addRegToUpdate(FRSPDefines);
946 ToErase = RoundInstr;
947 }
948 };
949
950 // If the input to XVCVDPSP is a vector that was built (even
951 // partially) out of FRSP's, the FRSP(s) can safely be removed
952 // since this instruction performs the same operation.
953 if (P1 != P2) {
954 removeFRSPIfPossible(P1);
955 removeFRSPIfPossible(P2);
956 break;
957 }
958 removeFRSPIfPossible(P1);
959 }
960 break;
961 }
962 case PPC::EXTSH:
963 case PPC::EXTSH8:
964 case PPC::EXTSH8_32_64: {
965 if (!EnableSExtElimination) break;
966 Register NarrowReg = MI.getOperand(1).getReg();
967 if (!NarrowReg.isVirtual())
968 break;
969
970 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
971 unsigned SrcOpcode = SrcMI->getOpcode();
972 // If we've used a zero-extending load that we will sign-extend,
973 // just do a sign-extending load.
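// For example (illustrative operands):
//   %1:gprc = LHZ 0, %0
//   %2:gprc = EXTSH killed %1
// --->
//   %2:gprc = LHA 0, %0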
974 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
975 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
976 break;
977 // Determine the new opcode. We need to make sure that if the original
978 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
979 // Likewise if the source is X-Form the new opcode should also be
980 // X-Form.
981 unsigned Opc = PPC::LHA;
982 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
983 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
984 MI.getOpcode() == PPC::EXTSH8_32_64;
985
986 if (SourceIsXForm && MIIs64Bit)
987 Opc = PPC::LHAX8;
988 else if (SourceIsXForm && !MIIs64Bit)
989 Opc = PPC::LHAX;
990 else if (MIIs64Bit)
991 Opc = PPC::LHA8;
992
993 addRegToUpdate(NarrowReg);
994 addRegToUpdate(MI.getOperand(0).getReg());
995
996 // We are removing a definition of NarrowReg which will cause
997 // problems in AliveBlocks. Add an implicit def that will be
998 // removed so that AliveBlocks are updated correctly.
999 addDummyDef(MBB, &MI, NarrowReg);
1000 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1001 LLVM_DEBUG(SrcMI->dump());
1002 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1003 LLVM_DEBUG(MI.dump());
1004 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1005 SrcMI->setDesc(TII->get(Opc));
1006 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1007 ToErase = &MI;
1008 Simplified = true;
1009 NumEliminatedSExt++;
1010 }
1011 break;
1012 }
1013 case PPC::EXTSW:
1014 case PPC::EXTSW_32:
1015 case PPC::EXTSW_32_64: {
1016 if (!EnableSExtElimination) break;
1017 Register NarrowReg = MI.getOperand(1).getReg();
1018 if (!NarrowReg.isVirtual())
1019 break;
1020
1021 MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
1022 unsigned SrcOpcode = SrcMI->getOpcode();
1023 // If we've used a zero-extending load that we will sign-extend,
1024 // just do a sign-extending load.
1025 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
1026 if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
1027 break;
1028
1029 // The transformation from a zero-extending load to a sign-extending
1030 // load is only legal when the displacement is a multiple of 4.
1031 // If the displacement is not at least 4 byte aligned, don't perform
1032 // the transformation.
1033 bool IsWordAligned = false;
1034 if (SrcMI->getOperand(1).isGlobal()) {
1035 const GlobalVariable *GV =
1036 dyn_cast<GlobalVariable>(SrcMI->getOperand(1).getGlobal());
1037 if (GV && GV->getAlign() && *GV->getAlign() >= 4 &&
1038 (SrcMI->getOperand(1).getOffset() % 4 == 0))
1039 IsWordAligned = true;
1040 } else if (SrcMI->getOperand(1).isImm()) {
1041 int64_t Value = SrcMI->getOperand(1).getImm();
1042 if (Value % 4 == 0)
1043 IsWordAligned = true;
1044 }
1045
1046 // Determine the new opcode. We need to make sure that if the original
1047 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1048 // Likewise if the source is X-Form the new opcode should also be
1049 // X-Form.
1050 unsigned Opc = PPC::LWA_32;
1051 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1052 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1053 MI.getOpcode() == PPC::EXTSW_32_64;
1054
1055 if (SourceIsXForm && MIIs64Bit)
1056 Opc = PPC::LWAX;
1057 else if (SourceIsXForm && !MIIs64Bit)
1058 Opc = PPC::LWAX_32;
1059 else if (MIIs64Bit)
1060 Opc = PPC::LWA;
1061
1062 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1063 break;
1064
1065 addRegToUpdate(NarrowReg);
1066 addRegToUpdate(MI.getOperand(0).getReg());
1067
1068 // We are removing a definition of NarrowReg which will cause
1069 // problems in AliveBlocks. Add an implicit def that will be
1070 // removed so that AliveBlocks are updated correctly.
1071 addDummyDef(MBB, &MI, NarrowReg);
1072 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1073 LLVM_DEBUG(SrcMI->dump());
1074 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1075 LLVM_DEBUG(MI.dump());
1076 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1077 SrcMI->setDesc(TII->get(Opc));
1078 SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
1079 ToErase = &MI;
1080 Simplified = true;
1081 NumEliminatedSExt++;
1082 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1083 TII->isSignExtended(NarrowReg, MRI)) {
1084 // We can eliminate EXTSW if the input is known to be already
1085 // sign-extended. However, we are not sure whether a spill will occur
1086 // during register allocation. If there is no promotion, it will use
1087 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1088 // since the register class is 32-bits. Consequently, the high 32-bit
1089 // information will be lost. Therefore, all these instructions in the
1090 // chain used to deduce sign extension to eliminate the 'extsw' will
1091 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1092 // is eliminated.
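// A sketch of the resulting sequence (vreg numbers are illustrative):
//   %2:g8rc = EXTSW_32_64 %1:gprc   ; %1 known to be sign-extended
// --->
//   %3:g8rc = IMPLICIT_DEF
//   %2:g8rc = INSERT_SUBREG %3, %1, sub_32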
1093 TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
1094
1095 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1096 Register TmpReg =
1097 MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
1098 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
1099 TmpReg);
1100 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
1101 MI.getOperand(0).getReg())
1102 .addReg(TmpReg)
1103 .addReg(NarrowReg)
1104 .addImm(PPC::sub_32);
1105 ToErase = &MI;
1106 Simplified = true;
1107 NumEliminatedSExt++;
1108 }
1109 break;
1110 }
1111 case PPC::RLDICL: {
1112 // We can eliminate RLDICL (e.g. for zero-extension)
1113 // if all bits to clear are already zero in the input.
1114 // This code assumes the following code sequence for zero-extension.
1115 // %6 = COPY %5:sub_32; (optional)
1116 // %8 = IMPLICIT_DEF;
1117 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
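// The RLDICL is removable when its clear amount (operand 3, the mask-begin
// bit) does not exceed the number of leading zeros already known for the
// source value; e.g. a value loaded by LHZ has at least 48 known leading
// zeros, so an RLDICL clearing 32 bits becomes a plain COPY.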
1118 if (!EnableZExtElimination) break;
1119
1120 if (MI.getOperand(2).getImm() != 0)
1121 break;
1122
1123 Register SrcReg = MI.getOperand(1).getReg();
1124 if (!SrcReg.isVirtual())
1125 break;
1126
1127 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1128 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1129 SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
1130 break;
1131
1132 MachineInstr *ImpDefMI, *SubRegMI;
1133 ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
1134 SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
1135 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1136
1137 SrcMI = SubRegMI;
1138 if (SubRegMI->getOpcode() == PPC::COPY) {
1139 Register CopyReg = SubRegMI->getOperand(1).getReg();
1140 if (CopyReg.isVirtual())
1141 SrcMI = MRI->getVRegDef(CopyReg);
1142 }
1143 if (!SrcMI->getOperand(0).isReg())
1144 break;
1145
1146 unsigned KnownZeroCount =
1147 getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
1148 if (MI.getOperand(3).getImm() <= KnownZeroCount) {
1149 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1150 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1151 MI.getOperand(0).getReg())
1152 .addReg(SrcReg);
1153 addRegToUpdate(SrcReg);
1154 ToErase = &MI;
1155 Simplified = true;
1156 NumEliminatedZExt++;
1157 }
1158 break;
1159 }
1160
1161 // TODO: Any instruction that has an immediate form fed only by a PHI
1162 // whose operands are all load immediate can be folded away. We currently
1163 // do this for ADD instructions, but should expand it to arithmetic and
1164 // binary instructions with immediate forms in the future.
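// A sketch of the ADD transformation (vregs and immediates are illustrative):
//   %2:gprc = LI 10
//   %3:gprc = LI 20
//   %4:gprc = PHI %2, %bb.1, %3, %bb.2
//   %5:gprc = ADD4 killed %4, killed %1
// --->
//   %2:gprc = ADDI %1, 10
//   %3:gprc = ADDI %1, 20
//   %4:gprc = PHI %2, %bb.1, %3, %bb.2
//   %5:gprc = COPY %4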
1165 case PPC::ADD4:
1166 case PPC::ADD8: {
1167 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1168 assert(PhiOp && "Invalid Operand!");
1169 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1170
1171 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1172 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
1173 };
1174
1175 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1176 MachineOperand *PhiOp) {
1177 assert(PhiOp && "Invalid Operand!");
1178 assert(DominatorOp && "Invalid Operand!");
1179 MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
1180 MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
1181
1182 // Note: the vregs only show up at the odd operand indices of a PHI node;
1183 // the even indices hold the predecessor BB info.
1184 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1185 MachineInstr *LiMI =
1186 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1187 if (!LiMI ||
1188 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1189 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
1190 !MDT->dominates(DefDomMI, LiMI))
1191 return false;
1192 }
1193
1194 return true;
1195 };
1196
1197 MachineOperand Op1 = MI.getOperand(1);
1198 MachineOperand Op2 = MI.getOperand(2);
1199 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1200 std::swap(Op1, Op2);
1201 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1202 break; // We don't have an ADD fed by LI's that can be transformed
1203
1204 // Now we know that Op1 is the PHI node and Op2 is the dominator
1205 Register DominatorReg = Op2.getReg();
1206
1207 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1208 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1209 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1210 MRI->setRegClass(DominatorReg, TRC);
1211
1212 // replace LIs with ADDIs
1213 MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
1214 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1215 MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
1216 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1217 LLVM_DEBUG(LiMI->dump());
1218
1219 // There could be repeated registers in the PHI, e.g.: %1 =
1220 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; so if we've
1221 // already replaced the def instruction, skip it.
1222 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1223 continue;
1224
1225 assert((LiMI->getOpcode() == PPC::LI ||
1226 LiMI->getOpcode() == PPC::LI8) &&
1227 "Invalid Opcode!");
1228 auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
1229 LiMI->removeOperand(1); // remove the imm of LI
1230 LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1231 : PPC::ADDI8));
1232 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1233 .addReg(DominatorReg)
1234 .addImm(LiImm); // restore the imm of LI
1235 LLVM_DEBUG(LiMI->dump());
1236 }
1237
1238 // Replace ADD with COPY
1239 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1240 LLVM_DEBUG(MI.dump());
1241 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
1242 MI.getOperand(0).getReg())
1243 .add(Op1);
1244 addRegToUpdate(Op1.getReg());
1245 addRegToUpdate(Op2.getReg());
1246 ToErase = &MI;
1247 Simplified = true;
1248 NumOptADDLIs++;
1249 break;
1250 }
1251 case PPC::RLDICR: {
1252 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1253 combineSEXTAndSHL(MI, ToErase);
1254 break;
1255 }
1256 case PPC::ANDI_rec:
1257 case PPC::ANDI8_rec:
1258 case PPC::ANDIS_rec:
1259 case PPC::ANDIS8_rec: {
1260 Register TrueReg =
1261 TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
1262 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(TrueReg))
1263 break;
1264
1265 MachineInstr *SrcMI = MRI->getVRegDef(TrueReg);
1266 if (!SrcMI)
1267 break;
1268
1269 unsigned SrcOpCode = SrcMI->getOpcode();
1270 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1271 break;
1272
1273 Register SrcReg, DstReg;
1274 SrcReg = SrcMI->getOperand(1).getReg();
1275 DstReg = MI.getOperand(1).getReg();
1276 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(SrcReg);
1277 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
1278 if (DstRC != SrcRC)
1279 break;
1280
1281 uint64_t AndImm = MI.getOperand(2).getImm();
1282 if (MI.getOpcode() == PPC::ANDIS_rec ||
1283 MI.getOpcode() == PPC::ANDIS8_rec)
1284 AndImm <<= 16;
1285 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(AndImm);
1286 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(AndImm);
1287 uint64_t ImmSrc = SrcMI->getOperand(3).getImm();
1288
1289 // We can transform `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` into `ANDI_rec 0`
1290 // if all bits to AND are already zero in the input.
1291 bool PatternResultZero =
1292 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1293 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1294
1295 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1296 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1297 bool PatternRemoveRotate =
1298 SrcMI->getOperand(2).getImm() == 0 &&
1299 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1300 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1301
1302 if (!PatternResultZero && !PatternRemoveRotate)
1303 break;
1304
1305 LLVM_DEBUG(dbgs() << "Combining pair: ");
1306 LLVM_DEBUG(SrcMI->dump());
1307 LLVM_DEBUG(MI.dump());
1308 if (PatternResultZero)
1309 MI.getOperand(2).setImm(0);
1310 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1311 LLVM_DEBUG(dbgs() << "To: ");
1312 LLVM_DEBUG(MI.dump());
1313 addRegToUpdate(MI.getOperand(1).getReg());
1314 addRegToUpdate(SrcMI->getOperand(0).getReg());
1315 Simplified = true;
1316 break;
1317 }
1318 case PPC::RLWINM:
1319 case PPC::RLWINM_rec:
1320 case PPC::RLWINM8:
1321 case PPC::RLWINM8_rec: {
1322 // We might replace operand 1 of the instruction which will
1323 // require we recompute kill flags for it.
1324 Register OrigOp1Reg = MI.getOperand(1).isReg()
1325 ? MI.getOperand(1).getReg()
1326 : PPC::NoRegister;
1327 Simplified = TII->combineRLWINM(MI, &ToErase);
1328 if (Simplified) {
1329 addRegToUpdate(OrigOp1Reg);
1330 if (MI.getOperand(1).isReg())
1331 addRegToUpdate(MI.getOperand(1).getReg());
1332 if (ToErase && ToErase->getOperand(1).isReg())
1333 for (auto UseReg : ToErase->explicit_uses())
1334 if (UseReg.isReg())
1335 addRegToUpdate(UseReg.getReg());
1336 ++NumRotatesCollapsed;
1337 }
1338 break;
1339 }
1340 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1341 // always trap, and we will delete the instruction if it can never trap.
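// For example (illustrative values):
//   %1:gprc = LI 4
//   TWI 16, killed %1, 8   ; TO=16 encodes "trap if less than (signed)"
// always traps because 4 < 8, so it is replaced by an unconditional TRAP,
// whereas a condition that can never hold simply deletes the instruction.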
1342 case PPC::TDI:
1343 case PPC::TWI:
1344 case PPC::TD:
1345 case PPC::TW: {
1346 if (!EnableTrapOptimization) break;
1347 MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
1348 MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
1349 bool IsOperand2Immediate = MI.getOperand(2).isImm();
1350 // We can only do the optimization if we can get immediates
1351 // from both operands
1352 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1353 LiMI1->getOpcode() == PPC::LI8)))
1354 break;
1355 if (!IsOperand2Immediate &&
1356 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1357 LiMI2->getOpcode() == PPC::LI8)))
1358 break;
1359
1360 auto ImmOperand0 = MI.getOperand(0).getImm();
1361 auto ImmOperand1 = LiMI1->getOperand(1).getImm();
1362 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
1363 : LiMI2->getOperand(1).getImm();
1364
1365 // We will replace the MI with an unconditional trap if it will always
1366 // trap.
1367 if ((ImmOperand0 == 31) ||
1368 ((ImmOperand0 & 0x10) &&
1369 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1370 ((ImmOperand0 & 0x8) &&
1371 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1372 ((ImmOperand0 & 0x2) &&
1373 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1374 ((ImmOperand0 & 0x1) &&
1375 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1376 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1377 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
1378 TrapOpt = true;
1379 }
1380 // We will delete the MI if it will never trap.
1381 ToErase = &MI;
1382 Simplified = true;
1383 break;
1384 }
1385 }
1386 }
1387
1388 // If the last instruction was marked for elimination,
1389 // remove it now.
1390 if (ToErase) {
1391 recomputeLVForDyingInstr();
1392 ToErase->eraseFromParent();
1393 ToErase = nullptr;
1394 }
1395 // Reset TrapOpt to false at the end of the basic block.
1397 TrapOpt = false;
1398 }
1399
1400 // Eliminate all the TOC save instructions which are redundant.
1401 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1402 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1403 if (FI->mustSaveTOC())
1404 NumTOCSavesInPrologue++;
1405
1406 // We try to eliminate redundant compare instruction.
1407 Simplified |= eliminateRedundantCompare();
1408
1409 // If we have made any modifications and added any registers to the set of
1410 // registers for which we need to update the kill flags, do so by recomputing
1411 // LiveVariables for those registers.
1412 for (Register Reg : RegsToUpdate) {
1413 if (!MRI->reg_empty(Reg))
1414 LV->recomputeForSingleDefVirtReg(Reg);
1415 }
1416 return Simplified;
1417}
1418
1419// helper functions for eliminateRedundantCompare
1420static bool isEqOrNe(MachineInstr *BI) {
1421 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1422 unsigned PredCond = PPC::getPredicateCondition(Pred);
1423 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1424}
1425
1426static bool isSupportedCmpOp(unsigned opCode) {
1427 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1428 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1429 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1430 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1431}
1432
1433static bool is64bitCmpOp(unsigned opCode) {
1434 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1435 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1436}
1437
1438static bool isSignedCmpOp(unsigned opCode) {
1439 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1440 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1441}
1442
1443static unsigned getSignedCmpOpCode(unsigned opCode) {
1444 if (opCode == PPC::CMPLD) return PPC::CMPD;
1445 if (opCode == PPC::CMPLW) return PPC::CMPW;
1446 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1447 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1448 return opCode;
1449}
1450
1451// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1452// (LT x) to (LE x-1)
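// For example, a signed (GE 5) can be rewritten as (GT 4); no rewrite is done
// when the immediate is already the smallest value representable in the
// compare's immediate field.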
1453static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1454 uint64_t Imm = CMPI->getOperand(2).getImm();
1455 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1456 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1457 return 0;
1458
1459 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1460 unsigned PredCond = PPC::getPredicateCondition(Pred);
1461 unsigned PredHint = PPC::getPredicateHint(Pred);
1462 if (PredCond == PPC::PRED_GE)
1463 return PPC::getPredicate(PPC::PRED_GT, PredHint);
1464 if (PredCond == PPC::PRED_LT)
1465 return PPC::getPredicate(PPC::PRED_LE, PredHint);
1466
1467 return 0;
1468}
1469
1470// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1471// (LE x) to (LT x+1)
1472static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1473 uint64_t Imm = CMPI->getOperand(2).getImm();
1474 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
1475 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1476 return 0;
1477
1478 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
1479 unsigned PredCond = PPC::getPredicateCondition(Pred);
1480 unsigned PredHint = PPC::getPredicateHint(Pred);
1481 if (PredCond == PPC::PRED_GT)
1482 return PPC::getPredicate(PPC::PRED_GE, PredHint);
1483 if (PredCond == PPC::PRED_LE)
1484 return PPC::getPredicate(PPC::PRED_LT, PredHint);
1485
1486 return 0;
1487}
1488
1489// This takes a Phi node and returns a register value for the specified BB.
1490static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1491 MachineBasicBlock *MBB) {
1492 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1493 MachineOperand &MO = Phi->getOperand(I);
1494 if (MO.getMBB() == MBB)
1495 return Phi->getOperand(I-1).getReg();
1496 }
1497 llvm_unreachable("invalid src basic block for this Phi node\n");
1498 return 0;
1499}
1500
1501// This function tracks the source of the register through register copies.
1502// If BB1 and BB2 are non-NULL, we also track through a PHI instruction in BB2,
1503// assuming that control flows from BB1 into BB2.
1504static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1505 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1506 unsigned SrcReg = Reg;
1507 while (true) {
1508 unsigned NextReg = SrcReg;
1509 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
1510 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1511 NextReg = getIncomingRegForBlock(Inst, BB1);
1512 // We track through a PHI only once to avoid an infinite loop.
1513 BB1 = nullptr;
1514 }
1515 else if (Inst->isFullCopy())
1516 NextReg = Inst->getOperand(1).getReg();
1517 if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
1518 break;
1519 SrcReg = NextReg;
1520 }
1521 return SrcReg;
1522}
1523
1524static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
1525 MachineBasicBlock *&PredMBB,
1526 MachineBasicBlock *&MBBtoMoveCmp,
1527 MachineRegisterInfo *MRI) {
1528
1529 auto isEligibleBB = [&](MachineBasicBlock &BB) {
1530 auto BII = BB.getFirstInstrTerminator();
1531 // We optimize BBs ending with a conditional branch.
1532 // We check only for BCC here, not BCCLR, because BCCLR
1533 // will be formed only later in the pipeline.
1534 if (BB.succ_size() == 2 &&
1535 BII != BB.instr_end() &&
1536 (*BII).getOpcode() == PPC::BCC &&
1537 (*BII).getOperand(1).isReg()) {
1538 // We optimize only if the condition code is used only by one BCC.
1539 Register CndReg = (*BII).getOperand(1).getReg();
1540 if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(CndReg))
1541 return false;
1542
1543 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
1544 // We assume compare and branch are in the same BB for ease of analysis.
1545 if (CMPI->getParent() != &BB)
1546 return false;
1547
1548 // We skip this BB if a physical register is used in comparison.
1549 for (MachineOperand &MO : CMPI->operands())
1550 if (MO.isReg() && !MO.getReg().isVirtual())
1551 return false;
1552
1553 return true;
1554 }
1555 return false;
1556 };
1557
1558 // If a BB has more than one successor, we could create a new BB and
1559 // move the compare instruction into that new BB.
1560 // So far, we do not move a compare instruction into a BB having multiple
1561 // successors, to avoid potentially increasing code size.
1562 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
1563 return BB.succ_size() == 1;
1564 };
1565
1566 if (!isEligibleBB(MBB))
1567 return false;
1568
1569 unsigned NumPredBBs = MBB.pred_size();
1570 if (NumPredBBs == 1) {
1571 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
1572 if (isEligibleBB(*TmpMBB)) {
1573 PredMBB = TmpMBB;
1574 MBBtoMoveCmp = nullptr;
1575 return true;
1576 }
1577 }
1578 else if (NumPredBBs == 2) {
1579 // We check for the partially redundant case.
1580 // So far, we support cases with only two predecessors
1581 // to avoid increasing the number of instructions.
1582 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
1583 MachineBasicBlock *Pred1MBB = *PI;
1584 MachineBasicBlock *Pred2MBB = *(PI+1);
1585
1586 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
1587 // We assume Pred1MBB is the BB containing the compare to be merged and
1588 // Pred2MBB is the BB to which we will append a compare instruction.
1589 // Proceed as is if Pred1MBB is different from MBB.
1590 }
1591 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
1592 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
1593 std::swap(Pred1MBB, Pred2MBB);
1594 }
1595 else return false;
1596
1597 if (Pred1MBB == &MBB)
1598 return false;
1599
1600 // Here, Pred2MBB is the BB to which we need to append a compare inst.
1601 // We cannot move the compare instruction if operands are not available
1602 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
1603 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
1604 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
1605 for (int I = 1; I <= 2; I++)
1606 if (CMPI->getOperand(I).isReg()) {
1607 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
1608 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
1609 return false;
1610 }
1611
1612 PredMBB = Pred1MBB;
1613 MBBtoMoveCmp = Pred2MBB;
1614 return true;
1615 }
1616
1617 return false;
1618}
1619
1620 // This function iterates over the input map, which contains pairs of a TOC
1621 // save instruction and a flag. The flag is set to false if the TOC save has
1622 // been proven redundant. All TOC saves marked as redundant are then erased
1623 // from their basic block.
1624bool PPCMIPeephole::eliminateRedundantTOCSaves(
1625 std::map<MachineInstr *, bool> &TOCSaves) {
1626 bool Simplified = false;
1627 int NumKept = 0;
1628 for (auto TOCSave : TOCSaves) {
1629 if (!TOCSave.second) {
1630 TOCSave.first->eraseFromParent();
1631 RemoveTOCSave++;
1632 Simplified = true;
1633 } else {
1634 NumKept++;
1635 }
1636 }
1637
1638 if (NumKept > 1)
1639 MultiTOCSaves++;
1640
1641 return Simplified;
1642}
1643
1644// If multiple conditional branches are executed based on the (essentially)
1645// same comparison, we merge compare instructions into one and make multiple
1646// conditional branches on this comparison.
1647// For example,
1648// if (a == 0) { ... }
1649// else if (a < 0) { ... }
1650// can be executed by one compare and two conditional branches instead of
1651// two pairs of a compare and a conditional branch.
1652//
1653// This method merges two compare instructions in two MBBs and modifies the
1654// compare and conditional branch instructions if needed.
1655// For the above example, the input for this pass looks like:
1656// cmplwi r3, 0
1657// beq 0, .LBB0_3
1658// cmpwi r3, -1
1659// bgt 0, .LBB0_4
1660// So, before merging two compares, we need to modify these instructions as
1661 // cmpwi r3, 0 ; cmplwi and cmpwi yield the same result for beq
1662 // beq 0, .LBB0_3
1663 // cmpwi r3, 0 ; greater than -1 means greater than or equal to 0
1664// bge 0, .LBB0_4
1665
1666bool PPCMIPeephole::eliminateRedundantCompare() {
1667 bool Simplified = false;
1668
1669 for (MachineBasicBlock &MBB2 : *MF) {
1670 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1671
1672 // For the fully redundant case, we select two basic blocks MBB1 and MBB2
1673 // as an optimization target if
1674 // - both MBBs end with a conditional branch,
1675 // - MBB1 is the only predecessor of MBB2, and
1676 // - neither compare takes a physical register as an operand.
1677 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp to nullptr.
1678 //
1679 // As a partially redundant case, we additionally handle the case where MBB2
1680 // has one additional predecessor, which has only one successor (MBB2).
1681 // In this case, we move the compare instruction originally in MBB2 into
1682 // MBBtoMoveCmp. This partially redundant case typically appears when
1683 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1684 //
1685 // Overview of CFG of related basic blocks
1686 // Fully redundant case Partially redundant case
1687 // -------- ---------------- --------
1688 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1689 // -------- ---------------- --------
1690 // | \ (w/ 1 succ) \ | \
1691 // | \ \ | \
1692 // | \ |
1693 // -------- --------
1694 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1695 // -------- and 2 succ) -------- and 2 succ)
1696 // | \ | \
1697 // | \ | \
1698 //
1699 if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
1700 continue;
1701
1702 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1703 MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
1704
1705 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1706 MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
1707 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1708
1709 // We cannot optimize an unsupported compare opcode or
1710 // a mix of 32-bit and 64-bit comparisons
1711 if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
1712 !isSupportedCmpOp(CMPI2->getOpcode()) ||
1713 is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
1714 continue;
1715
1716 unsigned NewOpCode = 0;
1717 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1718 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1719 bool SwapOperands = false;
1720
1721 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1722 // Typically, unsigned comparison is used for equality check, but
1723 // we replace it with a signed comparison if the comparison
1724 // to be merged is a signed comparison.
1725 // In other cases of opcode mismatch, we cannot optimize this.
1726
1727 // We cannot change the opcode when comparing against an immediate
1728 // if the most significant bit of the immediate is one,
1729 // because of the difference in sign extension.
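// For example, the 16-bit immediate 0x8000 is treated as 32768 by cmplwi but
// as -32768 by cmpwi, so even an EQ/NE test against it is not interchangeable
// between the unsigned and signed opcodes.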
1730 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1731 if (!I->getOperand(2).isImm())
1732 return false;
1733 int16_t Imm = (int16_t)I->getOperand(2).getImm();
1734 return Imm < 0;
1735 };
1736
1737 if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1738 CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
1739 NewOpCode = CMPI1->getOpcode();
1740 else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1741 getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
1742 NewOpCode = CMPI2->getOpcode();
1743 else continue;
1744 }
1745
1746 if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
1747 // In the case of a comparison between two registers, the two registers
1748 // must be the same to merge the two comparisons.
1749 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1750 nullptr, nullptr, MRI);
1751 unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
1752 nullptr, nullptr, MRI);
1753 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1754 MBB1, &MBB2, MRI);
1755 unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
1756 MBB1, &MBB2, MRI);
1757
1758 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1759 // Same pair of registers in the same order; ready to merge as is.
1760 }
1761 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1762 // Same pair of registers in a different order.
1763 // We reverse the predicate to merge the compare instructions.
1764 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
1765 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
1766 // In the case of partial redundancy, we also need to swap the operands
1767 // of the other compare instruction.
1768 SwapOperands = true;
1769 }
1770 else continue;
1771 }
1772 else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
1773 // In the case of a comparison between a register and an immediate,
1774 // the operand register must be the same in both compare instructions.
1775 unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
1776 nullptr, nullptr, MRI);
1777 unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
1778 MBB1, &MBB2, MRI);
1779 if (Cmp1Operand1 != Cmp2Operand1)
1780 continue;
1781
1782 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
1783 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
1784
1785 // If the immediates are not the same, we try to adjust by changing the
1786 // predicate; e.g. GT imm means GE (imm+1).
1787 if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
1788 int Diff = Imm1 - Imm2;
1789 if (Diff < -2 || Diff > 2)
1790 continue;
1791
1792 unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
1793 unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
1794 unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
1795 unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
1796 if (Diff == 2) {
1797 if (PredToInc2 && PredToDec1) {
1798 NewPredicate2 = PredToInc2;
1799 NewPredicate1 = PredToDec1;
1800 NewImm2++;
1801 NewImm1--;
1802 }
1803 }
1804 else if (Diff == 1) {
1805 if (PredToInc2) {
1806 NewImm2++;
1807 NewPredicate2 = PredToInc2;
1808 }
1809 else if (PredToDec1) {
1810 NewImm1--;
1811 NewPredicate1 = PredToDec1;
1812 }
1813 }
1814 else if (Diff == -1) {
1815 if (PredToDec2) {
1816 NewImm2--;
1817 NewPredicate2 = PredToDec2;
1818 }
1819 else if (PredToInc1) {
1820 NewImm1++;
1821 NewPredicate1 = PredToInc1;
1822 }
1823 }
1824 else if (Diff == -2) {
1825 if (PredToDec2 && PredToInc1) {
1826 NewPredicate2 = PredToDec2;
1827 NewPredicate1 = PredToInc1;
1828 NewImm2--;
1829 NewImm1++;
1830 }
1831 }
1832 }
1833
1834 // We cannot merge the two compares if the immediates are not the same.
1835 if (NewImm2 != NewImm1)
1836 continue;
1837 }
1838
1839 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1840 LLVM_DEBUG(CMPI1->dump());
1841 LLVM_DEBUG(BI1->dump());
1842 LLVM_DEBUG(CMPI2->dump());
1843 LLVM_DEBUG(BI2->dump());
1844 for (const MachineOperand &MO : CMPI1->operands())
1845 if (MO.isReg())
1846 addRegToUpdate(MO.getReg());
1847 for (const MachineOperand &MO : CMPI2->operands())
1848 if (MO.isReg())
1849 addRegToUpdate(MO.getReg());
1850
1851 // We adjust opcode, predicates and immediate as we determined above.
1852 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1853 CMPI1->setDesc(TII->get(NewOpCode));
1854 }
1855 if (NewPredicate1) {
1856 BI1->getOperand(0).setImm(NewPredicate1);
1857 }
1858 if (NewPredicate2) {
1859 BI2->getOperand(0).setImm(NewPredicate2);
1860 }
1861 if (NewImm1 != Imm1) {
1862 CMPI1->getOperand(2).setImm(NewImm1);
1863 }
1864
1865 if (IsPartiallyRedundant) {
1866 // We touch up the compare instruction in MBB2 and move it to
1867 // a previous BB to handle the partially redundant case.
1868 if (SwapOperands) {
1869 Register Op1 = CMPI2->getOperand(1).getReg();
1870 Register Op2 = CMPI2->getOperand(2).getReg();
1871 CMPI2->getOperand(1).setReg(Op2);
1872 CMPI2->getOperand(2).setReg(Op1);
1873 }
1874 if (NewImm2 != Imm2)
1875 CMPI2->getOperand(2).setImm(NewImm2);
1876
1877 for (int I = 1; I <= 2; I++) {
1878 if (CMPI2->getOperand(I).isReg()) {
1879 MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
1880 if (Inst->getParent() != &MBB2)
1881 continue;
1882
1883 assert(Inst->getOpcode() == PPC::PHI &&
1884 "We cannot support if an operand comes from this BB.");
1885 unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
1886 CMPI2->getOperand(I).setReg(SrcReg);
1887 addRegToUpdate(SrcReg);
1888 }
1889 }
1890 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1891 MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
1892
1893 DebugLoc DL = CMPI2->getDebugLoc();
1894 Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
1895 BuildMI(MBB2, MBB2.begin(), DL,
1896 TII->get(PPC::PHI), NewVReg)
1897 .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
1898 .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
1899 BI2->getOperand(1).setReg(NewVReg);
1900 addRegToUpdate(NewVReg);
1901 }
1902 else {
1903 // We finally eliminate the compare instruction in MBB2.
1904 // We do not need to treat CMPI2 specially here in terms of re-computing
1905 // live variables even though it is being deleted because:
1906 // - It defines a register that has a single use (already checked in
1907 // eligibleForCompareElimination())
1908 // - The only user (BI2) is no longer using it so the register is dead (no
1909 // def, no uses)
1910 // - We do not attempt to recompute live variables for dead registers
1911 BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
1912 CMPI2->eraseFromParent();
1913 }
1914
1915 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1916 LLVM_DEBUG(CMPI1->dump());
1917 LLVM_DEBUG(BI1->dump());
1918 LLVM_DEBUG(BI2->dump());
1919 if (IsPartiallyRedundant) {
1920 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1921 << printMBBReference(*MBBtoMoveCmp)
1922 << " to handle partial redundancy.\n");
1923 LLVM_DEBUG(CMPI2->dump());
1924 }
1925 Simplified = true;
1926 }
1927
1928 return Simplified;
1929}
1930
1931// We miss the opportunity to emit an RLDIC when lowering jump tables
1932// since ISEL sees only a single basic block. When selecting, the clear
1933// and shift left will be in different blocks.
1934bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1935 MachineInstr *&ToErase) {
1936 if (MI.getOpcode() != PPC::RLDICR)
1937 return false;
1938
1939 Register SrcReg = MI.getOperand(1).getReg();
1940 if (!SrcReg.isVirtual())
1941 return false;
1942
1943 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
1944 if (SrcMI->getOpcode() != PPC::RLDICL)
1945 return false;
1946
1947 MachineOperand MOpSHSrc = SrcMI->getOperand(2);
1948 MachineOperand MOpMBSrc = SrcMI->getOperand(3);
1949 MachineOperand MOpSHMI = MI.getOperand(2);
1950 MachineOperand MOpMEMI = MI.getOperand(3);
1951 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1952 MOpMEMI.isImm()))
1953 return false;
1954
1955 uint64_t SHSrc = MOpSHSrc.getImm();
1956 uint64_t MBSrc = MOpMBSrc.getImm();
1957 uint64_t SHMI = MOpSHMI.getImm();
1958 uint64_t MEMI = MOpMEMI.getImm();
1959 uint64_t NewSH = SHSrc + SHMI;
1960 uint64_t NewMB = MBSrc - SHMI;
1961 if (NewMB > 63 || NewSH > 63)
1962 return false;
1963
1964 // The bits cleared with RLDICL are [0, MBSrc).
1965 // The bits cleared with RLDICR are (MEMI, 63].
1966 // After the sequence, the bits cleared are:
1967 // [0, MBSrc-SHMI) and (MEMI, 63].
1968 //
1969 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
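// Worked example (operand values are hypothetical): RLDICL %1, %0, 0, 32
// (zero-extend a 32-bit value) followed by RLDICR %2, %1, 2, 61 (shift left
// by 2) gives NewSH = 2 and NewMB = 30; since 63 - NewSH == 61 == MEMI, the
// pair collapses to RLDIC %2, %0, 2, 30.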
1970 if ((63 - NewSH) != MEMI)
1971 return false;
1972
1973 LLVM_DEBUG(dbgs() << "Converting pair: ");
1974 LLVM_DEBUG(SrcMI->dump());
1975 LLVM_DEBUG(MI.dump());
1976
1977 MI.setDesc(TII->get(PPC::RLDIC));
1978 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
1979 MI.getOperand(2).setImm(NewSH);
1980 MI.getOperand(3).setImm(NewMB);
1981 addRegToUpdate(MI.getOperand(1).getReg());
1982 addRegToUpdate(SrcMI->getOperand(0).getReg());
1983
1984 LLVM_DEBUG(dbgs() << "To: ");
1985 LLVM_DEBUG(MI.dump());
1986 NumRotatesCollapsed++;
1987 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1988 if (MRI->use_nodbg_empty(SrcReg)) {
1989 assert(!SrcMI->hasImplicitDef() &&
1990 "Not expecting an implicit def with this instr.");
1991 ToErase = SrcMI;
1992 }
1993 return true;
1994}
1995
1996 // For a case like the following in LLVM IR:
1997 // entry:
1998 //   %iconv = sext i32 %index to i64
1999 //   br i1 undef, label %true, label %false
2000// true:
2001// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
2002// ...
2003 // PPCISelLowering::combineSHL fails to combine them, because the sext and shl
2004 // are in different BBs during instruction selection. We can do a peephole
2005// optimization to combine these two instructions into extswsli after
2006// instruction selection.
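// For instance (hypothetical virtual registers):
//   %1 = EXTSW %0
//   %2 = RLDICR %1, 3, 60      (SH + ME == 63)
// becomes
//   %2 = EXTSWSLI %0, 3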
2007bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
2008 MachineInstr *&ToErase) {
2009 if (MI.getOpcode() != PPC::RLDICR)
2010 return false;
2011
2012 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
2013 return false;
2014
2015 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
2016
2017 MachineOperand MOpSHMI = MI.getOperand(2);
2018 MachineOperand MOpMEMI = MI.getOperand(3);
2019 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
2020 return false;
2021
2022 uint64_t SHMI = MOpSHMI.getImm();
2023 uint64_t MEMI = MOpMEMI.getImm();
2024 if (SHMI + MEMI != 63)
2025 return false;
2026
2027 Register SrcReg = MI.getOperand(1).getReg();
2028 if (!SrcReg.isVirtual())
2029 return false;
2030
2031 MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
2032 if (SrcMI->getOpcode() != PPC::EXTSW &&
2033 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2034 return false;
2035
2036 // If the register defined by extsw has more than one use, combination is not
2037 // needed.
2038 if (!MRI->hasOneNonDBGUse(SrcReg))
2039 return false;
2040
2041 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2042 assert(SrcMI->getOperand(1).isReg() &&
2043 "EXTSW's second operand should be a register");
2044 if (!SrcMI->getOperand(1).getReg().isVirtual())
2045 return false;
2046
2047 LLVM_DEBUG(dbgs() << "Combining pair: ");
2048 LLVM_DEBUG(SrcMI->dump());
2049 LLVM_DEBUG(MI.dump());
2050
2051 MachineInstr *NewInstr =
2052 BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
2053 SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
2054 : TII->get(PPC::EXTSWSLI_32_64),
2055 MI.getOperand(0).getReg())
2056 .add(SrcMI->getOperand(1))
2057 .add(MOpSHMI);
2058 (void)NewInstr;
2059
2060 LLVM_DEBUG(dbgs() << "TO: ");
2061 LLVM_DEBUG(NewInstr->dump());
2062 ++NumEXTSWAndSLDICombined;
2063 ToErase = &MI;
2064 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2065 // can recompute its kill flags. We run DCE immediately after this pass
2066 // to clean up dead instructions such as this.
2067 addRegToUpdate(NewInstr->getOperand(1).getReg());
2068 addRegToUpdate(SrcMI->getOperand(0).getReg());
2069 return true;
2070}
2071
2072 } // end anonymous namespace
2073
2075 "PowerPC MI Peephole Optimization", false, false)
2081 "PowerPC MI Peephole Optimization", false, false)
2082
2083char PPCMIPeephole::ID = 0;
2085llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }