//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PowerPC implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-instr-info"

#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"

STATISTIC(NumStoreSPILLVSRRCAsVec,
          "Number of spillvsrrc spilled to stack as vec");
STATISTIC(NumStoreSPILLVSRRCAsGpr,
          "Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
          "Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
          "Number of compare-immediate instructions fed by constants");
STATISTIC(NumRcRotatesConvertedToRcAnd,
          "Number of record-form rotates converted to record-form andi");

static cl::opt<bool>
DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
                   cl::desc("Disable analysis for CTR loops"));

static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);

static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
cl::Hidden);

static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
  cl::desc("Use the old (incorrect) instruction latency calculation"));

static cl::opt<float>
    FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
                cl::desc("register pressure factor for the transformations."));

static cl::opt<bool> EnableFMARegPressureReduction(
    "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
    cl::desc("enable register pressure reduce in machine combiner pass."));

// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}

PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
    : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
                      /* CatchRetOpcode */ -1,
                      STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
      Subtarget(STI), RI(STI.getTargetMachine()) {}

/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                           const ScheduleDAG *DAG) const {
  unsigned Directive =
      static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
      Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
    const InstrItineraryData *II =
        static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG);
  }

  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                                 const ScheduleDAG *DAG) const {
  unsigned Directive =
      DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();

  // FIXME: Leaving this as-is until we have POWER9 scheduling info
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);

  // Most subtargets use a PPC970 recognizer.
  if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
      Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
    assert(DAG->TII && "No InstrInfo?");

    return new PPCHazardRecognizer970(*DAG);
  }

  return new ScoreboardHazardRecognizer(II, DAG);
}

unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr &MI,
                                       unsigned *PredCost) const {
  if (!ItinData || UseOldLatencyCalc)
    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);

  // The default implementation of getInstrLatency calls getStageLatency, but
  // getStageLatency does not do the right thing for us. While we have
  // itinerary, most cores are fully pipelined, and so the itineraries only
  // express the first part of the pipeline, not every stage. Instead, we need
  // to use the listed output operand cycle number (using operand 0 here, which
  // is an output).

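  // As an illustration (the numbers are made up; real values come from the
  // per-core itinerary tables): a fully pipelined FPU op may occupy its issue
  // stage for one cycle while operand 0 is listed as ready at cycle 5; the
  // loop below then reports a latency of 5 rather than the stage latency of 1.
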
  unsigned Latency = 1;
  unsigned DefClass = MI.getDesc().getSchedClass();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
      continue;

    std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
    if (!Cycle)
      continue;

    Latency = std::max(Latency, *Cycle);
  }

  return Latency;
}

std::optional<unsigned> PPCInstrInfo::getOperandLatency(
    const InstrItineraryData *ItinData, const MachineInstr &DefMI,
    unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
  std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
      ItinData, DefMI, DefIdx, UseMI, UseIdx);

  if (!DefMI.getParent())
    return Latency;

  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
  Register Reg = DefMO.getReg();

  bool IsRegCR;
  if (Reg.isVirtual()) {
    const MachineRegisterInfo *MRI =
        &DefMI.getParent()->getParent()->getRegInfo();
    IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
              MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
  } else {
    IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
              PPC::CRBITRCRegClass.contains(Reg);
  }

  if (UseMI.isBranch() && IsRegCR) {
    if (!Latency)
      Latency = getInstrLatency(ItinData, DefMI);

    // On some cores, there is an additional delay between writing to a
    // condition register and using it from a branch.
    unsigned Directive = Subtarget.getCPUDirective();
    switch (Directive) {
    default: break;
    case PPC::DIR_7400:
    case PPC::DIR_750:
    case PPC::DIR_970:
    case PPC::DIR_E5500:
    case PPC::DIR_PWR4:
    case PPC::DIR_PWR5:
    case PPC::DIR_PWR5X:
    case PPC::DIR_PWR6:
    case PPC::DIR_PWR6X:
    case PPC::DIR_PWR7:
    case PPC::DIR_PWR8:
    // FIXME: Is this needed for POWER9?
      Latency = *Latency + 2;
      break;
    }
  }

  return Latency;
}

void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
                                         uint32_t Flags) const {
  MI.setFlags(Flags);
  MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
  MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
  MI.clearFlag(MachineInstr::MIFlag::IsExact);
}

// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies (>= 5) compared to other operations, such
// as and/or, which are also associative and commutative but have low
// latencies.
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                               bool Invert) const {
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // Floating point:
  // FP Add:
  case PPC::FADD:
  case PPC::FADDS:
  // FP Multiply:
  case PPC::FMUL:
  case PPC::FMULS:
  // Altivec Add:
  case PPC::VADDFP:
  // VSX Add:
  case PPC::XSADDDP:
  case PPC::XVADDDP:
  case PPC::XVADDSP:
  case PPC::XSADDSP:
  // VSX Multiply:
  case PPC::XSMULDP:
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
    return true;
  // Fixed point:
  // Multiply:
  case PPC::MULHD:
  case PPC::MULLD:
  case PPC::MULHW:
  case PPC::MULLW:
    return true;
  default:
    return false;
  }
}

#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0 (InfoArrayIdxFMAInst): FMA instruction;
// Index 1 (InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2 (InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3 (InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4 (InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//         the second MUL operand index is this plus 1;
// Index 5 (InfoArrayIdxFSubInst): SUB instruction associated with FMA.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
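// Reading the first row as an example: for XSMADDADP the associated add is
// XSADDDP, the associated multiply is XSMULDP, the addend lives at operand
// index 1, and the two multiplicands at operand indices 2 and 3.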

// Check if an opcode is a FMA instruction. If it is, return the index in array
// FMAOpIdxInfo. Otherwise, return -1.
int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
  for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
    if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
      return I;
  return -1;
}

// On the PowerPC target, we have two kinds of patterns related to FMA:
// 1: Improve ILP.
// Try to reassociate FMA chains like below:
//
// Pattern 1:
//   A = FADD X, Y          (Leaf)
//   B = FMA  A, M21, M22   (Prev)
//   C = FMA  B, M31, M32   (Root)
// -->
//   A = FMA X, M21, M22
//   B = FMA Y, M31, M32
//   C = FADD A, B
//
// Pattern 2:
//   A = FMA X, M11, M12    (Leaf)
//   B = FMA A, M21, M22    (Prev)
//   C = FMA B, M31, M32    (Root)
// -->
//   A = FMUL M11, M12
//   B = FMA X, M21, M22
//   D = FMA A, M31, M32
//   C = FADD B, D
//
// breaking the dependency between A and B, allowing the FMAs to be executed in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
//
// 2: Reduce register pressure.
// Try to reassociate FMA with FSUB and a constant like below:
// C is a floating point const.
//
// Pattern 1:
//   A = FSUB X, Y      (Leaf)
//   D = FMA  B, C, A   (Root)
// -->
//   A = FMA B, Y, -C
//   D = FMA A, X, C
//
// Pattern 2:
//   A = FSUB X, Y      (Leaf)
//   D = FMA  B, A, C   (Root)
// -->
//   A = FMA B, Y, -C
//   D = FMA A, X, C
//
// Before the transformation, A must be assigned a different hardware register
// from D. After the transformation, A and D must be assigned the same
// hardware register, due to the TIE attribute of FMA instructions.
//
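// For the register pressure patterns, the rewrite relies on the identity
//   B + C*(X - Y) = (B - C*Y) + C*X
// with the negated constant -C materialized through a new constant-pool load
// (see finalizeInsInstrs below).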
bool PPCInstrInfo::getFMAPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  MachineBasicBlock *MBB = Root.getParent();
  const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
    for (const auto &MO : Instr.explicit_operands())
      if (!(MO.isReg() && MO.getReg().isVirtual()))
        return false;
    return true;
  };

  auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
                                    unsigned OpType) {
    if (Instr.getOpcode() !=
        FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
      return false;

    // Instruction can be reassociated.
    // fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands are virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    // For register pressure reassociation, the FSub must have only one use as
    // we want to delete the sub to save its def.
    if (OpType == InfoArrayIdxFSubInst &&
        !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
      return false;

    return true;
  };

  auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
                               int16_t &MulOpIdx, bool IsLeaf) {
    int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
    if (Idx < 0)
      return false;

    // Instruction can be reassociated.
    // fast math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands are virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    MulOpIdx = InfoArrayIdxMULOpIdx;
    if (IsLeaf)
      return true;

    AddOpIdx = InfoArrayIdxAddOpIdx;

    const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
    MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
    // If 'add' operand's def is not in current block, don't do ILP related opt.
    if (!MIAdd || MIAdd->getParent() != MBB)
      return false;

    // If this is not Leaf FMA Instr, its 'add' operand should only have one use
    // as this fma will be changed later.
    return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
  };

  int16_t AddOpIdx = -1;
  int16_t MulOpIdx = -1;

  bool IsUsedOnceL = false;
  bool IsUsedOnceR = false;
  MachineInstr *MULInstrL = nullptr;
  MachineInstr *MULInstrR = nullptr;

  auto IsRPReductionCandidate = [&]() {
    // Currently, we only support float and double.
    // FIXME: add support for other types.
    unsigned Opcode = Root.getOpcode();
    if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
      return false;

    // Root must be a valid FMA like instruction.
    // Treat it as leaf as we don't care about its add operand.
    if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
      assert((MulOpIdx >= 0) && "mul operand index not right!");
      Register MULRegL = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx).getReg(), MRI);
      Register MULRegR = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx + 1).getReg(), MRI);
      if (!MULRegL && !MULRegR)
        return false;

      if (MULRegL && !MULRegR) {
        MULRegR =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
        IsUsedOnceL = true;
      } else if (!MULRegL && MULRegR) {
        MULRegL =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
        IsUsedOnceR = true;
      } else {
        IsUsedOnceL = true;
        IsUsedOnceR = true;
      }

      if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
        return false;

      MULInstrL = MRI->getVRegDef(MULRegL);
      MULInstrR = MRI->getVRegDef(MULRegR);
      return true;
    }
    return false;
  };

  // Register pressure fma reassociation patterns.
  if (DoRegPressureReduce && IsRPReductionCandidate()) {
    assert((MULInstrL && MULInstrR) &&
           "wrong register pressure reduction candidate!");
    // Register pressure pattern 1
    if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
        IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
      return true;
    }

    // Register pressure pattern 2
    if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
         IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
      return true;
    }
  }

  // ILP fma reassociation patterns.
  // Root must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegB = Root.getOperand(AddOpIdx).getReg();
  MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);

  // Prev must be a valid FMA like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegA = Prev->getOperand(AddOpIdx).getReg();
  MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
  AddOpIdx = -1;
  if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
    Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
    return true;
  }
  if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
    Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
    return true;
  }
  return false;
}

void PPCInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, MachineCombinerPattern &P,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");

  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineConstantPool *MCP = MF->getConstantPool();

  int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
  if (Idx < 0)
    return;

  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];

  // For now we only need to fix up placeholder for register pressure reduce
  // patterns.
  Register ConstReg = 0;
  switch (P) {
  case MachineCombinerPattern::REASSOC_XY_BCA:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
    break;
  case MachineCombinerPattern::REASSOC_XY_BAC:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
    break;
  default:
    // Not register pressure reduce patterns.
    return;
  }

  MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
  // Get const value from const pool.
  const Constant *C = getConstantFromConstantPool(ConstDefInstr);
  assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");

  // Get negative fp const.
  APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
  F1.changeSign();
  Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
  Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());

  // Put negative fp const into constant pool.
  unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);

  MachineOperand *Placeholder = nullptr;
  // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
  for (auto *Inst : InsInstrs) {
    for (MachineOperand &Operand : Inst->explicit_operands()) {
      assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
      if (Operand.getReg() == PPC::ZERO8) {
        Placeholder = &Operand;
        break;
      }
    }
  }

  assert(Placeholder && "Placeholder does not exist!");

  // Generate instructions to load the const fp from constant pool.
  // We only support PPC64 and medium code model.
  Register LoadNewConst =
      generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);

  // Fill the placeholder with the new load from constant pool.
  Placeholder->setReg(LoadNewConst);
}

bool PPCInstrInfo::shouldReduceRegisterPressure(
    const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {

  if (!EnableFMARegPressureReduction)
    return false;

  // Currently, we only enable register pressure reducing in machine combiner
  // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
  // support.
  //
  // So we need following instructions to access a TOC entry:
  //
  // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
  // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
  //   killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
  //
  // FIXME: add more supported targets, like Small and Large code model, PPC32,
  // AIX.
  if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
        Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineFunction *MF = MBB->getParent();
  const MachineRegisterInfo *MRI = &MF->getRegInfo();

  auto GetMBBPressure =
      [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
    RegionPressure Pressure;
    RegPressureTracker RPTracker(Pressure);

    // Initialize the register pressure tracker.
    RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
                   /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

    for (const auto &MI : reverse(*MBB)) {
      if (MI.isDebugValue() || MI.isDebugLabel())
        continue;
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, *MRI, false, false);
      RPTracker.recedeSkipDebugValues();
      assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
      RPTracker.recede(RegOpers);
    }

    // Close the RPTracker to finalize live ins.
    RPTracker.closeRegion();

    return RPTracker.getPressure().MaxSetPressure;
  };

  // For now we only care about float and double type fma.
  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
      *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);

  // Only reduce register pressure when pressure is high.
  return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
         (float)VSSRCLimit * FMARPFactor;
}

bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {
  // I has only one memory operand which is load from constant pool.
  if (!I->hasOneMemOperand())
    return false;

  MachineMemOperand *Op = I->memoperands()[0];
  return Op->isLoad() && Op->getPseudoValue() &&
         Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
}

Register PPCInstrInfo::generateLoadForNewConst(
    unsigned Idx, MachineInstr *MI, Type *Ty,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  // Now we only support PPC64, Medium code model and P9 with vector.
  // We have immutable pattern to access const pool. See function
  // shouldReduceRegisterPressure.
  assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
         "Target not supported!\n");

  MachineFunction *MF = MI->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  // Generate ADDIStocHA8
  Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
  MachineInstrBuilder TOCOffset =
      BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
          .addReg(PPC::X2)
          .addConstantPoolIndex(Idx);

  assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
         "Only float and double are supported!");

  unsigned LoadOpcode;
  // Should be float type or double type.
  if (Ty->isFloatTy())
    LoadOpcode = PPC::DFLOADf32;
  else
    LoadOpcode = PPC::DFLOADf64;

  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  Register VReg2 = MRI->createVirtualRegister(RC);
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,
      Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));

  // Generate Load from constant pool.
  MachineInstrBuilder Load =
      BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
          .addConstantPoolIndex(Idx)
          .addReg(VReg1, getKillRegState(true))
          .addMemOperand(MMO);

  Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);

  // Insert the toc load instructions into InsInstrs.
  InsInstrs.insert(InsInstrs.begin(), Load);
  InsInstrs.insert(InsInstrs.begin(), TOCOffset);
  return VReg2;
}

// This function returns the const value in constant pool if the \p I is a load
// from constant pool.
const Constant *
PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
  MachineFunction *MF = I->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineConstantPool *MCP = MF->getConstantPool();
  assert(I->mayLoad() && "Should be a load instruction.\n");
  for (auto MO : I->uses()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isVirtual())
      continue;
    // Find the toc address.
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    for (auto MO2 : DefMI->uses())
      if (MO2.isCPI())
        return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
  }
  return nullptr;
}

bool PPCInstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  // Using the machine combiner in this way is potentially expensive, so
  // restrict to when aggressive optimizations are desired.
  if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
    return false;

  if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}

void PPCInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XY_BCA:
  case MachineCombinerPattern::REASSOC_XY_BAC:
    reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    break;
  default:
    // Reassociate default patterns.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    break;
  }
}

void PPCInstrInfo::reassociateFMA(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineOperand &OpC = Root.getOperand(0);
  Register RegC = OpC.getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(RegC);
  MRI.constrainRegClass(RegC, RC);

  unsigned FmaOp = Root.getOpcode();
  int16_t Idx = getFMAOpIdxInfo(FmaOp);
  assert(Idx >= 0 && "Root must be a FMA instruction");

  bool IsILPReassociate =
      (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
      (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM);

  uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];

  MachineInstr *Prev = nullptr;
  MachineInstr *Leaf = nullptr;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
    Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
    Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
    break;
  case MachineCombinerPattern::REASSOC_XY_BAC: {
    Register MULReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  case MachineCombinerPattern::REASSOC_XY_BCA: {
    Register MULReg = TRI->lookThruCopyLike(
        Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
    Leaf = MRI.getVRegDef(MULReg);
    break;
  }
  }

  uint32_t IntersectedFlags = 0;
  if (IsILPReassociate)
    IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
  else
    IntersectedFlags = Root.getFlags() & Leaf->getFlags();

  auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
                            bool &KillFlag) {
    Reg = Operand.getReg();
    MRI.constrainRegClass(Reg, RC);
    KillFlag = Operand.isKill();
  };

  auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
                             Register &MulOp2, Register &AddOp,
                             bool &MulOp1KillFlag, bool &MulOp2KillFlag,
                             bool &AddOpKillFlag) {
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
    GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
    GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
  };

  Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
      RegA21, RegB;
  bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
       KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
       KillA11 = false, KillA21 = false, KillB = false;

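  // The register names mirror the pattern comments before getFMAPatterns:
  // RegX/RegY are the leaf's inputs, RegM* are multiplicands, and RegA*/RegB
  // are the intermediate addends.
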
  GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);

  if (IsILPReassociate)
    GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);

  if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
    GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
    GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
  } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  } else {
    // Get FSUB instruction info.
    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  }

  // Create new virtual registers for the new results instead of
  // recycling legacy ones because the MachineCombiner's computation of the
  // critical path requires a new register definition rather than an existing
  // one.
  // For register pressure reassociation, we only need to create one virtual
  // register for the new fma.
  Register NewVRA = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));

  Register NewVRB = 0;
  if (IsILPReassociate) {
    NewVRB = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
  }

  Register NewVRD = 0;
  if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
    NewVRD = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
  }

  auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
                                Register RegMul1, bool KillRegMul1,
                                Register RegMul2, bool KillRegMul2) {
    MI->getOperand(AddOpIdx).setReg(RegAdd);
    MI->getOperand(AddOpIdx).setIsKill(KillAdd);
    MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
    MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
    MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
    MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
  };

  MachineInstrBuilder NewARegPressure, NewCRegPressure;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM: {
    // Create new instructions for insertion.
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
            .addReg(RegY, getKillRegState(KillY))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRA, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewC);
    break;
  }
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
    assert(NewVRD && "new FMA register not created!");
    // Create new instructions for insertion.
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Leaf->getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
            .addReg(RegM11, getKillRegState(KillM11))
            .addReg(RegM12, getKillRegState(KillM12));
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewD =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
            .addReg(NewVRA, getKillRegState(true))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
                         KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRD, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewD, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewD);
    InsInstrs.push_back(MINewC);
    break;
  }
  case MachineCombinerPattern::REASSOC_XY_BCA:
  case MachineCombinerPattern::REASSOC_XY_BAC: {
    Register VarReg;
    bool KillVarReg = false;
    if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) {
      VarReg = RegM31;
      KillVarReg = KillM31;
    } else {
      VarReg = RegM32;
      KillVarReg = KillM32;
    }
    // We don't want to get negative const from memory pool too early, as the
    // created entry will not be deleted even if it has no users. Since all
    // operands of Leaf and Root are virtual registers, we use the zero
    // register here as a placeholder. When the InsInstrs is selected in
    // MachineCombiner, we call finalizeInsInstrs to replace the zero register
    // with a virtual register which is a load from constant pool.
    NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
                          .addReg(RegB, getKillRegState(KillB))
                          .addReg(RegY, getKillRegState(KillY))
                          .addReg(PPC::ZERO8);
    NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
                          .addReg(NewVRA, getKillRegState(true))
                          .addReg(RegX, getKillRegState(KillX))
                          .addReg(VarReg, getKillRegState(KillVarReg));
    // For now, we only support xsmaddadp/xsmaddasp, their add operands are
    // both at index 1, no need to adjust.
    // FIXME: when adding more fma instructions support, like fma/fmas, adjust
    // the operand index here.
    break;
  }
  }

  if (!IsILPReassociate) {
    setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
    setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);

    InsInstrs.push_back(NewARegPressure);
    InsInstrs.push_back(NewCRegPressure);
  }

  assert(!InsInstrs.empty() &&
         "Insertion instructions set should not be empty!");

  // Record old instructions for deletion.
  DelInstrs.push_back(Leaf);
  if (IsILPReassociate)
    DelInstrs.push_back(Prev);
  DelInstrs.push_back(&Root);
}

// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}

unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}

// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // Let base implementation decide.
    break;
  case PPC::LI:
  case PPC::LI8:
  case PPC::PLI:
  case PPC::PLI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL:
  case PPC::LOAD_STACK_GUARD:
  case PPC::PPCLdFixedAddr:
  case PPC::XXLXORz:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIW:
  case PPC::XXSPLTIDP:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  case PPC::CRSET:
  case PPC::CRUNSET:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}

MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)

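  // For example, with MB=24 and ME=31 the original mask selects bits 24..31;
  // the commuted mask((31+1)&31, (24-1)&31) = mask(0, 23) selects exactly the
  // complementary bits, so swapping Op1 and Op2 preserves the result.
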
  // Swap op1/op2
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only the operands 1 and 2 can be swapped in RLWIMI/RLWIMI_rec.");
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction (i.e. op1 is tied to op0).
    assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}

bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted, however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.

  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}

void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI) const {
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  switch (Directive) {
  default:            Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
  // FIXME: Update when POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}

/// Return the noop instruction to use for a noop.
MCInst PPCInstrInfo::getNop() const {
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}

// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
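// For ordinary conditional branches the Cond vector below is encoded as
// {predicate-imm, CR register}; for the CTR-decrement forms it is
// {1 (BDNZ) or 0 (BDZ), CTR/CTR8} as noted above.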
bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // We update iterator after deleting the last branch.
      I = MBB.getLastNonDebugInstr();
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC:BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC:Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");

  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                              (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                              (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way Conditional Branch.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                            (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                            (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}

// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                   ArrayRef<MachineOperand> Cond,
                                   Register DstReg, Register TrueReg,
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  if (!Subtarget.hasISEL())
    return false;

  if (Cond.size() != 2)
    return false;

  // If this is really a bdnz-like condition, then it cannot be turned into a
  // select.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    return false;

  // If the conditional branch uses a physical register, then it cannot be
  // turned into a select.
  if (Cond[1].getReg().isPhysical())
    return false;

  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // isel is for regular integer GPRs only.
  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
      !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
      !PPC::G8RCRegClass.hasSubClassEq(RC) &&
      !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
    return false;

  // FIXME: These numbers are for the A2, how well they work for other cores is
  // an open question. On the A2, the isel instruction has a 2-cycle latency
  // but single-cycle throughput. These numbers are used in combination with
  // the MispredictPenalty setting from the active SchedMachineModel.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;

  return true;
}

void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &dl, Register DestReg,
                                ArrayRef<MachineOperand> Cond, Register TrueReg,
                                Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }

  Register FirstReg =  SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg  : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
        MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
          &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
        .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
      .addReg(FirstReg).addReg(SecondReg)
      .addReg(Cond[1].getReg(), 0, SubIdx);
}

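// Map a CR bit register (CR0LT..CR7UN) to its position within its CR field:
// LT=3, GT=2, EQ=1, UN=0. copyPhysReg below uses this to rotate the selected
// bit into the low end of a GPR.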
static unsigned getCRBitValue(unsigned CRBit) {
  unsigned Ret = 4;
  if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
      CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
      CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
      CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
    Ret = 3;
  if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
      CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
      CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
      CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
    Ret = 2;
  if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
      CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
      CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
      CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
    Ret = 1;
  if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
      CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
      CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
      CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
    Ret = 0;

  assert(Ret != 4 && "Invalid CR bit register");
  return Ret;
}

1666 const DebugLoc &DL, MCRegister DestReg,
1667 MCRegister SrcReg, bool KillSrc) const {
1668 // We can end up with self copies and similar things as a result of VSX copy
1669 // legalization. Promote them here.
1671 if (PPC::F8RCRegClass.contains(DestReg) &&
1672 PPC::VSRCRegClass.contains(SrcReg)) {
1673 MCRegister SuperReg =
1674 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1675
1676 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1677 llvm_unreachable("nop VSX copy");
1678
1679 DestReg = SuperReg;
1680 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1681 PPC::VSRCRegClass.contains(DestReg)) {
1682 MCRegister SuperReg =
1683 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1684
1685 if (VSXSelfCopyCrash && DestReg == SuperReg)
1686 llvm_unreachable("nop VSX copy");
1687
1688 SrcReg = SuperReg;
1689 }
1690
1691 // Different class register copy
1692 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1693 PPC::GPRCRegClass.contains(DestReg)) {
1694 MCRegister CRReg = getCRFromCRBit(SrcReg);
1695 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1696 getKillRegState(KillSrc);
1697 // Rotate the CR bit in the CR fields to be the least significant bit and
1698 // then mask with 0x1 (MB = ME = 31).
1699 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1700 .addReg(DestReg, RegState::Kill)
1701 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1702 .addImm(31)
1703 .addImm(31);
1704 return;
1705 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1706 (PPC::G8RCRegClass.contains(DestReg) ||
1707 PPC::GPRCRegClass.contains(DestReg))) {
1708 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1709 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1710 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1711 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1712 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1713 getKillRegState(KillSrc);
1714 if (CRNum == 7)
1715 return;
1716 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1717 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1718 .addReg(DestReg, RegState::Kill)
1719 .addImm(CRNum * 4 + 4)
1720 .addImm(28)
1721 .addImm(31);
1722 return;
1723 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1724 PPC::VSFRCRegClass.contains(DestReg)) {
1725 assert(Subtarget.hasDirectMove() &&
1726 "Subtarget doesn't support directmove, don't know how to copy.");
1727 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1728 NumGPRtoVSRSpill++;
1729 getKillRegState(KillSrc);
1730 return;
1731 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1732 PPC::G8RCRegClass.contains(DestReg)) {
1733 assert(Subtarget.hasDirectMove() &&
1734 "Subtarget doesn't support directmove, don't know how to copy.");
1735 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1736 getKillRegState(KillSrc);
1737 return;
1738 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1739 PPC::GPRCRegClass.contains(DestReg)) {
1740 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1741 getKillRegState(KillSrc);
1742 return;
1743 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1744 PPC::SPERCRegClass.contains(DestReg)) {
1745 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1746 getKillRegState(KillSrc);
1747 return;
1748 }
1749
1750 unsigned Opc;
1751 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1752 Opc = PPC::OR;
1753 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1754 Opc = PPC::OR8;
1755 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1756 Opc = PPC::FMR;
1757 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1758 Opc = PPC::MCRF;
1759 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1760 Opc = PPC::VOR;
1761 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1762 // There are two different ways this can be done:
1763 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1764 // issue in VSU pipeline 0.
1765 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1766 // can go to either pipeline.
1767 // We'll always use xxlor here, because in practically all cases where
1768 // copies are generated, they are close enough to some use that the
1769 // lower-latency form is preferable.
1770 Opc = PPC::XXLOR;
1771 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1772 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1773 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1774 else if (Subtarget.pairedVectorMemops() &&
1775 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
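// A VSX register pair overlays two adjacent single registers: pairs 0-15 map
// onto even-numbered VSL registers and pairs 16-31 onto even-numbered V
// registers, so the pair is copied as two xxlor register moves below.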
1776 if (SrcReg > PPC::VSRp15)
1777 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1778 else
1779 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1780 if (DestReg > PPC::VSRp15)
1781 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1782 else
1783 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1784 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1785 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1786 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1787 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1788 return;
1789 }
1790 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1791 Opc = PPC::CROR;
1792 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1793 Opc = PPC::EVOR;
1794 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1795 PPC::UACCRCRegClass.contains(DestReg)) &&
1796 (PPC::ACCRCRegClass.contains(SrcReg) ||
1797 PPC::UACCRCRegClass.contains(SrcReg))) {
1798 // If primed, de-prime the source register, copy the individual registers
1799 // and prime the destination if needed. The vector subregisters are
1800 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1801 // source is primed, we need to re-prime it after the copy as well.
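// For example, acc2 (or uacc2) overlays vs8-vs11; the loop below copies those
// four underlying VSRs one at a time with xxlor.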
1802 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1803 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1804 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1805 MCRegister VSLSrcReg =
1806 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1807 MCRegister VSLDestReg =
1808 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1809 if (SrcPrimed)
1810 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1811 for (unsigned Idx = 0; Idx < 4; Idx++)
1812 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1813 .addReg(VSLSrcReg + Idx)
1814 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1815 if (DestPrimed)
1816 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1817 if (SrcPrimed && !KillSrc)
1818 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1819 return;
1820 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1821 PPC::G8pRCRegClass.contains(SrcReg)) {
1822 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
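// Each G8p register covers an adjacent even/odd pair of 64-bit GPRs
// (G8p<n> -> X<2n>, X<2n+1>), so the copy is performed as two OR8 moves.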
1823 unsigned DestRegIdx = DestReg - PPC::G8p0;
1824 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1825 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1826 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1827 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1828 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1829 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1830 .addReg(SrcRegSub0)
1831 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1832 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1833 .addReg(SrcRegSub1)
1834 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1835 return;
1836 } else
1837 llvm_unreachable("Impossible reg-to-reg copy");
1838
1839 const MCInstrDesc &MCID = get(Opc);
1840 if (MCID.getNumOperands() == 3)
1841 BuildMI(MBB, I, DL, MCID, DestReg)
1842 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1843 else
1844 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1845}
1846
1847unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1848 int OpcodeIndex = 0;
1849
1850 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1851 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1852 OpcodeIndex = SOK_Int4Spill;
1853 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1854 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1855 OpcodeIndex = SOK_Int8Spill;
1856 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1857 OpcodeIndex = SOK_Float8Spill;
1858 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1859 OpcodeIndex = SOK_Float4Spill;
1860 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1861 OpcodeIndex = SOK_SPESpill;
1862 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1863 OpcodeIndex = SOK_CRSpill;
1864 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1865 OpcodeIndex = SOK_CRBitSpill;
1866 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1867 OpcodeIndex = SOK_VRVectorSpill;
1868 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1869 OpcodeIndex = SOK_VSXVectorSpill;
1870 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1871 OpcodeIndex = SOK_VectorFloat8Spill;
1872 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1873 OpcodeIndex = SOK_VectorFloat4Spill;
1874 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1875 OpcodeIndex = SOK_SpillToVSR;
1876 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1877 assert(Subtarget.pairedVectorMemops() &&
1878 "Register unexpected when paired memops are disabled.");
1879 OpcodeIndex = SOK_AccumulatorSpill;
1880 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1881 assert(Subtarget.pairedVectorMemops() &&
1882 "Register unexpected when paired memops are disabled.");
1883 OpcodeIndex = SOK_UAccumulatorSpill;
1884 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1885 assert(Subtarget.pairedVectorMemops() &&
1886 "Register unexpected when paired memops are disabled.");
1887 OpcodeIndex = SOK_WAccumulatorSpill;
1888 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1889 assert(Subtarget.pairedVectorMemops() &&
1890 "Register unexpected when paired memops are disabled.");
1891 OpcodeIndex = SOK_PairedVecSpill;
1892 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1893 OpcodeIndex = SOK_PairedG8Spill;
1894 } else {
1895 llvm_unreachable("Unknown regclass!");
1896 }
1897 return OpcodeIndex;
1898}
1899
1900unsigned
1901PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {
1902 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1903 return OpcodesForSpill[getSpillIndex(RC)];
1904}
1905
1906unsigned
1907PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {
1908 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1909 return OpcodesForSpill[getSpillIndex(RC)];
1910}
1911
1912void PPCInstrInfo::StoreRegToStackSlot(
1913 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1914 const TargetRegisterClass *RC,
1915 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1916 unsigned Opcode = getStoreOpcodeForSpill(RC);
1917 DebugLoc DL;
1918
1919 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1920 FuncInfo->setHasSpills();
1921
1923 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
1924 FrameIdx));
1925
1926 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
1927 PPC::CRBITRCRegClass.hasSubClassEq(RC))
1928 FuncInfo->setSpillsCR();
1929
1930 if (isXFormMemOp(Opcode))
1931 FuncInfo->setHasNonRISpills();
1932}
1933
1934void PPCInstrInfo::storeRegToStackSlotNoUpd(
1935 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
1936 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1937 const TargetRegisterInfo *TRI) const {
1938 MachineFunction &MF = *MBB.getParent();
1939 SmallVector<MachineInstr *, 4> NewMIs;
1940
1941 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1942
1943 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
1944 MBB.insert(MI, NewMIs[i]);
1945
1946 const MachineFrameInfo &MFI = MF.getFrameInfo();
1947 MachineMemOperand *MMO = MF.getMachineMemOperand(
1948 MachinePointerInfo::getFixedStack(MF, FrameIdx),
1949 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
1950 MFI.getObjectAlign(FrameIdx));
1951 NewMIs.back()->addMemOperand(MF, MMO);
1952}
1953
1954void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
1955 MachineBasicBlock::iterator MI, Register SrcReg,
1956 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1957 const TargetRegisterInfo *TRI, Register VReg) const {
1958 // We need to avoid a situation in which the value from a VRRC register is
1959 // spilled using an Altivec instruction and reloaded into a VSRC register
1960 // using a VSX instruction. The issue with this is that the VSX
1961 // load/store instructions swap the doublewords in the vector and the Altivec
1962 // ones don't. The register classes on the spill/reload may be different if
1963 // the register is defined using an Altivec instruction and is then used by a
1964 // VSX instruction.
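// updatedRC() widens the class to its VSX superclass (e.g. VRRC -> VSRC) when
// VSX is available, so the spill and the later reload pick matching opcodes.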
1965 RC = updatedRC(RC);
1966 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1967}
1968
1969void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1970 unsigned DestReg, int FrameIdx,
1971 const TargetRegisterClass *RC,
1972 SmallVectorImpl<MachineInstr *> &NewMIs)
1973 const {
1974 unsigned Opcode = getLoadOpcodeForSpill(RC);
1975 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1976 FrameIdx));
1977}
1978
1979void PPCInstrInfo::loadRegFromStackSlotNoUpd(
1980 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
1981 int FrameIdx, const TargetRegisterClass *RC,
1982 const TargetRegisterInfo *TRI) const {
1983 MachineFunction &MF = *MBB.getParent();
1984 SmallVector<MachineInstr *, 4> NewMIs;
1985 DebugLoc DL;
1986 if (MI != MBB.end()) DL = MI->getDebugLoc();
1987
1988 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
1989
1990 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
1991 MBB.insert(MI, NewMIs[i]);
1992
1993 const MachineFrameInfo &MFI = MF.getFrameInfo();
1994 MachineMemOperand *MMO = MF.getMachineMemOperand(
1995 MachinePointerInfo::getFixedStack(MF, FrameIdx),
1996 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
1997 MFI.getObjectAlign(FrameIdx));
1998 NewMIs.back()->addMemOperand(MF, MMO);
1999}
2000
2001void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
2002 MachineBasicBlock::iterator MI,
2003 Register DestReg, int FrameIdx,
2004 const TargetRegisterClass *RC,
2005 const TargetRegisterInfo *TRI,
2006 Register VReg) const {
2007 // We need to avoid a situation in which the value from a VRRC register is
2008 // spilled using an Altivec instruction and reloaded into a VSRC register
2009 // using a VSX instruction. The issue with this is that the VSX
2010 // load/store instructions swap the doublewords in the vector and the Altivec
2011 // ones don't. The register classes on the spill/reload may be different if
2012 // the register is defined using an Altivec instruction and is then used by a
2013 // VSX instruction.
2014 RC = updatedRC(RC);
2015
2016 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2017}
2018
2019bool PPCInstrInfo::
2020reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
2021 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2022 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2023 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2024 else
2025 // Leave the CR# the same, but invert the condition.
2026 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2027 return false;
2028}
2029
2030// For some instructions, it is legal to fold ZERO into the RA register field.
2031 // This function performs that fold by replacing the operand with PPC::ZERO;
2032 // it does not check whether the load-immediate of zero is left unused.
2033bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2034 Register Reg) const {
2035 // A zero immediate should always be loaded with a single li.
2036 unsigned DefOpc = DefMI.getOpcode();
2037 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2038 return false;
2039 if (!DefMI.getOperand(1).isImm())
2040 return false;
2041 if (DefMI.getOperand(1).getImm() != 0)
2042 return false;
2043
2044 // Note that we cannot here invert the arguments of an isel in order to fold
2045 // a ZERO into what is presented as the second argument. All we have here
2046 // is the condition bit, and that might come from a CR-logical bit operation.
2047
2048 const MCInstrDesc &UseMCID = UseMI.getDesc();
2049
2050 // Only fold into real machine instructions.
2051 if (UseMCID.isPseudo())
2052 return false;
2053
2054 // We need to find which of the User's operands is to be folded, that will be
2055 // the operand that matches the given register ID.
2056 unsigned UseIdx;
2057 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2058 if (UseMI.getOperand(UseIdx).isReg() &&
2059 UseMI.getOperand(UseIdx).getReg() == Reg)
2060 break;
2061
2062 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2063 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2064
2065 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2066
2067 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2068 // register (which might also be specified as a pointer class kind).
2069 if (UseInfo->isLookupPtrRegClass()) {
2070 if (UseInfo->RegClass /* Kind */ != 1)
2071 return false;
2072 } else {
2073 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2074 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2075 return false;
2076 }
2077
2078 // Make sure this is not tied to an output register (or otherwise
2079 // constrained). This is true for ST?UX registers, for example, which
2080 // are tied to their output registers.
2081 if (UseInfo->Constraints != 0)
2082 return false;
2083
2084 MCRegister ZeroReg;
2085 if (UseInfo->isLookupPtrRegClass()) {
2086 bool isPPC64 = Subtarget.isPPC64();
2087 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2088 } else {
2089 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2090 PPC::ZERO8 : PPC::ZERO;
2091 }
2092
2093 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2094 LLVM_DEBUG(UseMI.dump());
2095 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2096 LLVM_DEBUG(dbgs() << "Into: ");
2097 LLVM_DEBUG(UseMI.dump());
2098 return true;
2099}
2100
2101 // Folds zero into instructions that use a register defined by a
2102 // load-immediate of zero, where the instruction can encode that operand as
2103 // the zero register. If the defining load has no remaining users, it is deleted.
2104bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2105 Register Reg, MachineRegisterInfo *MRI) const {
2106 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2107 if (MRI->use_nodbg_empty(Reg))
2108 DefMI.eraseFromParent();
2109 return Changed;
2110}
2111
2112static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
2113 for (MachineInstr &MI : MBB)
2114 if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
2115 return true;
2116 return false;
2117}
2118
2119// We should make sure that, if we're going to predicate both sides of a
2120// condition (a diamond), that both sides don't define the counter register. We
2121// can predicate counter-decrement-based branches, but while that predicates
2122// the branching, it does not predicate the counter decrement. If we tried to
2123// merge the triangle into one predicated block, we'd decrement the counter
2124// twice.
2125bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
2126 unsigned NumT, unsigned ExtraT,
2127 MachineBasicBlock &FMBB,
2128 unsigned NumF, unsigned ExtraF,
2129 BranchProbability Probability) const {
2130 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2131}
2132
2133
2134bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
2135 // The predicated branches are identified by their type, not really by the
2136 // explicit presence of a predicate. Furthermore, some of them can be
2137 // predicated more than once. Because if conversion won't try to predicate
2138 // any instruction which already claims to be predicated (by returning true
2139 // here), always return false. In doing so, we let isPredicable() be the
2140 // final word on whether or not the instruction can be (further) predicated.
2141
2142 return false;
2143}
2144
2145bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2146 const MachineBasicBlock *MBB,
2147 const MachineFunction &MF) const {
2148 switch (MI.getOpcode()) {
2149 default:
2150 break;
2151 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2152 // across them, since some FP operations may change content of FPSCR.
2153 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2154 case PPC::MFFS:
2155 case PPC::MTFSF:
2156 case PPC::FENCE:
2157 return true;
2158 }
2159 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
2160}
2161
2162bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
2163 ArrayRef<MachineOperand> Pred) const {
2164 unsigned OpC = MI.getOpcode();
2165 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2166 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2167 bool isPPC64 = Subtarget.isPPC64();
2168 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2169 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2170 // Need to add a Def and Use for the CTR implicit operand.
2171 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2172 .addReg(Pred[1].getReg(), RegState::Implicit)
2173 .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
2174 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2175 MI.setDesc(get(PPC::BCLR));
2176 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2177 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2178 MI.setDesc(get(PPC::BCLRn));
2179 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2180 } else {
2181 MI.setDesc(get(PPC::BCCLR));
2182 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2183 .addImm(Pred[0].getImm())
2184 .add(Pred[1]);
2185 }
2186
2187 return true;
2188 } else if (OpC == PPC::B) {
2189 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2190 bool isPPC64 = Subtarget.isPPC64();
2191 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2192 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2193 // Need to add a Def and Use for the CTR implicit operand.
2194 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2195 .addReg(Pred[1].getReg(), RegState::Implicit)
2196 .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
2197 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2198 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2199 MI.removeOperand(0);
2200
2201 MI.setDesc(get(PPC::BC));
2202 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2203 .add(Pred[1])
2204 .addMBB(MBB);
2205 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2206 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2207 MI.removeOperand(0);
2208
2209 MI.setDesc(get(PPC::BCn));
2210 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2211 .add(Pred[1])
2212 .addMBB(MBB);
2213 } else {
2214 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2215 MI.removeOperand(0);
2216
2217 MI.setDesc(get(PPC::BCC));
2218 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2219 .addImm(Pred[0].getImm())
2220 .add(Pred[1])
2221 .addMBB(MBB);
2222 }
2223
2224 return true;
2225 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2226 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2227 OpC == PPC::BCTRL8_RM) {
2228 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2229 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2230
2231 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2232 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2233 bool isPPC64 = Subtarget.isPPC64();
2234
2235 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2236 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2237 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2238 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2239 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2240 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2241 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2242 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2243 } else {
2244 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2245 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2246 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2247 .addImm(Pred[0].getImm())
2248 .add(Pred[1]);
2249 }
2250
2251 // Need to add a Def and Use for the LR implicit operand.
2252 if (setLR)
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2254 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2255 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2256 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2258 .addReg(PPC::RM, RegState::ImplicitDefine);
2259
2260 return true;
2261 }
2262
2263 return false;
2264}
2265
2266bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
2267 ArrayRef<MachineOperand> Pred2) const {
2268 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2269 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2270
2271 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2272 return false;
2273 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2274 return false;
2275
2276 // P1 can only subsume P2 if they test the same condition register.
2277 if (Pred1[1].getReg() != Pred2[1].getReg())
2278 return false;
2279
2280 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2281 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2282
2283 if (P1 == P2)
2284 return true;
2285
2286 // Does P1 subsume P2, e.g. GE subsumes GT.
2287 if (P1 == PPC::PRED_LE &&
2288 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2289 return true;
2290 if (P1 == PPC::PRED_GE &&
2291 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2292 return true;
2293
2294 return false;
2295}
2296
2297bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
2298 std::vector<MachineOperand> &Pred,
2299 bool SkipDead) const {
2300 // Note: At the present time, the contents of Pred from this function is
2301 // unused by IfConversion. This implementation follows ARM by pushing the
2302 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2303 // predicate, instructions defining CTR or CTR8 are also included as
2304 // predicate-defining instructions.
2305
2306 const TargetRegisterClass *RCs[] =
2307 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2308 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2309
2310 bool Found = false;
2311 for (const MachineOperand &MO : MI.operands()) {
2312 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2313 const TargetRegisterClass *RC = RCs[c];
2314 if (MO.isReg()) {
2315 if (MO.isDef() && RC->contains(MO.getReg())) {
2316 Pred.push_back(MO);
2317 Found = true;
2318 }
2319 } else if (MO.isRegMask()) {
2320 for (MCPhysReg R : *RC)
2321 if (MO.clobbersPhysReg(R)) {
2322 Pred.push_back(MO);
2323 Found = true;
2324 }
2325 }
2326 }
2327 }
2328
2329 return Found;
2330}
2331
2332bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2333 Register &SrcReg2, int64_t &Mask,
2334 int64_t &Value) const {
2335 unsigned Opc = MI.getOpcode();
2336
2337 switch (Opc) {
2338 default: return false;
2339 case PPC::CMPWI:
2340 case PPC::CMPLWI:
2341 case PPC::CMPDI:
2342 case PPC::CMPLDI:
2343 SrcReg = MI.getOperand(1).getReg();
2344 SrcReg2 = 0;
2345 Value = MI.getOperand(2).getImm();
2346 Mask = 0xFFFF;
2347 return true;
2348 case PPC::CMPW:
2349 case PPC::CMPLW:
2350 case PPC::CMPD:
2351 case PPC::CMPLD:
2352 case PPC::FCMPUS:
2353 case PPC::FCMPUD:
2354 SrcReg = MI.getOperand(1).getReg();
2355 SrcReg2 = MI.getOperand(2).getReg();
2356 Value = 0;
2357 Mask = 0;
2358 return true;
2359 }
2360}
2361
2362bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
2363 Register SrcReg2, int64_t Mask,
2364 int64_t Value,
2365 const MachineRegisterInfo *MRI) const {
2366 if (DisableCmpOpt)
2367 return false;
2368
2369 int OpC = CmpInstr.getOpcode();
2370 Register CRReg = CmpInstr.getOperand(0).getReg();
2371
2372 // FP record forms set CR1 based on the exception status bits, not a
2373 // comparison with zero.
2374 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2375 return false;
2376
2377 const TargetRegisterInfo *TRI = &getRegisterInfo();
2378 // The record forms set the condition register based on a signed comparison
2379 // with zero (so says the ISA manual). This is not as straightforward as it
2380 // seems, however, because this is always a 64-bit comparison on PPC64, even
2381 // for instructions that are 32-bit in nature (like slw for example).
2382 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2383 // for equality checks (as those don't depend on the sign). On PPC64,
2384 // we are restricted to equality for unsigned 64-bit comparisons and for
2385 // signed 32-bit comparisons the applicability is more restricted.
2386 bool isPPC64 = Subtarget.isPPC64();
2387 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2388 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2389 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2390
2391 // Look through copies unless that gets us to a physical register.
2392 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2393 if (ActualSrc.isVirtual())
2394 SrcReg = ActualSrc;
2395
2396 // Get the unique definition of SrcReg.
2397 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2398 if (!MI) return false;
2399
2400 bool equalityOnly = false;
2401 bool noSub = false;
2402 if (isPPC64) {
2403 if (is32BitSignedCompare) {
2404 // We can perform this optimization only if SrcReg is sign-extending.
2405 if (isSignExtended(SrcReg, MRI))
2406 noSub = true;
2407 else
2408 return false;
2409 } else if (is32BitUnsignedCompare) {
2410 // We can perform this optimization, equality only, if SrcReg is
2411 // zero-extending.
2412 if (isZeroExtended(SrcReg, MRI)) {
2413 noSub = true;
2414 equalityOnly = true;
2415 } else
2416 return false;
2417 } else
2418 equalityOnly = is64BitUnsignedCompare;
2419 } else
2420 equalityOnly = is32BitUnsignedCompare;
2421
2422 if (equalityOnly) {
2423 // We need to check the uses of the condition register in order to reject
2424 // non-equality comparisons.
2425 for (MachineRegisterInfo::use_instr_iterator
2426 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2427 I != IE; ++I) {
2428 MachineInstr *UseMI = &*I;
2429 if (UseMI->getOpcode() == PPC::BCC) {
2430 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2431 unsigned PredCond = PPC::getPredicateCondition(Pred);
2432 // We ignore hint bits when checking for non-equality comparisons.
2433 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2434 return false;
2435 } else if (UseMI->getOpcode() == PPC::ISEL ||
2436 UseMI->getOpcode() == PPC::ISEL8) {
2437 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2438 if (SubIdx != PPC::sub_eq)
2439 return false;
2440 } else
2441 return false;
2442 }
2443 }
2444
2445 MachineBasicBlock::iterator I = CmpInstr;
2446
2447 // Scan forward to find the first use of the compare.
2448 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2449 ++I) {
2450 bool FoundUse = false;
2451 for (MachineRegisterInfo::use_instr_iterator
2452 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2453 J != JE; ++J)
2454 if (&*J == &*I) {
2455 FoundUse = true;
2456 break;
2457 }
2458
2459 if (FoundUse)
2460 break;
2461 }
2462
2463 SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
2464 SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
2465
2466 // There are two possible candidates which can be changed to set CR[01].
2467 // One is MI, the other is a SUB instruction.
2468 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2469 MachineInstr *Sub = nullptr;
2470 if (SrcReg2 != 0)
2471 // MI is not a candidate for CMPrr.
2472 MI = nullptr;
2473 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2474 // same BB as the comparison. This is to allow the check below to avoid calls
2475 // (and other explicit clobbers); instead we should really check for these
2476 // more explicitly (in at least a few predecessors).
2477 else if (MI->getParent() != CmpInstr.getParent())
2478 return false;
2479 else if (Value != 0) {
2480 // The record-form instructions set CR bit based on signed comparison
2481 // against 0. We try to convert a compare against 1 or -1 into a compare
2482 // against 0 to exploit record-form instructions. For example, we change
2483 // the condition "greater than -1" into "greater than or equal to 0"
2484 // and "less than 1" into "less than or equal to 0".
2485
2486 // Since we optimize the comparison based on a specific branch condition,
2487 // we don't optimize if the condition code is used more than once.
2488 if (equalityOnly || !MRI->hasOneUse(CRReg))
2489 return false;
2490
2491 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2492 if (UseMI->getOpcode() != PPC::BCC)
2493 return false;
2494
2495 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2496 unsigned PredCond = PPC::getPredicateCondition(Pred);
2497 unsigned PredHint = PPC::getPredicateHint(Pred);
2498 int16_t Immed = (int16_t)Value;
2499
2500 // When modifying the condition in the predicate, we propagate hint bits
2501 // from the original predicate to the new one.
2502 if (Immed == -1 && PredCond == PPC::PRED_GT)
2503 // We convert "greater than -1" into "greater than or equal to 0",
2504 // since we are assuming signed comparison by !equalityOnly
2505 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2506 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2507 // We convert "less than or equal to -1" into "less than 0".
2508 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2509 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2510 // We convert "less than 1" into "less than or equal to 0".
2511 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2512 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2513 // We convert "greater than or equal to 1" into "greater than 0".
2514 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2515 else
2516 return false;
2517
2518 // Convert the comparison and its user to a compare against zero with the
2519 // appropriate predicate on the branch. Zero comparison might provide
2520 // optimization opportunities post-RA (see optimization in
2521 // PPCPreEmitPeephole.cpp).
2522 UseMI->getOperand(0).setImm(Pred);
2523 CmpInstr.getOperand(2).setImm(0);
2524 }
2525
2526 // Search for Sub.
2527 --I;
2528
2529 // Get ready to iterate backward from CmpInstr.
2530 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2531
2532 for (; I != E && !noSub; --I) {
2533 const MachineInstr &Instr = *I;
2534 unsigned IOpC = Instr.getOpcode();
2535
2536 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2537 Instr.readsRegister(PPC::CR0, TRI)))
2538 // This instruction modifies or uses the record condition register after
2539 // the one we want to change. While we could do this transformation, it
2540 // would likely not be profitable. This transformation removes one
2541 // instruction, and so even forcing RA to generate one move probably
2542 // makes it unprofitable.
2543 return false;
2544
2545 // Check whether CmpInstr can be made redundant by the current instruction.
2546 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2547 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2548 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2549 ((Instr.getOperand(1).getReg() == SrcReg &&
2550 Instr.getOperand(2).getReg() == SrcReg2) ||
2551 (Instr.getOperand(1).getReg() == SrcReg2 &&
2552 Instr.getOperand(2).getReg() == SrcReg))) {
2553 Sub = &*I;
2554 break;
2555 }
2556
2557 if (I == B)
2558 // The 'and' is below the comparison instruction.
2559 return false;
2560 }
2561
2562 // Return false if no candidates exist.
2563 if (!MI && !Sub)
2564 return false;
2565
2566 // The single candidate is called MI.
2567 if (!MI) MI = Sub;
2568
2569 int NewOpC = -1;
2570 int MIOpC = MI->getOpcode();
2571 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2572 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2573 NewOpC = MIOpC;
2574 else {
2575 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2576 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2577 NewOpC = MIOpC;
2578 }
2579
2580 // FIXME: On the non-embedded POWER architectures, only some of the record
2581 // forms are fast, and we should use only the fast ones.
2582
2583 // The defining instruction has a record form (or is already a record
2584 // form). It is possible, however, that we'll need to reverse the condition
2585 // code of the users.
2586 if (NewOpC == -1)
2587 return false;
2588
2589 // This transformation should not be performed if `nsw` is missing and the
2590 // comparison is not `equalityOnly`, since on overflow sub_lt and sub_gt in
2591 // CRReg do not reflect the correct order. If `equalityOnly` is true, sub_eq
2592 // in CRReg still reflects whether the compared values are equal, so the optimization remains valid.
2593 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2594 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2595 return false;
2596
2597 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2598 // needs to be updated to be based on SUB. Push the condition code
2599 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2600 // condition code of these operands will be modified.
2601 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2602 // comparison against 0, which may modify predicate.
2603 bool ShouldSwap = false;
2604 if (Sub && Value == 0) {
2605 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2606 Sub->getOperand(2).getReg() == SrcReg;
2607
2608 // The operands to subf are the opposite of sub, so only in the fixed-point
2609 // case, invert the order.
2610 ShouldSwap = !ShouldSwap;
2611 }
2612
2613 if (ShouldSwap)
2614 for (MachineRegisterInfo::use_instr_iterator
2615 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2616 I != IE; ++I) {
2617 MachineInstr *UseMI = &*I;
2618 if (UseMI->getOpcode() == PPC::BCC) {
2619 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2620 unsigned PredCond = PPC::getPredicateCondition(Pred);
2621 assert((!equalityOnly ||
2622 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2623 "Invalid predicate for equality-only optimization");
2624 (void)PredCond; // To suppress warning in release build.
2625 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2626 PPC::InvertPredicate(Pred)));
2627 } else if (UseMI->getOpcode() == PPC::ISEL ||
2628 UseMI->getOpcode() == PPC::ISEL8) {
2629 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2630 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2631 "Invalid CR bit for equality-only optimization");
2632
2633 if (NewSubReg == PPC::sub_lt)
2634 NewSubReg = PPC::sub_gt;
2635 else if (NewSubReg == PPC::sub_gt)
2636 NewSubReg = PPC::sub_lt;
2637
2638 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2639 NewSubReg));
2640 } else // We need to abort on a user we don't understand.
2641 return false;
2642 }
2643 assert(!(Value != 0 && ShouldSwap) &&
2644 "Non-zero immediate support and ShouldSwap"
2645 "may conflict in updating predicate");
2646
2647 // Create a new virtual register to hold the value of the CR set by the
2648 // record-form instruction. If the instruction was not previously in
2649 // record form, then set the kill flag on the CR.
2650 CmpInstr.eraseFromParent();
2651
2652 MachineBasicBlock::iterator MII = MI;
2653 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2654 get(TargetOpcode::COPY), CRReg)
2655 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2656
2657 // Even if CR0 register were dead before, it is alive now since the
2658 // instruction we just built uses it.
2659 MI->clearRegisterDeads(PPC::CR0);
2660
2661 if (MIOpC != NewOpC) {
2662 // We need to be careful here: we're replacing one instruction with
2663 // another, and we need to make sure that we get all of the right
2664 // implicit uses and defs. On the other hand, the caller may be holding
2665 // an iterator to this instruction, and so we can't delete it (this is
2666 // specifically the case if this is the instruction directly after the
2667 // compare).
2668
2669 // Rotates are expensive instructions. If we're emitting a record-form
2670 // rotate that can just be an andi/andis, we should just emit that.
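// For example, a record-form RLWINM with SH = 0, MB = 24, ME = 31 masks with
// 0xFF and can therefore be emitted as ANDI_rec with immediate 0xFF instead.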
2671 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2672 Register GPRRes = MI->getOperand(0).getReg();
2673 int64_t SH = MI->getOperand(2).getImm();
2674 int64_t MB = MI->getOperand(3).getImm();
2675 int64_t ME = MI->getOperand(4).getImm();
2676 // We can only do this if both the start and end of the mask are in the
2677 // same halfword.
2678 bool MBInLoHWord = MB >= 16;
2679 bool MEInLoHWord = ME >= 16;
2680 uint64_t Mask = ~0LLU;
2681
2682 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2683 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2684 // The mask value needs to shift right 16 if we're emitting andis.
2685 Mask >>= MBInLoHWord ? 0 : 16;
2686 NewOpC = MIOpC == PPC::RLWINM
2687 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2688 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2689 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2690 (ME - MB + 1 == SH) && (MB >= 16)) {
2691 // If we are rotating by the exact number of bits as are in the mask
2692 // and the mask is in the least significant bits of the register,
2693 // that's just an andis. (as long as the GPR result has no uses).
2694 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2695 Mask >>= 16;
2696 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2697 }
2698 // If we've set the mask, we can transform.
2699 if (Mask != ~0LLU) {
2700 MI->removeOperand(4);
2701 MI->removeOperand(3);
2702 MI->getOperand(2).setImm(Mask);
2703 NumRcRotatesConvertedToRcAnd++;
2704 }
2705 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2706 int64_t MB = MI->getOperand(3).getImm();
2707 if (MB >= 48) {
2708 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2709 NewOpC = PPC::ANDI8_rec;
2710 MI->removeOperand(3);
2711 MI->getOperand(2).setImm(Mask);
2712 NumRcRotatesConvertedToRcAnd++;
2713 }
2714 }
2715
2716 const MCInstrDesc &NewDesc = get(NewOpC);
2717 MI->setDesc(NewDesc);
2718
2719 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2720 if (!MI->definesRegister(ImpDef)) {
2721 MI->addOperand(*MI->getParent()->getParent(),
2722 MachineOperand::CreateReg(ImpDef, true, true));
2723 }
2724 }
2725 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2726 if (!MI->readsRegister(ImpUse)) {
2727 MI->addOperand(*MI->getParent()->getParent(),
2728 MachineOperand::CreateReg(ImpUse, false, true));
2729 }
2730 }
2731 }
2732 assert(MI->definesRegister(PPC::CR0) &&
2733 "Record-form instruction does not define cr0?");
2734
2735 // Modify the condition code of operands in OperandsToUpdate.
2736 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2737 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2738 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2739 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2740
2741 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2742 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2743
2744 return true;
2745}
2746
2747bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
2748 MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
2749 if (MRI->isSSA())
2750 return false;
2751
2752 Register SrcReg, SrcReg2;
2753 int64_t CmpMask, CmpValue;
2754 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2755 return false;
2756
2757 // Try to optimize the comparison against 0.
2758 if (CmpValue || !CmpMask || SrcReg2)
2759 return false;
2760
2761 // The record forms set the condition register based on a signed comparison
2762 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2763 // equality checks in post-RA, we are more restricted on an unsigned
2764 // comparison.
2765 unsigned Opc = CmpMI.getOpcode();
2766 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2767 return false;
2768
2769 // The record forms are always based on a 64-bit comparison on PPC64
2770 // (similarly, a 32-bit comparison on PPC32), while CMPWI is a 32-bit
2771 // comparison. Since we can't do the equality checks post-RA, we bail out
2772 // in that case.
2773 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2774 return false;
2775
2776 // CmpMI can't be deleted if it has implicit def.
2777 if (CmpMI.hasImplicitDef())
2778 return false;
2779
2780 bool SrcRegHasOtherUse = false;
2781 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2782 if (!SrcMI || !SrcMI->definesRegister(SrcReg))
2783 return false;
2784
2785 MachineOperand RegMO = CmpMI.getOperand(0);
2786 Register CRReg = RegMO.getReg();
2787 if (CRReg != PPC::CR0)
2788 return false;
2789
2790 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2791 bool SeenUseOfCRReg = false;
2792 bool IsCRRegKilled = false;
2793 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2794 SeenUseOfCRReg) ||
2795 SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
2796 return false;
2797
2798 int SrcMIOpc = SrcMI->getOpcode();
2799 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2800 if (NewOpC == -1)
2801 return false;
2802
2803 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2804 LLVM_DEBUG(SrcMI->dump());
2805
2806 const MCInstrDesc &NewDesc = get(NewOpC);
2807 SrcMI->setDesc(NewDesc);
2808 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2809 .addReg(CRReg, RegState::ImplicitDefine);
2810 SrcMI->clearRegisterDeads(CRReg);
2811
2812 assert(SrcMI->definesRegister(PPC::CR0) &&
2813 "Record-form instruction does not define cr0?");
2814
2815 LLVM_DEBUG(dbgs() << "with: ");
2816 LLVM_DEBUG(SrcMI->dump());
2817 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2818 LLVM_DEBUG(CmpMI.dump());
2819 return true;
2820}
2821
2822bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
2823 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2824 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2825 const TargetRegisterInfo *TRI) const {
2826 const MachineOperand *BaseOp;
2827 OffsetIsScalable = false;
2828 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2829 return false;
2830 BaseOps.push_back(BaseOp);
2831 return true;
2832}
2833
2834static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2835 const TargetRegisterInfo *TRI) {
2836 // If this is a volatile load/store, don't mess with it.
2837 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2838 return false;
2839
2840 if (LdSt.getOperand(2).isFI())
2841 return true;
2842
2843 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2844 // Can't cluster if the instruction modifies the base register
2845 // or it is update form. e.g. ld r2,3(r2)
2846 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2847 return false;
2848
2849 return true;
2850}
2851
2852 // Only cluster an instruction pair when they have the same opcode and that
2853 // opcode is clusterable according to the PowerPC specification.
2854static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2855 const PPCSubtarget &Subtarget) {
2856 switch (FirstOpc) {
2857 default:
2858 return false;
2859 case PPC::STD:
2860 case PPC::STFD:
2861 case PPC::STXSD:
2862 case PPC::DFSTOREf64:
2863 return FirstOpc == SecondOpc;
2864 // The PowerPC backend has two opcodes, STW and STW8, for the "stw" instruction
2865 // to handle 32-bit and 64-bit instruction selection. They form a clusterable
2866 // pair even though they are different opcodes.
2867 case PPC::STW:
2868 case PPC::STW8:
2869 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2870 }
2871}
2872
2873bool PPCInstrInfo::shouldClusterMemOps(
2874 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2875 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2876 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2877 unsigned NumBytes) const {
2878
2879 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2880 const MachineOperand &BaseOp1 = *BaseOps1.front();
2881 const MachineOperand &BaseOp2 = *BaseOps2.front();
2882 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2883 "Only base registers and frame indices are supported.");
2884
2885 // ClusterSize means the number of memory operations that will have been
2886 // clustered if this hook returns true.
2887 // Don't cluster a memory op if at least two ops are already clustered.
2888 if (ClusterSize > 2)
2889 return false;
2890
2891 // Cluster the load/store only when they have the same base
2892 // register or FI.
2893 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2894 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2895 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2896 return false;
2897
2898 // Check if the load/store are clusterable according to the PowerPC
2899 // specification.
2900 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2901 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2902 unsigned FirstOpc = FirstLdSt.getOpcode();
2903 unsigned SecondOpc = SecondLdSt.getOpcode();
2904 const TargetRegisterInfo *TRI = &getRegisterInfo();
2905 // Cluster the loads/stores only when they have the same opcode and that
2906 // opcode is clusterable according to the PowerPC specification.
2907 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2908 return false;
2909
2910 // Can't cluster load/store that have ordered or volatile memory reference.
2911 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2912 !isLdStSafeToCluster(SecondLdSt, TRI))
2913 return false;
2914
2915 int64_t Offset1 = 0, Offset2 = 0;
2916 unsigned Width1 = 0, Width2 = 0;
2917 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2918 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2919 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2920 Width1 != Width2)
2921 return false;
2922
2923 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2924 "getMemOperandWithOffsetWidth return incorrect base op");
2925 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2926 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2927 return Offset1 + Width1 == Offset2;
2928}
2929
2930/// GetInstSize - Return the number of bytes of code the specified
2931 /// instruction may occupy. This returns the maximum number of bytes.
2932///
2933unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
2934 unsigned Opcode = MI.getOpcode();
2935
2936 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
2937 const MachineFunction *MF = MI.getParent()->getParent();
2938 const char *AsmStr = MI.getOperand(0).getSymbolName();
2939 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2940 } else if (Opcode == TargetOpcode::STACKMAP) {
2941 StackMapOpers Opers(&MI);
2942 return Opers.getNumPatchBytes();
2943 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2944 PatchPointOpers Opers(&MI);
2945 return Opers.getNumPatchBytes();
2946 } else {
2947 return get(Opcode).getSize();
2948 }
2949}
2950
2951std::pair<unsigned, unsigned>
2952PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2953 // PPC always uses a direct mask.
2954 return std::make_pair(TF, 0u);
2955}
2956
2957ArrayRef<std::pair<unsigned, const char *>>
2958PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2959 using namespace PPCII;
2960 static const std::pair<unsigned, const char *> TargetFlags[] = {
2961 {MO_PLT, "ppc-plt"},
2962 {MO_PIC_FLAG, "ppc-pic"},
2963 {MO_PCREL_FLAG, "ppc-pcrel"},
2964 {MO_GOT_FLAG, "ppc-got"},
2965 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2966 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2967 {MO_TPREL_FLAG, "ppc-tprel"},
2968 {MO_TLSLD_FLAG, "ppc-tlsld"},
2969 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2970 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2971 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2972 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2973 {MO_LO, "ppc-lo"},
2974 {MO_HA, "ppc-ha"},
2975 {MO_TPREL_LO, "ppc-tprel-lo"},
2976 {MO_TPREL_HA, "ppc-tprel-ha"},
2977 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2978 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2979 {MO_TOC_LO, "ppc-toc-lo"},
2980 {MO_TLS, "ppc-tls"},
2981 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2982 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2983 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
2984 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
2985 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
2986 };
2987 return ArrayRef(TargetFlags);
2988}
2989
2990// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
2991// The VSX versions have the advantage of a full 64-register target whereas
2992// the FP ones have the advantage of lower latency and higher throughput. So
2993// what we are after is using the faster instructions in low register pressure
2994// situations and using the larger register file in high register pressure
2995// situations.
2996bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
2997 unsigned UpperOpcode, LowerOpcode;
2998 switch (MI.getOpcode()) {
2999 case PPC::DFLOADf32:
3000 UpperOpcode = PPC::LXSSP;
3001 LowerOpcode = PPC::LFS;
3002 break;
3003 case PPC::DFLOADf64:
3004 UpperOpcode = PPC::LXSD;
3005 LowerOpcode = PPC::LFD;
3006 break;
3007 case PPC::DFSTOREf32:
3008 UpperOpcode = PPC::STXSSP;
3009 LowerOpcode = PPC::STFS;
3010 break;
3011 case PPC::DFSTOREf64:
3012 UpperOpcode = PPC::STXSD;
3013 LowerOpcode = PPC::STFD;
3014 break;
3015 case PPC::XFLOADf32:
3016 UpperOpcode = PPC::LXSSPX;
3017 LowerOpcode = PPC::LFSX;
3018 break;
3019 case PPC::XFLOADf64:
3020 UpperOpcode = PPC::LXSDX;
3021 LowerOpcode = PPC::LFDX;
3022 break;
3023 case PPC::XFSTOREf32:
3024 UpperOpcode = PPC::STXSSPX;
3025 LowerOpcode = PPC::STFSX;
3026 break;
3027 case PPC::XFSTOREf64:
3028 UpperOpcode = PPC::STXSDX;
3029 LowerOpcode = PPC::STFDX;
3030 break;
3031 case PPC::LIWAX:
3032 UpperOpcode = PPC::LXSIWAX;
3033 LowerOpcode = PPC::LFIWAX;
3034 break;
3035 case PPC::LIWZX:
3036 UpperOpcode = PPC::LXSIWZX;
3037 LowerOpcode = PPC::LFIWZX;
3038 break;
3039 case PPC::STIWX:
3040 UpperOpcode = PPC::STXSIWX;
3041 LowerOpcode = PPC::STFIWX;
3042 break;
3043 default:
3044 llvm_unreachable("Unknown Operation!");
3045 }
3046
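// The FP opcodes can only address F0-F31, which alias the first half of the
// VSX register file (VSL0-VSL31); any register above that range must use the
// VSX form to be reachable.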
3047 Register TargetReg = MI.getOperand(0).getReg();
3048 unsigned Opcode;
3049 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3050 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3051 Opcode = LowerOpcode;
3052 else
3053 Opcode = UpperOpcode;
3054 MI.setDesc(get(Opcode));
3055 return true;
3056}
3057
3058static bool isAnImmediateOperand(const MachineOperand &MO) {
3059 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3060}
3061
3062bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
3063 auto &MBB = *MI.getParent();
3064 auto DL = MI.getDebugLoc();
3065
3066 switch (MI.getOpcode()) {
3067 case PPC::BUILD_UACC: {
3068 MCRegister ACC = MI.getOperand(0).getReg();
3069 MCRegister UACC = MI.getOperand(1).getReg();
3070 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3071 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3072 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3073 // FIXME: This can easily be improved to look up to the top of the MBB
3074 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3075 // we can just re-target any such XXLOR's to DstVSR + offset.
3076 for (int VecNo = 0; VecNo < 4; VecNo++)
3077 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3078 .addReg(SrcVSR + VecNo)
3079 .addReg(SrcVSR + VecNo);
3080 }
3081 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3082 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3083 // with a NOP.
3084 [[fallthrough]];
3085 }
3086 case PPC::KILL_PAIR: {
3087 MI.setDesc(get(PPC::UNENCODED_NOP));
3088 MI.removeOperand(1);
3089 MI.removeOperand(0);
3090 return true;
3091 }
3092 case TargetOpcode::LOAD_STACK_GUARD: {
3093 assert(Subtarget.isTargetLinux() &&
3094 "Only Linux target is expected to contain LOAD_STACK_GUARD");
3095 const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3096 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3097 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3098 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3099 .addImm(Offset)
3100 .addReg(Reg);
3101 return true;
3102 }
3103 case PPC::PPCLdFixedAddr: {
3104 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3105 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3106 int64_t Offset = 0;
3107 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3108 MI.setDesc(get(PPC::LWZ));
3109 uint64_t FAType = MI.getOperand(1).getImm();
3110#undef PPC_LNX_FEATURE
3111#undef PPC_LNX_CPU
3112#define PPC_LNX_DEFINE_OFFSETS
3113#include "llvm/TargetParser/PPCTargetParser.def"
3114 bool IsLE = Subtarget.isLittleEndian();
3115 bool Is64 = Subtarget.isPPC64();
3116 if (FAType == PPC_FAWORD_HWCAP) {
3117 if (IsLE)
3118 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3119 else
3120 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3121 } else if (FAType == PPC_FAWORD_HWCAP2) {
3122 if (IsLE)
3123 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3124 else
3125 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3126 } else if (FAType == PPC_FAWORD_CPUID) {
3127 if (IsLE)
3128 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3129 else
3130 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3131 }
3132 assert(Offset && "Do not know the offset for this fixed addr load");
3133 MI.removeOperand(1);
3135 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3136 .addImm(Offset)
3137 .addReg(Reg);
3138 return true;
3139#define PPC_TGT_PARSER_UNDEF_MACROS
3140#include "llvm/TargetParser/PPCTargetParser.def"
3141#undef PPC_TGT_PARSER_UNDEF_MACROS
3142 }
3143 case PPC::DFLOADf32:
3144 case PPC::DFLOADf64:
3145 case PPC::DFSTOREf32:
3146 case PPC::DFSTOREf64: {
3147 assert(Subtarget.hasP9Vector() &&
3148 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3149 assert(MI.getOperand(2).isReg() &&
3150 isAnImmediateOperand(MI.getOperand(1)) &&
3151 "D-form op must have register and immediate operands");
3152 return expandVSXMemPseudo(MI);
3153 }
3154 case PPC::XFLOADf32:
3155 case PPC::XFSTOREf32:
3156 case PPC::LIWAX:
3157 case PPC::LIWZX:
3158 case PPC::STIWX: {
3159 assert(Subtarget.hasP8Vector() &&
3160 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3161 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3162 "X-form op must have register and register operands");
3163 return expandVSXMemPseudo(MI);
3164 }
3165 case PPC::XFLOADf64:
3166 case PPC::XFSTOREf64: {
3167 assert(Subtarget.hasVSX() &&
3168 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3169 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3170 "X-form op must have register and register operands");
3171 return expandVSXMemPseudo(MI);
3172 }
3173 case PPC::SPILLTOVSR_LD: {
3174 Register TargetReg = MI.getOperand(0).getReg();
3175 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3176 MI.setDesc(get(PPC::DFLOADf64));
3177 return expandPostRAPseudo(MI);
3178 }
3179 else
3180 MI.setDesc(get(PPC::LD));
3181 return true;
3182 }
3183 case PPC::SPILLTOVSR_ST: {
3184 Register SrcReg = MI.getOperand(0).getReg();
3185 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3186 NumStoreSPILLVSRRCAsVec++;
3187 MI.setDesc(get(PPC::DFSTOREf64));
3188 return expandPostRAPseudo(MI);
3189 } else {
3190 NumStoreSPILLVSRRCAsGpr++;
3191 MI.setDesc(get(PPC::STD));
3192 }
3193 return true;
3194 }
3195 case PPC::SPILLTOVSR_LDX: {
3196 Register TargetReg = MI.getOperand(0).getReg();
3197 if (PPC::VSFRCRegClass.contains(TargetReg))
3198 MI.setDesc(get(PPC::LXSDX));
3199 else
3200 MI.setDesc(get(PPC::LDX));
3201 return true;
3202 }
3203 case PPC::SPILLTOVSR_STX: {
3204 Register SrcReg = MI.getOperand(0).getReg();
3205 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3206 NumStoreSPILLVSRRCAsVec++;
3207 MI.setDesc(get(PPC::STXSDX));
3208 } else {
3209 NumStoreSPILLVSRRCAsGpr++;
3210 MI.setDesc(get(PPC::STDX));
3211 }
3212 return true;
3213 }
3214
3215 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
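// The expansion below forms the usual PPC load-ordering idiom: a compare of
// the loaded value with itself feeding a conditional branch (CTRL_DEP) that
// can never be taken, followed by isync; the data dependency plus isync
// orders the prior load against later memory operations.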
3216 case PPC::CFENCE:
3217 case PPC::CFENCE8: {
3218 auto Val = MI.getOperand(0).getReg();
3219 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3220 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3221 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3222 .addImm(PPC::PRED_NE_MINUS)
3223 .addReg(PPC::CR7)
3224 .addImm(1);
3225 MI.setDesc(get(PPC::ISYNC));
3226 MI.removeOperand(0);
3227 return true;
3228 }
3229 }
3230 return false;
3231}
3232
3233// Essentially a compile-time implementation of a compare->isel sequence.
3234// It takes two constants to compare, along with the true/false registers
3235// and the comparison type (as a subreg to a CR field) and returns one
3236// of the true/false registers, depending on the comparison results.
3237static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3238 unsigned TrueReg, unsigned FalseReg,
3239 unsigned CRSubReg) {
3240 // Signed comparisons. The immediates are assumed to be sign-extended.
3241 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3242 switch (CRSubReg) {
3243 default: llvm_unreachable("Unknown integer comparison type.");
3244 case PPC::sub_lt:
3245 return Imm1 < Imm2 ? TrueReg : FalseReg;
3246 case PPC::sub_gt:
3247 return Imm1 > Imm2 ? TrueReg : FalseReg;
3248 case PPC::sub_eq:
3249 return Imm1 == Imm2 ? TrueReg : FalseReg;
3250 }
3251 }
3252 // Unsigned comparisons.
3253 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3254 switch (CRSubReg) {
3255 default: llvm_unreachable("Unknown integer comparison type.");
3256 case PPC::sub_lt:
3257 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3258 case PPC::sub_gt:
3259 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3260 case PPC::sub_eq:
3261 return Imm1 == Imm2 ? TrueReg : FalseReg;
3262 }
3263 }
3264 return PPC::NoRegister;
3265}
3266
3267void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
3268 unsigned OpNo,
3269 int64_t Imm) const {
3270 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3271 // Replace the REG with the Immediate.
3272 Register InUseReg = MI.getOperand(OpNo).getReg();
3273 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3274
3275 // We need to make sure that the MI no longer has any implicit use of
3276 // this REG. We don't call MI.implicit_operands().empty() to return
3277 // early, since MI's MCID might be changed by the calling context; as a
3278 // result its number of explicit operands may change, which shifts where
3279 // the implicit operands begin.
3280 const TargetRegisterInfo *TRI = &getRegisterInfo();
3281 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
3282 if (UseOpIdx >= 0) {
3283 MachineOperand &MO = MI.getOperand(UseOpIdx);
3284 if (MO.isImplicit())
3285 // The operands must always be in the following order:
3286 // - explicit reg defs,
3287 // - other explicit operands (reg uses, immediates, etc.),
3288 // - implicit reg defs
3289 // - implicit reg uses
3290 // Therefore, removing the implicit operand won't change the explicit
3291 // operands layout.
3292 MI.removeOperand(UseOpIdx);
3293 }
3294}
3295
3296// Replace an instruction with one that materializes a constant (and sets
3297// CR0 if the original instruction was a record-form instruction).
3298void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
3299 const LoadImmediateInfo &LII) const {
3300 // Remove existing operands.
3301 int OperandToKeep = LII.SetCR ? 1 : 0;
3302 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3303 MI.removeOperand(i);
3304
3305 // Replace the instruction.
3306 if (LII.SetCR) {
3307 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3308 // Set the immediate.
3309 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3310 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3311 return;
3312 }
3313 else
3314 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3315
3316 // Set the immediate.
3317 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3318 .addImm(LII.Imm);
3319}
3320
3321MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
3322 bool &SeenIntermediateUse) const {
3323 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3324 "Should be called after register allocation.");
3325  const TargetRegisterInfo *TRI = &getRegisterInfo();
3326  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3327 It++;
3328 SeenIntermediateUse = false;
3329 for (; It != E; ++It) {
3330 if (It->modifiesRegister(Reg, TRI))
3331 return &*It;
3332 if (It->readsRegister(Reg, TRI))
3333 SeenIntermediateUse = true;
3334 }
3335 return nullptr;
3336}
3337
3338void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,
3339                                        MachineBasicBlock::iterator MBBI,
3340                                        const DebugLoc &DL, Register Reg,
3341 int64_t Imm) const {
3342  assert(!MBB.getParent()->getRegInfo().isSSA() &&
3343         "Register should be in non-SSA form after RA");
3344 bool isPPC64 = Subtarget.isPPC64();
3345 // FIXME: Materialization here is not optimal.
3346  // For some special bit patterns we can use fewer instructions.
3347 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
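  // Illustrative expansion (hypothetical register) for the 64-bit case below,
  // materializing Imm = 0x123456789ABCDEF0 into %x5:
  //   %x5 = LIS8 0x1234           ; bits 48..63 (loaded into the high halfword)
  //   %x5 = ORI8 %x5, 0x5678      ; bits 32..47
  //   %x5 = RLDICR %x5, 32, 31    ; shift the assembled 32 bits left by 32
  //   %x5 = ORIS8 %x5, 0x9ABC     ; bits 16..31
  //   %x5 = ORI8 %x5, 0xDEF0      ; bits 0..15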
3348 if (isInt<16>(Imm)) {
3349 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3350 } else if (isInt<32>(Imm)) {
3351 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3352 .addImm(Imm >> 16);
3353 if (Imm & 0xFFFF)
3354 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3355 .addReg(Reg, RegState::Kill)
3356 .addImm(Imm & 0xFFFF);
3357 } else {
3358 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3359 "only supported in PPC64");
3360 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3361 if ((Imm >> 32) & 0xFFFF)
3362 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3363 .addReg(Reg, RegState::Kill)
3364 .addImm((Imm >> 32) & 0xFFFF);
3365 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3366 .addReg(Reg, RegState::Kill)
3367 .addImm(32)
3368 .addImm(31);
3369 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3370 .addReg(Reg, RegState::Kill)
3371 .addImm((Imm >> 16) & 0xFFFF);
3372 if (Imm & 0xFFFF)
3373 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3374 .addReg(Reg, RegState::Kill)
3375 .addImm(Imm & 0xFFFF);
3376 }
3377}
3378
3379MachineInstr *PPCInstrInfo::getForwardingDefMI(
3380    MachineInstr &MI,
3381    unsigned &OpNoForForwarding,
3382 bool &SeenIntermediateUse) const {
3383 OpNoForForwarding = ~0U;
3384 MachineInstr *DefMI = nullptr;
3385 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3386  const TargetRegisterInfo *TRI = &getRegisterInfo();
3387  // If we're in SSA, get the defs through the MRI. Otherwise, only look
3388 // within the basic block to see if the register is defined using an
3389 // LI/LI8/ADDI/ADDI8.
3390 if (MRI->isSSA()) {
3391 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3392 if (!MI.getOperand(i).isReg())
3393 continue;
3394 Register Reg = MI.getOperand(i).getReg();
3395 if (!Reg.isVirtual())
3396 continue;
3397 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3398 if (TrueReg.isVirtual()) {
3399 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3400 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3401 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3402 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3403 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3404 OpNoForForwarding = i;
3405 DefMI = DefMIForTrueReg;
3406          // Both an ADDI and an LI may feed this instruction at the same
3407          // time. We prefer to fold the LI operand, since LI has only one
3408          // Imm operand and is more likely to be convertible. So if the
3409          // current DefMI is ADDI/ADDI8, keep looking for a possible LI/LI8.
3410 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3411 break;
3412 }
3413 }
3414 }
3415 } else {
3416 // Looking back through the definition for each operand could be expensive,
3417 // so exit early if this isn't an instruction that either has an immediate
3418 // form or is already an immediate form that we can handle.
3419 ImmInstrInfo III;
3420 unsigned Opc = MI.getOpcode();
3421 bool ConvertibleImmForm =
3422 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3423 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3424 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3425 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3426 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3427 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3428 Opc == PPC::RLWINM8_rec;
3429 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3430 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3431 : false;
3432 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3433 return nullptr;
3434
3435 // Don't convert or %X, %Y, %Y since that's just a register move.
3436 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3437 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3438 return nullptr;
3439 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3440 MachineOperand &MO = MI.getOperand(i);
3441 SeenIntermediateUse = false;
3442 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3443 Register Reg = MI.getOperand(i).getReg();
3444 // If we see another use of this reg between the def and the MI,
3445 // we want to flag it so the def isn't deleted.
3446 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3447 if (DefMI) {
3448 // Is this register defined by some form of add-immediate (including
3449 // load-immediate) within this basic block?
3450 switch (DefMI->getOpcode()) {
3451 default:
3452 break;
3453 case PPC::LI:
3454 case PPC::LI8:
3455 case PPC::ADDItocL:
3456 case PPC::ADDI:
3457 case PPC::ADDI8:
3458 OpNoForForwarding = i;
3459 return DefMI;
3460 }
3461 }
3462 }
3463 }
3464 }
3465 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3466}
3467
3468unsigned PPCInstrInfo::getSpillTarget() const {
3469 // With P10, we may need to spill paired vector registers or accumulator
3470 // registers. MMA implies paired vectors, so we can just check that.
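  // Illustrative mapping of the expression below (a summary, no added logic):
  //   ISAFuture -> 3, ISA 3.1 / paired vector memops (P10) -> 2,
  //   P9 vector -> 1, otherwise -> 0.
  // The result is used to index the rows of {Store,Load}SpillOpcodesArray.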
3471 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3472  return Subtarget.isISAFuture()    ? 3
3473         : IsP10Variant             ? 2
3474         : Subtarget.hasP9Vector()  ? 1 : 0;
3475}
3476
3477ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3478 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3479}
3480
3481ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3482 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3483}
3484
3485// This opt tries to convert the following imm form to an index form to save an
3486// add for stack variables.
3487// Return false if no such pattern found.
3488//
3489// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3490// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3491// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3492//
3493// can be converted to:
3494//
3495// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3496// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3497//
3498// In order to eliminate the ADD instr, make sure that:
3499// 1: (OffsetAddi + OffsetImm) must fit in int16, since the offset is used in
3500//    the new ADDI instr and ADDI can only take an int16 Imm.
3501// 2: ToBeChangedReg must be killed in the ADD instr and have no other use
3502//    between the ADDI and ADD instrs, since its original def in the ADDI is
3503//    changed by the new ADDI instr. It must also have no new def between the
3504//    ADD and Imm instrs, as ToBeChangedReg is used in the Index instr.
3505// 3: ToBeDeletedReg must be killed in the Imm instr and have no other use
3506//    between the ADD and Imm instrs, since the ADD instr is eliminated.
3507// 4: ScaleReg must not be redefined between the ADD and Imm instrs, since it
3508//    is moved to the Index instr.
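// A concrete sketch in pseudo-MIR (hypothetical registers and offsets):
//   %x5 = ADDI8 %x1, 32          ; ADDI instr, OffsetAddi = 32
//   %x6 = ADD8 killed %x5, %x4   ; ADD instr
//   %r7 = LWZ 8, killed %x6      ; Imm instr, OffsetImm = 8
// can be converted to:
//   %x5 = ADDI8 %x1, 40          ; new ADDI instr, 32 + 8
//   %r7 = LWZX %x4, killed %x5   ; Index instr; the ADD is deleted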
3509bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
3510  MachineFunction *MF = MI.getParent()->getParent();
3511  MachineRegisterInfo *MRI = &MF->getRegInfo();
3512  bool PostRA = !MRI->isSSA();
3513  // Do this opt after PEI, which runs after RA. Stack slot expansion in PEI
3514  // may expose such opportunities, since that is where stack slot offsets to
3515  // the frame base (OffsetAddi) are determined.
3516 if (!PostRA)
3517 return false;
3518 unsigned ToBeDeletedReg = 0;
3519 int64_t OffsetImm = 0;
3520 unsigned XFormOpcode = 0;
3521 ImmInstrInfo III;
3522
3523  // Check if the Imm instr meets the requirements.
3524 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3525 III))
3526 return false;
3527
3528 bool OtherIntermediateUse = false;
3529 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3530
3531  // Exit if there is another use between the ADD and Imm instrs, or if no def was found.
3532 if (OtherIntermediateUse || !ADDMI)
3533 return false;
3534
3535  // Check if the ADD instr meets the requirements.
3536 if (!isADDInstrEligibleForFolding(*ADDMI))
3537 return false;
3538
3539 unsigned ScaleRegIdx = 0;
3540 int64_t OffsetAddi = 0;
3541 MachineInstr *ADDIMI = nullptr;
3542
3543 // Check if there is a valid ToBeChangedReg in ADDMI.
3544 // 1: It must be killed.
3545 // 2: Its definition must be a valid ADDIMI.
3546  // 3: It must satisfy the int16 offset requirement.
3547 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3548 ScaleRegIdx = 2;
3549 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3550 ScaleRegIdx = 1;
3551 else
3552 return false;
3553
3554 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3555 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3556 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3557 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3558                       MachineBasicBlock::iterator End) {
3559    for (auto It = ++Start; It != End; It++)
3560 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3561 return true;
3562 return false;
3563 };
3564
3565  // We are trying to replace the ImmOpNo with ScaleReg. Give up if ScaleReg
3566  // is R0/X0, since that operand position treats it as a special zero.
3567 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3568 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3569 return false;
3570
3571 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3572 // and Imm Instr.
3573 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3574 return false;
3575
3576 // Now start to do the transformation.
3577 LLVM_DEBUG(dbgs() << "Replace instruction: "
3578 << "\n");
3579 LLVM_DEBUG(ADDIMI->dump());
3580 LLVM_DEBUG(ADDMI->dump());
3581 LLVM_DEBUG(MI.dump());
3582 LLVM_DEBUG(dbgs() << "with: "
3583 << "\n");
3584
3585 // Update ADDI instr.
3586 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3587
3588 // Update Imm instr.
3589 MI.setDesc(get(XFormOpcode));
3590 MI.getOperand(III.ImmOpNo)
3591 .ChangeToRegister(ScaleReg, false, false,
3592 ADDMI->getOperand(ScaleRegIdx).isKill());
3593
3594 MI.getOperand(III.OpNoForForwarding)
3595 .ChangeToRegister(ToBeChangedReg, false, false, true);
3596
3597 // Eliminate ADD instr.
3598 ADDMI->eraseFromParent();
3599
3600 LLVM_DEBUG(ADDIMI->dump());
3601 LLVM_DEBUG(MI.dump());
3602
3603 return true;
3604}
3605
3606bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
3607                                                 int64_t &Imm) const {
3608 unsigned Opc = ADDIMI.getOpcode();
3609
3610 // Exit if the instruction is not ADDI.
3611 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3612 return false;
3613
3614 // The operand may not necessarily be an immediate - it could be a relocation.
3615 if (!ADDIMI.getOperand(2).isImm())
3616 return false;
3617
3618 Imm = ADDIMI.getOperand(2).getImm();
3619
3620 return true;
3621}
3622
3623bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
3624  unsigned Opc = ADDMI.getOpcode();
3625
3626 // Exit if the instruction is not ADD.
3627 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3628}
3629
3630bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
3631                                                unsigned &ToBeDeletedReg,
3632 unsigned &XFormOpcode,
3633 int64_t &OffsetImm,
3634 ImmInstrInfo &III) const {
3635 // Only handle load/store.
3636 if (!MI.mayLoadOrStore())
3637 return false;
3638
3639 unsigned Opc = MI.getOpcode();
3640
3641 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3642
3643 // Exit if instruction has no index form.
3644 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3645 return false;
3646
3647 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3648 if (!instrHasImmForm(XFormOpcode,
3649 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3650 return false;
3651
3652 if (!III.IsSummingOperands)
3653 return false;
3654
3655 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3656 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3657 // Only support imm operands, not relocation slots or others.
3658 if (!ImmOperand.isImm())
3659 return false;
3660
3661 assert(RegOperand.isReg() && "Instruction format is not right");
3662
3663  // If there are other uses of ToBeDeletedReg after the Imm instr, we cannot delete it.
3664 if (!RegOperand.isKill())
3665 return false;
3666
3667 ToBeDeletedReg = RegOperand.getReg();
3668 OffsetImm = ImmOperand.getImm();
3669
3670 return true;
3671}
3672
3673bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
3674                                         MachineInstr *&ADDIMI,
3675 int64_t &OffsetAddi,
3676 int64_t OffsetImm) const {
3677 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3678 MachineOperand &MO = ADDMI->getOperand(Index);
3679
3680 if (!MO.isKill())
3681 return false;
3682
3683 bool OtherIntermediateUse = false;
3684
3685 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3686  // Currently we handle only the single "add + Imminstr" pair case; exit if
3687  // another intermediate use of ToBeChangedReg is found.
3688 // TODO: handle the cases where there are other "add + Imminstr" pairs
3689 // with same offset in Imminstr which is like:
3690 //
3691 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3692 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3693 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3694 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3695 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3696 //
3697 // can be converted to:
3698 //
3699 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3700 // (OffsetAddi + OffsetImm)
3701 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3702 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3703
3704 if (OtherIntermediateUse || !ADDIMI)
3705 return false;
3706  // Check if the ADDI instr meets the requirements.
3707 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3708 return false;
3709
3710 if (isInt<16>(OffsetAddi + OffsetImm))
3711 return true;
3712 return false;
3713}
3714
3715// If this instruction has an immediate form and one of its operands is a
3716// result of a load-immediate or an add-immediate, convert it to
3717// the immediate form if the constant is in range.
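// A minimal sketch in pseudo-MIR (hypothetical registers):
//   %r5 = LI 44
//   %r6 = ADD4 %r4, killed %r5
// is rewritten as
//   %r6 = ADDI %r4, 44
// and the now-unneeded LI is reported through KilledDef so the caller can
// erase it.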
3718bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
3719                                          SmallSet<Register, 4> &RegsToUpdate,
3720 MachineInstr **KilledDef) const {
3721 MachineFunction *MF = MI.getParent()->getParent();
3722  MachineRegisterInfo *MRI = &MF->getRegInfo();
3723  bool PostRA = !MRI->isSSA();
3724 bool SeenIntermediateUse = true;
3725 unsigned ForwardingOperand = ~0U;
3726 MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
3727 SeenIntermediateUse);
3728 if (!DefMI)
3729 return false;
3730 assert(ForwardingOperand < MI.getNumOperands() &&
3731 "The forwarding operand needs to be valid at this point");
3732 bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
3733 bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
3734 if (KilledDef && KillFwdDefMI)
3735 *KilledDef = DefMI;
3736
3737 // Conservatively add defs from DefMI and defs/uses from MI to the set of
3738 // registers that need their kill flags updated.
3739 for (const MachineOperand &MO : DefMI->operands())
3740 if (MO.isReg() && MO.isDef())
3741 RegsToUpdate.insert(MO.getReg());
3742 for (const MachineOperand &MO : MI.operands())
3743 if (MO.isReg())
3744 RegsToUpdate.insert(MO.getReg());
3745
3746  // If this is an imm instruction whose register operand is produced by an
3747  // ADDI, fold the immediate into the imm instruction directly.
3748 if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
3749 PPC::INSTRUCTION_LIST_END &&
3750 transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
3751 return true;
3752
3753 ImmInstrInfo III;
3754 bool IsVFReg = MI.getOperand(0).isReg()
3755 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3756 : false;
3757 bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
3758 // If this is a reg+reg instruction that has a reg+imm form,
3759 // and one of the operands is produced by an add-immediate,
3760 // try to convert it.
3761 if (HasImmForm &&
3762 transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
3763 KillFwdDefMI))
3764 return true;
3765
3766 // If this is a reg+reg instruction that has a reg+imm form,
3767 // and one of the operands is produced by LI, convert it now.
3768 if (HasImmForm &&
3769 transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
3770 return true;
3771
3772  // If this is not a reg+reg instruction, but the DefMI is LI/LI8, check if
3773  // its user MI can be simplified to an LI.
3774 if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
3775 return true;
3776
3777 return false;
3778}
3779
3780bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
3781                                 MachineInstr **ToErase) const {
3782 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3783 Register FoldingReg = MI.getOperand(1).getReg();
3784 if (!FoldingReg.isVirtual())
3785 return false;
3786 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3787 if (SrcMI->getOpcode() != PPC::RLWINM &&
3788 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3789 SrcMI->getOpcode() != PPC::RLWINM8 &&
3790 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3791 return false;
3792 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3793 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3794 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3795 "Invalid PPC::RLWINM Instruction!");
3796 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3797 uint64_t SHMI = MI.getOperand(2).getImm();
3798 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3799 uint64_t MBMI = MI.getOperand(3).getImm();
3800 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3801 uint64_t MEMI = MI.getOperand(4).getImm();
3802
3803 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3804 "Invalid PPC::RLWINM Instruction!");
3805  // If MBMI is bigger than MEMI, we can never get a run of ones.
3806 // RotatedSrcMask non-wrap:
3807 // 0........31|32........63
3808 // RotatedSrcMask: B---E B---E
3809 // MaskMI: -----------|--E B------
3810 // Result: ----- --- (Bad candidate)
3811 //
3812 // RotatedSrcMask wrap:
3813 // 0........31|32........63
3814 // RotatedSrcMask: --E B----|--E B----
3815 // MaskMI: -----------|--E B------
3816 // Result: --- -----|--- ----- (Bad candidate)
3817 //
3818 // One special case is RotatedSrcMask is a full set mask.
3819 // RotatedSrcMask full:
3820 // 0........31|32........63
3821 // RotatedSrcMask: ------EB---|-------EB---
3822 // MaskMI: -----------|--E B------
3823 // Result: -----------|--- ------- (Good candidate)
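  // A worked example of the MBMI <= MEMI path (hypothetical registers):
  //   %r5 = RLWINM %r4, 4, 0, 27        ; SrcMI, mask = 0xFFFFFFF0
  //   %r6 = RLWINM killed %r5, 4, 0, 23 ; MI,    mask = 0xFFFFFF00
  // folds to
  //   %r6 = RLWINM %r4, 8, 0, 23
  // because rotl32(0xFFFFFFF0, 4) & 0xFFFFFF00 == 0xFFFFFF00 is still a
  // run of ones.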
3824
3825 // Mark special case.
3826 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3827
3828 // For other MBMI > MEMI cases, just return.
3829 if ((MBMI > MEMI) && !SrcMaskFull)
3830 return false;
3831
3832 // Handle MBMI <= MEMI cases.
3833 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3834  // In MI we only need the low 32 bits of SrcMI, so only consider the low
3835  // 32 bits of the SrcMI mask. Note that in APInt the lowest bit is at
3836  // index 0, while in the PowerPC ISA the lowest bit is at index 63.
3837 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3838
3839 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3840 APInt FinalMask = RotatedSrcMask & MaskMI;
3841 uint32_t NewMB, NewME;
3842 bool Simplified = false;
3843
3844 // If final mask is 0, MI result should be 0 too.
3845 if (FinalMask.isZero()) {
3846 bool Is64Bit =
3847 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3848 Simplified = true;
3849 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3850 LLVM_DEBUG(MI.dump());
3851
3852 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3853 // Replace MI with "LI 0"
3854 MI.removeOperand(4);
3855 MI.removeOperand(3);
3856 MI.removeOperand(2);
3857 MI.getOperand(1).ChangeToImmediate(0);
3858 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3859 } else {
3860 // Replace MI with "ANDI_rec reg, 0"
3861 MI.removeOperand(4);
3862 MI.removeOperand(3);
3863 MI.getOperand(2).setImm(0);
3864 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3865 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3866 if (SrcMI->getOperand(1).isKill()) {
3867 MI.getOperand(1).setIsKill(true);
3868 SrcMI->getOperand(1).setIsKill(false);
3869 } else
3870 // About to replace MI.getOperand(1), clear its kill flag.
3871 MI.getOperand(1).setIsKill(false);
3872 }
3873
3874 LLVM_DEBUG(dbgs() << "With: ");
3875 LLVM_DEBUG(MI.dump());
3876
3877 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3878 NewMB <= NewME) ||
3879 SrcMaskFull) {
3880    // Here we only handle the MBMI <= MEMI case, so NewMB must be no bigger
3881    // than NewME. Otherwise we would get a 64-bit value after folding, while
3882    // MI returns a 32-bit value.
3883 Simplified = true;
3884 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3885 LLVM_DEBUG(MI.dump());
3886
3887 uint16_t NewSH = (SHSrc + SHMI) % 32;
3888 MI.getOperand(2).setImm(NewSH);
3889 // If SrcMI mask is full, no need to update MBMI and MEMI.
3890 if (!SrcMaskFull) {
3891 MI.getOperand(3).setImm(NewMB);
3892 MI.getOperand(4).setImm(NewME);
3893 }
3894 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3895 if (SrcMI->getOperand(1).isKill()) {
3896 MI.getOperand(1).setIsKill(true);
3897 SrcMI->getOperand(1).setIsKill(false);
3898 } else
3899 // About to replace MI.getOperand(1), clear its kill flag.
3900 MI.getOperand(1).setIsKill(false);
3901
3902 LLVM_DEBUG(dbgs() << "To: ");
3903 LLVM_DEBUG(MI.dump());
3904 }
3905  if (Simplified && MRI->use_nodbg_empty(FoldingReg) &&
3906 !SrcMI->hasImplicitDef()) {
3907    // If FoldingReg has no non-debug use and it has no implicit def (it
3908    // is not RLWINM_rec or RLWINM8_rec), it's safe to delete its def SrcMI.
3909 // Otherwise keep it.
3910 *ToErase = SrcMI;
3911 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3912 LLVM_DEBUG(SrcMI->dump());
3913 }
3914 return Simplified;
3915}
3916
3917bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
3918 ImmInstrInfo &III, bool PostRA) const {
3919  // The vast majority of the instructions would need their operand 2 replaced
3920  // with an immediate when switching to the reg+imm form. A marked exception
3921  // is the update-form loads/stores, for which a constant operand 2 would need
3922  // to turn into a displacement, with operand 1 moved to the operand 2 position.
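  // For instance (illustrative): for PPC::ADD4 this function reports
  // ImmOpcode = PPC::ADDI with IsSummingOperands = true, so a caller may
  // rewrite "%r3 = ADD4 %r4, %r5" as "%r3 = ADDI %r4, imm" once %r5 is
  // known to hold the constant imm.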
3923 III.ImmOpNo = 2;
3924 III.OpNoForForwarding = 2;
3925 III.ImmWidth = 16;
3926 III.ImmMustBeMultipleOf = 1;
3927 III.TruncateImmTo = 0;
3928 III.IsSummingOperands = false;
3929 switch (Opc) {
3930 default: return false;
3931 case PPC::ADD4:
3932 case PPC::ADD8:
3933 III.SignedImm = true;
3934 III.ZeroIsSpecialOrig = 0;
3935 III.ZeroIsSpecialNew = 1;
3936 III.IsCommutative = true;
3937 III.IsSummingOperands = true;
3938 III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
3939 break;
3940 case PPC::ADDC:
3941 case PPC::ADDC8:
3942 III.SignedImm = true;
3943 III.ZeroIsSpecialOrig = 0;
3944 III.ZeroIsSpecialNew = 0;
3945 III.IsCommutative = true;
3946 III.IsSummingOperands = true;
3947 III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
3948 break;
3949 case PPC::ADDC_rec:
3950 III.SignedImm = true;
3951 III.ZeroIsSpecialOrig = 0;
3952 III.ZeroIsSpecialNew = 0;
3953 III.IsCommutative = true;
3954 III.IsSummingOperands = true;
3955 III.ImmOpcode = PPC::ADDIC_rec;
3956 break;
3957 case PPC::SUBFC:
3958 case PPC::SUBFC8:
3959 III.SignedImm = true;
3960 III.ZeroIsSpecialOrig = 0;
3961 III.ZeroIsSpecialNew = 0;
3962 III.IsCommutative = false;
3963 III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
3964 break;
3965 case PPC::CMPW:
3966 case PPC::CMPD:
3967 III.SignedImm = true;
3968 III.ZeroIsSpecialOrig = 0;
3969 III.ZeroIsSpecialNew = 0;
3970 III.IsCommutative = false;
3971 III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
3972 break;
3973 case PPC::CMPLW:
3974 case PPC::CMPLD:
3975 III.SignedImm = false;
3976 III.ZeroIsSpecialOrig = 0;
3977 III.ZeroIsSpecialNew = 0;
3978 III.IsCommutative = false;
3979 III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
3980 break;
3981 case PPC::AND_rec:
3982 case PPC::AND8_rec:
3983 case PPC::OR:
3984 case PPC::OR8:
3985 case PPC::XOR:
3986 case PPC::XOR8:
3987 III.SignedImm = false;
3988 III.ZeroIsSpecialOrig = 0;
3989 III.ZeroIsSpecialNew = 0;
3990 III.IsCommutative = true;
3991 switch(Opc) {
3992 default: llvm_unreachable("Unknown opcode");
3993 case PPC::AND_rec:
3994 III.ImmOpcode = PPC::ANDI_rec;
3995 break;
3996 case PPC::AND8_rec:
3997 III.ImmOpcode = PPC::ANDI8_rec;
3998 break;
3999 case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4000 case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4001 case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4002 case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4003 }
4004 break;
4005 case PPC::RLWNM:
4006 case PPC::RLWNM8:
4007 case PPC::RLWNM_rec:
4008 case PPC::RLWNM8_rec:
4009 case PPC::SLW:
4010 case PPC::SLW8:
4011 case PPC::SLW_rec:
4012 case PPC::SLW8_rec:
4013 case PPC::SRW:
4014 case PPC::SRW8:
4015 case PPC::SRW_rec:
4016 case PPC::SRW8_rec:
4017 case PPC::SRAW:
4018 case PPC::SRAW_rec:
4019 III.SignedImm = false;
4020 III.ZeroIsSpecialOrig = 0;
4021 III.ZeroIsSpecialNew = 0;
4022 III.IsCommutative = false;
4023 // This isn't actually true, but the instructions ignore any of the
4024 // upper bits, so any immediate loaded with an LI is acceptable.
4025 // This does not apply to shift right algebraic because a value
4026 // out of range will produce a -1/0.
4027 III.ImmWidth = 16;
4028 if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
4029 Opc == PPC::RLWNM8_rec)
4030 III.TruncateImmTo = 5;
4031 else
4032 III.TruncateImmTo = 6;
4033 switch(Opc) {
4034 default: llvm_unreachable("Unknown opcode");
4035 case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4036 case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4037 case PPC::RLWNM_rec:
4038 III.ImmOpcode = PPC::RLWINM_rec;
4039 break;
4040 case PPC::RLWNM8_rec:
4041 III.ImmOpcode = PPC::RLWINM8_rec;
4042 break;
4043 case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4044 case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4045 case PPC::SLW_rec:
4046 III.ImmOpcode = PPC::RLWINM_rec;
4047 break;
4048 case PPC::SLW8_rec:
4049 III.ImmOpcode = PPC::RLWINM8_rec;
4050 break;
4051 case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4052 case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4053 case PPC::SRW_rec:
4054 III.ImmOpcode = PPC::RLWINM_rec;
4055 break;
4056 case PPC::SRW8_rec:
4057 III.ImmOpcode = PPC::RLWINM8_rec;
4058 break;
4059 case PPC::SRAW:
4060 III.ImmWidth = 5;
4061 III.TruncateImmTo = 0;
4062 III.ImmOpcode = PPC::SRAWI;
4063 break;
4064 case PPC::SRAW_rec:
4065 III.ImmWidth = 5;
4066 III.TruncateImmTo = 0;
4067 III.ImmOpcode = PPC::SRAWI_rec;
4068 break;
4069 }
4070 break;
4071 case PPC::RLDCL:
4072 case PPC::RLDCL_rec:
4073 case PPC::RLDCR:
4074 case PPC::RLDCR_rec:
4075 case PPC::SLD:
4076 case PPC::SLD_rec:
4077 case PPC::SRD:
4078 case PPC::SRD_rec:
4079 case PPC::SRAD:
4080 case PPC::SRAD_rec:
4081 III.SignedImm = false;
4082 III.ZeroIsSpecialOrig = 0;
4083 III.ZeroIsSpecialNew = 0;
4084 III.IsCommutative = false;
4085 // This isn't actually true, but the instructions ignore any of the
4086 // upper bits, so any immediate loaded with an LI is acceptable.
4087 // This does not apply to shift right algebraic because a value
4088 // out of range will produce a -1/0.
4089 III.ImmWidth = 16;
4090 if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
4091 Opc == PPC::RLDCR_rec)
4092 III.TruncateImmTo = 6;
4093 else
4094 III.TruncateImmTo = 7;
4095 switch(Opc) {
4096 default: llvm_unreachable("Unknown opcode");
4097 case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4098 case PPC::RLDCL_rec:
4099 III.ImmOpcode = PPC::RLDICL_rec;
4100 break;
4101 case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4102 case PPC::RLDCR_rec:
4103 III.ImmOpcode = PPC::RLDICR_rec;
4104 break;
4105 case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4106 case PPC::SLD_rec:
4107 III.ImmOpcode = PPC::RLDICR_rec;
4108 break;
4109 case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4110 case PPC::SRD_rec:
4111 III.ImmOpcode = PPC::RLDICL_rec;
4112 break;
4113 case PPC::SRAD:
4114 III.ImmWidth = 6;
4115 III.TruncateImmTo = 0;
4116 III.ImmOpcode = PPC::SRADI;
4117 break;
4118 case PPC::SRAD_rec:
4119 III.ImmWidth = 6;
4120 III.TruncateImmTo = 0;
4121 III.ImmOpcode = PPC::SRADI_rec;
4122 break;
4123 }
4124 break;
4125 // Loads and stores:
4126 case PPC::LBZX:
4127 case PPC::LBZX8:
4128 case PPC::LHZX:
4129 case PPC::LHZX8:
4130 case PPC::LHAX:
4131 case PPC::LHAX8:
4132 case PPC::LWZX:
4133 case PPC::LWZX8:
4134 case PPC::LWAX:
4135 case PPC::LDX:
4136 case PPC::LFSX:
4137 case PPC::LFDX:
4138 case PPC::STBX:
4139 case PPC::STBX8:
4140 case PPC::STHX:
4141 case PPC::STHX8:
4142 case PPC::STWX:
4143 case PPC::STWX8:
4144 case PPC::STDX:
4145 case PPC::STFSX:
4146 case PPC::STFDX:
4147 III.SignedImm = true;
4148 III.ZeroIsSpecialOrig = 1;
4149 III.ZeroIsSpecialNew = 2;
4150 III.IsCommutative = true;
4151 III.IsSummingOperands = true;
4152 III.ImmOpNo = 1;
4153 III.OpNoForForwarding = 2;
4154 switch(Opc) {
4155 default: llvm_unreachable("Unknown opcode");
4156 case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4157 case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4158 case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4159 case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4160 case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4161 case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4162 case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4163 case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4164 case PPC::LWAX:
4165 III.ImmOpcode = PPC::LWA;
4166 III.ImmMustBeMultipleOf = 4;
4167 break;
4168 case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
4169 case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4170 case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4171 case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4172 case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4173 case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4174 case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4175 case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4176 case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4177 case PPC::STDX:
4178 III.ImmOpcode = PPC::STD;
4179 III.ImmMustBeMultipleOf = 4;
4180 break;
4181 case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4182 case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4183 }
4184 break;
4185 case PPC::LBZUX:
4186 case PPC::LBZUX8:
4187 case PPC::LHZUX:
4188 case PPC::LHZUX8:
4189 case PPC::LHAUX:
4190 case PPC::LHAUX8:
4191 case PPC::LWZUX:
4192 case PPC::LWZUX8:
4193 case PPC::LDUX:
4194 case PPC::LFSUX:
4195 case PPC::LFDUX:
4196 case PPC::STBUX:
4197 case PPC::STBUX8:
4198 case PPC::STHUX:
4199 case PPC::STHUX8:
4200 case PPC::STWUX:
4201 case PPC::STWUX8:
4202 case PPC::STDUX:
4203 case PPC::STFSUX:
4204 case PPC::STFDUX:
4205 III.SignedImm = true;
4206 III.ZeroIsSpecialOrig = 2;
4207 III.ZeroIsSpecialNew = 3;
4208 III.IsCommutative = false;
4209 III.IsSummingOperands = true;
4210 III.ImmOpNo = 2;
4211 III.OpNoForForwarding = 3;
4212 switch(Opc) {
4213 default: llvm_unreachable("Unknown opcode");
4214 case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4215 case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4216 case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4217 case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4218 case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4219 case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4220 case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4221 case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4222 case PPC::LDUX:
4223 III.ImmOpcode = PPC::LDU;
4224 III.ImmMustBeMultipleOf = 4;
4225 break;
4226 case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4227 case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4228 case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4229 case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4230 case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4231 case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4232 case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4233 case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4234 case PPC::STDUX:
4235 III.ImmOpcode = PPC::STDU;
4236 III.ImmMustBeMultipleOf = 4;
4237 break;
4238 case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4239 case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4240 }
4241 break;
4242 // Power9 and up only. For some of these, the X-Form version has access to all
4243 // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4244 // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4245 // into or stored from is one of the VR registers.
4246 case PPC::LXVX:
4247 case PPC::LXSSPX:
4248 case PPC::LXSDX:
4249 case PPC::STXVX:
4250 case PPC::STXSSPX:
4251 case PPC::STXSDX:
4252 case PPC::XFLOADf32:
4253 case PPC::XFLOADf64:
4254 case PPC::XFSTOREf32:
4255 case PPC::XFSTOREf64:
4256 if (!Subtarget.hasP9Vector())
4257 return false;
4258 III.SignedImm = true;
4259 III.ZeroIsSpecialOrig = 1;
4260 III.ZeroIsSpecialNew = 2;
4261 III.IsCommutative = true;
4262 III.IsSummingOperands = true;
4263 III.ImmOpNo = 1;
4264 III.OpNoForForwarding = 2;
4265 III.ImmMustBeMultipleOf = 4;
4266 switch(Opc) {
4267 default: llvm_unreachable("Unknown opcode");
4268 case PPC::LXVX:
4269 III.ImmOpcode = PPC::LXV;
4270 III.ImmMustBeMultipleOf = 16;
4271 break;
4272 case PPC::LXSSPX:
4273 if (PostRA) {
4274 if (IsVFReg)
4275 III.ImmOpcode = PPC::LXSSP;
4276 else {
4277 III.ImmOpcode = PPC::LFS;
4278 III.ImmMustBeMultipleOf = 1;
4279 }
4280 break;
4281 }
4282 [[fallthrough]];
4283 case PPC::XFLOADf32:
4284 III.ImmOpcode = PPC::DFLOADf32;
4285 break;
4286 case PPC::LXSDX:
4287 if (PostRA) {
4288 if (IsVFReg)
4289 III.ImmOpcode = PPC::LXSD;
4290 else {
4291 III.ImmOpcode = PPC::LFD;
4292 III.ImmMustBeMultipleOf = 1;
4293 }
4294 break;
4295 }
4296 [[fallthrough]];
4297 case PPC::XFLOADf64:
4298 III.ImmOpcode = PPC::DFLOADf64;
4299 break;
4300 case PPC::STXVX:
4301 III.ImmOpcode = PPC::STXV;
4302 III.ImmMustBeMultipleOf = 16;
4303 break;
4304 case PPC::STXSSPX:
4305 if (PostRA) {
4306 if (IsVFReg)
4307 III.ImmOpcode = PPC::STXSSP;
4308 else {
4309 III.ImmOpcode = PPC::STFS;
4310 III.ImmMustBeMultipleOf = 1;
4311 }
4312 break;
4313 }
4314 [[fallthrough]];
4315 case PPC::XFSTOREf32:
4316 III.ImmOpcode = PPC::DFSTOREf32;
4317 break;
4318 case PPC::STXSDX:
4319 if (PostRA) {
4320 if (IsVFReg)
4321 III.ImmOpcode = PPC::STXSD;
4322 else {
4323 III.ImmOpcode = PPC::STFD;
4324 III.ImmMustBeMultipleOf = 1;
4325 }
4326 break;
4327 }
4328 [[fallthrough]];
4329 case PPC::XFSTOREf64:
4330 III.ImmOpcode = PPC::DFSTOREf64;
4331 break;
4332 }
4333 break;
4334 }
4335 return true;
4336}
4337
4338// Utility function for swapping two arbitrary operands of an instruction.
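// For example (illustrative, hypothetical registers), swapMIOperands(MI, 1, 2)
// turns "%r3 = XOR8 %r4, %r5" into "%r3 = XOR8 %r5, %r4".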
4339static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4340 assert(Op1 != Op2 && "Cannot swap operand with itself.");
4341
4342 unsigned MaxOp = std::max(Op1, Op2);
4343 unsigned MinOp = std::min(Op1, Op2);
4344 MachineOperand MOp1 = MI.getOperand(MinOp);
4345 MachineOperand MOp2 = MI.getOperand(MaxOp);
4346  MI.removeOperand(MaxOp);
4347  MI.removeOperand(MinOp);
4348
4349 // If the operands we are swapping are the two at the end (the common case)
4350 // we can just remove both and add them in the opposite order.
4351 if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
4352 MI.addOperand(MOp2);
4353 MI.addOperand(MOp1);
4354 } else {
4355 // Store all operands in a temporary vector, remove them and re-add in the
4356 // right order.
4357    SmallVector<MachineOperand, 2> MOps;
4358    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
4359 for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
4360 MOps.push_back(MI.getOperand(i));
4361 MI.removeOperand(i);
4362 }
4363 // MOp2 needs to be added next.
4364 MI.addOperand(MOp2);
4365 // Now add the rest.
4366 for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4367 if (i == MaxOp)
4368 MI.addOperand(MOp1);
4369 else {
4370 MI.addOperand(MOps.back());
4371 MOps.pop_back();
4372 }
4373 }
4374 }
4375}
4376
4377// Check if the 'MI' that has the index OpNoForForwarding
4378// meets the requirement described in the ImmInstrInfo.
4379bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4380 const ImmInstrInfo &III,
4381 unsigned OpNoForForwarding
4382 ) const {
4383 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4384 // would not work pre-RA, we can only do the check post RA.
4385 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4386 if (MRI.isSSA())
4387 return false;
4388
4389 // Cannot do the transform if MI isn't summing the operands.
4390 if (!III.IsSummingOperands)
4391 return false;
4392
4393  // The instruction we are trying to replace must have ZeroIsSpecialOrig set.
4394 if (!III.ZeroIsSpecialOrig)
4395 return false;
4396
4397 // We cannot do the transform if the operand we are trying to replace
4398 // isn't the same as the operand the instruction allows.
4399 if (OpNoForForwarding != III.OpNoForForwarding)
4400 return false;
4401
4402 // Check if the instruction we are trying to transform really has
4403 // the special zero register as its operand.
4404 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4405 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4406 return false;
4407
4408  // This machine instruction is convertible if:
4409  // 1. it is summing the operands,
4410  // 2. one of the operands is the special zero register, and
4411  // 3. the operand we are trying to replace is the one allowed by the MI.
4412 return true;
4413}
4414
4415// Check if the DefMI is the add inst and set the ImmMO and RegMO
4416// accordingly.
4417bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4418 const ImmInstrInfo &III,
4419 MachineOperand *&ImmMO,
4420 MachineOperand *&RegMO) const {
4421 unsigned Opc = DefMI.getOpcode();
4422 if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4423 return false;
4424
4425 assert(DefMI.getNumOperands() >= 3 &&
4426 "Add inst must have at least three operands");
4427 RegMO = &DefMI.getOperand(1);
4428 ImmMO = &DefMI.getOperand(2);
4429
4430 // Before RA, ADDI first operand could be a frame index.
4431 if (!RegMO->isReg())
4432 return false;
4433
4434  // This DefMI is eligible for forwarding if it is:
4435 // 1. add inst
4436 // 2. one of the operands is Imm/CPI/Global.
4437 return isAnImmediateOperand(*ImmMO);
4438}
4439
4440bool PPCInstrInfo::isRegElgibleForForwarding(
4441 const MachineOperand &RegMO, const MachineInstr &DefMI,
4442 const MachineInstr &MI, bool KillDefMI,
4443 bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4444 // x = addi y, imm
4445 // ...
4446 // z = lfdx 0, x -> z = lfd imm(y)
4447 // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4448 // of "y" between the DEF of "x" and "z".
4449 // The query is only valid post RA.
4450 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4451 if (MRI.isSSA())
4452 return false;
4453
4454 Register Reg = RegMO.getReg();
4455
4456  // Walk the instructions in reverse (MI --> DefMI) to find the last DEF of Reg.
4457  MachineBasicBlock::const_reverse_iterator It = MI;
4458  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
4459 It++;
4460 for (; It != E; ++It) {
4461 if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4462 return false;
4463 else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4464 IsFwdFeederRegKilled = true;
4465 if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4466 SeenIntermediateUse = true;
4467 // Made it to DefMI without encountering a clobber.
4468 if ((&*It) == &DefMI)
4469 break;
4470 }
4471 assert((&*It) == &DefMI && "DefMI is missing");
4472
4473 // If DefMI also defines the register to be forwarded, we can only forward it
4474 // if DefMI is being erased.
4475 if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
4476 return KillDefMI;
4477
4478 return true;
4479}
4480
4481bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4482 const MachineInstr &DefMI,
4483 const ImmInstrInfo &III,
4484 int64_t &Imm,
4485 int64_t BaseImm) const {
4486 assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4487 if (DefMI.getOpcode() == PPC::ADDItocL) {
4488    // The operand for ADDItocL is a CPI, which isn't an imm at compile time.
4489    // However, we know that it is 16 bits wide and has an alignment of 4.
4490    // Check if the instruction meets the requirement.
4491 if (III.ImmMustBeMultipleOf > 4 ||
4492 III.TruncateImmTo || III.ImmWidth != 16)
4493 return false;
4494
4495 // Going from XForm to DForm loads means that the displacement needs to be
4496 // not just an immediate but also a multiple of 4, or 16 depending on the
4497    // load. A DForm load cannot represent a displacement that is merely a multiple of, say, 2.
4498 // XForm loads do not have this restriction.
4499 if (ImmMO.isGlobal()) {
4500 const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
4501      if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
4502        return false;
4503 }
4504
4505 return true;
4506 }
4507
4508 if (ImmMO.isImm()) {
4509 // It is Imm, we need to check if the Imm fit the range.
4510 // Sign-extend to 64-bits.
4511 // DefMI may be folded with another imm form instruction, the result Imm is
4512 // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4513 APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
4514 if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
4515 return false;
4516 if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
4517 return false;
4518 Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
4519
4520 if (Imm % III.ImmMustBeMultipleOf)
4521 return false;
4522 if (III.TruncateImmTo)
4523 Imm &= ((1 << III.TruncateImmTo) - 1);
4524 }
4525 else
4526 return false;
4527
4528  // This ImmMO can be forwarded if it meets the requirement described
4529  // in ImmInstrInfo.
4530 return true;
4531}
4532
4533bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4534 unsigned OpNoForForwarding,
4535 MachineInstr **KilledDef) const {
4536 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4537 !DefMI.getOperand(1).isImm())
4538 return false;
4539
4540 MachineFunction *MF = MI.getParent()->getParent();
4541  MachineRegisterInfo *MRI = &MF->getRegInfo();
4542  bool PostRA = !MRI->isSSA();
4543
4544 int64_t Immediate = DefMI.getOperand(1).getImm();
4545 // Sign-extend to 64-bits.
4546 int64_t SExtImm = SignExtend64<16>(Immediate);
4547
4548 bool ReplaceWithLI = false;
4549 bool Is64BitLI = false;
4550 int64_t NewImm = 0;
4551 bool SetCR = false;
4552 unsigned Opc = MI.getOpcode();
4553 switch (Opc) {
4554 default:
4555 return false;
4556
4557 // FIXME: Any branches conditional on such a comparison can be made
4558 // unconditional. At this time, this happens too infrequently to be worth
4559 // the implementation effort, but if that ever changes, we could convert
4560 // such a pattern here.
4561 case PPC::CMPWI:
4562 case PPC::CMPLWI:
4563 case PPC::CMPDI:
4564 case PPC::CMPLDI: {
4565 // Doing this post-RA would require dataflow analysis to reliably find uses
4566 // of the CR register set by the compare.
4567 // No need to fixup killed/dead flag since this transformation is only valid
4568 // before RA.
4569 if (PostRA)
4570 return false;
4571    // If a compare-immediate is fed by an immediate and is itself an input
4572    // of an ISEL (the most common case), convert the ISEL to a COPY of the correct register.
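    // A minimal sketch in pseudo-MIR (hypothetical registers):
    //   %r3 = LI 4
    //   %cr = CMPWI %r3, 8
    //   %r5 = ISEL %r6, %r7, %cr.sub_lt
    // Since 4 < 8 is decidable at compile time, the ISEL becomes
    //   %r5 = COPY %r6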
4573 bool Changed = false;
4574 Register DefReg = MI.getOperand(0).getReg();
4575 int64_t Comparand = MI.getOperand(2).getImm();
4576 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4577 ? (Comparand | 0xFFFFFFFFFFFF0000)
4578 : Comparand;
4579
4580 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4581 unsigned UseOpc = CompareUseMI.getOpcode();
4582 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4583 continue;
4584 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4585 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4586 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4587 unsigned RegToCopy =
4588 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4589 if (RegToCopy == PPC::NoRegister)
4590 continue;
4591 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4592 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4593 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4594 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4595 CompareUseMI.removeOperand(3);
4596 CompareUseMI.removeOperand(2);
4597 continue;
4598 }
4599 LLVM_DEBUG(
4600 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4601 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4602 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4603 // Convert to copy and remove unneeded operands.
4604 CompareUseMI.setDesc(get(PPC::COPY));
4605 CompareUseMI.removeOperand(3);
4606 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4607 CmpIselsConverted++;
4608 Changed = true;
4609 LLVM_DEBUG(CompareUseMI.dump());
4610 }
4611 if (Changed)
4612 return true;
4613 // This may end up incremented multiple times since this function is called
4614 // during a fixed-point transformation, but it is only meant to indicate the
4615 // presence of this opportunity.
4616 MissedConvertibleImmediateInstrs++;
4617 return false;
4618 }
4619
4620  // Immediate forms - may simply be convertible to an LI.
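  // For example (illustrative): with DefMI "%r4 = LI 100", the instruction
  // "%r5 = ADDI killed %r4, 200" becomes "%r5 = LI 300", since 300 fits in
  // a signed 16-bit field.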
4621 case PPC::ADDI:
4622 case PPC::ADDI8: {
4623 // Does the sum fit in a 16-bit signed field?
4624 int64_t Addend = MI.getOperand(2).getImm();
4625 if (isInt<16>(Addend + SExtImm)) {
4626 ReplaceWithLI = true;
4627 Is64BitLI = Opc == PPC::ADDI8;
4628 NewImm = Addend + SExtImm;
4629 break;
4630 }
4631 return false;
4632 }
4633 case PPC::SUBFIC:
4634 case PPC::SUBFIC8: {
4635 // Only transform this if the CARRY implicit operand is dead.
4636 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4637 return false;
4638 int64_t Minuend = MI.getOperand(2).getImm();
4639 if (isInt<16>(Minuend - SExtImm)) {
4640 ReplaceWithLI = true;
4641 Is64BitLI = Opc == PPC::SUBFIC8;
4642 NewImm = Minuend - SExtImm;
4643 break;
4644 }
4645 return false;
4646 }
4647 case PPC::RLDICL:
4648 case PPC::RLDICL_rec:
4649 case PPC::RLDICL_32:
4650 case PPC::RLDICL_32_64: {
4651 // Use APInt's rotate function.
4652 int64_t SH = MI.getOperand(2).getImm();
4653 int64_t MB = MI.getOperand(3).getImm();
4654 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4655 SExtImm, true);
4656 InVal = InVal.rotl(SH);
4657 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4658 InVal &= Mask;
4659 // Can't replace negative values with an LI as that will sign-extend
4660 // and not clear the left bits. If we're setting the CR bit, we will use
4661 // ANDI_rec which won't sign extend, so that's safe.
4662 if (isUInt<15>(InVal.getSExtValue()) ||
4663 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4664 ReplaceWithLI = true;
4665 Is64BitLI = Opc != PPC::RLDICL_32;
4666 NewImm = InVal.getSExtValue();
4667 SetCR = Opc == PPC::RLDICL_rec;
4668 break;
4669 }
4670 return false;
4671 }
4672 case PPC::RLWINM:
4673 case PPC::RLWINM8:
4674 case PPC::RLWINM_rec:
4675 case PPC::RLWINM8_rec: {
4676 int64_t SH = MI.getOperand(2).getImm();
4677 int64_t MB = MI.getOperand(3).getImm();
4678 int64_t ME = MI.getOperand(4).getImm();
4679 APInt InVal(32, SExtImm, true);
4680 InVal = InVal.rotl(SH);
4681 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4682 InVal &= Mask;
4683 // Can't replace negative values with an LI as that will sign-extend
4684 // and not clear the left bits. If we're setting the CR bit, we will use
4685 // ANDI_rec which won't sign extend, so that's safe.
4686 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4687 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4688 isUInt<16>(InVal.getSExtValue()));
4689 if (ValueFits) {
4690 ReplaceWithLI = true;
4691 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4692 NewImm = InVal.getSExtValue();
4693 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4694 break;
4695 }
4696 return false;
4697 }
4698 case PPC::ORI:
4699 case PPC::ORI8:
4700 case PPC::XORI:
4701 case PPC::XORI8: {
4702 int64_t LogicalImm = MI.getOperand(2).getImm();
4703 int64_t Result = 0;
4704 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4705 Result = LogicalImm | SExtImm;
4706 else
4707 Result = LogicalImm ^ SExtImm;
4708 if (isInt<16>(Result)) {
4709 ReplaceWithLI = true;
4710 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4711 NewImm = Result;
4712 break;
4713 }
4714 return false;
4715 }
4716 }
4717
4718 if (ReplaceWithLI) {
4719 // We need to be careful with CR-setting instructions we're replacing.
4720 if (SetCR) {
4721 // We don't know anything about uses when we're out of SSA, so only
4722 // replace if the new immediate will be reproduced.
4723 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4724 if (PostRA && ImmChanged)
4725 return false;
4726