LLVM 19.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCInst.h"
42#include "llvm/Support/Debug.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-instr-info"
49
50#define GET_INSTRMAP_INFO
51#define GET_INSTRINFO_CTOR_DTOR
52#include "PPCGenInstrInfo.inc"
53
// Pass-wide statistics, reported under -stats; each counts one occurrence of
// the named transformation or missed opportunity.
STATISTIC(NumStoreSPILLVSRRCAsVec,
          "Number of spillvsrrc spilled to stack as vec");
STATISTIC(NumStoreSPILLVSRRCAsGpr,
          "Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
          "Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
          "Number of compare-immediate instructions fed by constants");
STATISTIC(NumRcRotatesConvertedToRcAnd,
          "Number of record-form rotates converted to record-form andi");
65
// Debugging knob: skip the CTR-loop analysis entirely.
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
                             cl::desc("Disable analysis for CTR loops"));

// Debugging knob: turn off the compare-instruction optimization.
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
            cl::desc("Disable compare instruction optimization"), cl::Hidden);
72
73static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
74cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
76
// Fall back to the legacy instruction-latency computation; kept only for
// comparison (the option text itself notes it is incorrect).
static cl::opt<bool>
UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
    cl::desc("Use the old (incorrect) instruction latency calculation"));

// Scale factor applied to the register-pressure limit when deciding whether
// the machine combiner's pressure-reducing FMA patterns are worthwhile.
static cl::opt<float>
    FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
                cl::desc("register pressure factor for the transformations."));
84
86 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
87 cl::desc("enable register pressure reduce in machine combiner pass."));
88
// Out-of-line virtual method: pins the PPCInstrInfo vtable to this file so it
// is emitted in exactly one translation unit.
void PPCInstrInfo::anchor() {}
91
93 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
94 /* CatchRetOpcode */ -1,
95 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
96 Subtarget(STI), RI(STI.getTargetMachine()) {}
97
98/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
99/// this target when scheduling the DAG.
102 const ScheduleDAG *DAG) const {
103 unsigned Directive =
104 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
107 const InstrItineraryData *II =
108 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
109 return new ScoreboardHazardRecognizer(II, DAG);
110 }
111
113}
114
115/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
116/// to use for this target when scheduling the DAG.
119 const ScheduleDAG *DAG) const {
120 unsigned Directive =
121 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
122
123 // FIXME: Leaving this as-is until we have POWER9 scheduling info
125 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
126
127 // Most subtargets use a PPC970 recognizer.
130 assert(DAG->TII && "No InstrInfo?");
131
132 return new PPCHazardRecognizer970(*DAG);
133 }
134
135 return new ScoreboardHazardRecognizer(II, DAG);
136}
137
                                         const MachineInstr &MI,
                                         unsigned *PredCost) const {
  // Without itinerary data (or when the legacy path is forced via
  // -ppc-old-latency-calc), defer to the generated base implementation.
  if (!ItinData || UseOldLatencyCalc)
    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);

  // The default implementation of getInstrLatency calls getStageLatency, but
  // getStageLatency does not do the right thing for us. While we have
  // itinerary, most cores are fully pipelined, and so the itineraries only
  // express the first part of the pipeline, not every stage. Instead, we need
  // to use the listed output operand cycle number (using operand 0 here, which
  // is an output).

  // The result is the maximum listed operand cycle over all explicit register
  // definitions; 1 is the floor when no cycle information is available.
  unsigned Latency = 1;
  unsigned DefClass = MI.getDesc().getSchedClass();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    // Only explicit register defs carry a meaningful operand cycle.
    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
      continue;

    std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
    if (!Cycle)
      continue;

    Latency = std::max(Latency, *Cycle);
  }

  return Latency;
}
167
168std::optional<unsigned> PPCInstrInfo::getOperandLatency(
169 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
170 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
171 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
172 ItinData, DefMI, DefIdx, UseMI, UseIdx);
173
174 if (!DefMI.getParent())
175 return Latency;
176
177 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
178 Register Reg = DefMO.getReg();
179
180 bool IsRegCR;
181 if (Reg.isVirtual()) {
182 const MachineRegisterInfo *MRI =
183 &DefMI.getParent()->getParent()->getRegInfo();
184 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
185 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
186 } else {
187 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
188 PPC::CRBITRCRegClass.contains(Reg);
189 }
190
191 if (UseMI.isBranch() && IsRegCR) {
192 if (!Latency)
193 Latency = getInstrLatency(ItinData, DefMI);
194
195 // On some cores, there is an additional delay between writing to a condition
196 // register, and using it from a branch.
197 unsigned Directive = Subtarget.getCPUDirective();
198 switch (Directive) {
199 default: break;
200 case PPC::DIR_7400:
201 case PPC::DIR_750:
202 case PPC::DIR_970:
203 case PPC::DIR_E5500:
204 case PPC::DIR_PWR4:
205 case PPC::DIR_PWR5:
206 case PPC::DIR_PWR5X:
207 case PPC::DIR_PWR6:
208 case PPC::DIR_PWR6X:
209 case PPC::DIR_PWR7:
210 case PPC::DIR_PWR8:
211 // FIXME: Is this needed for POWER9?
212 Latency = *Latency + 2;
213 break;
214 }
215 }
216
217 return Latency;
218}
219
221 uint32_t Flags) const {
222 MI.setFlags(Flags);
226}
227
228// This function does not list all associative and commutative operations, but
229// only those worth feeding through the machine combiner in an attempt to
230// reduce the critical path. Mostly, this means floating-point operations,
231// because they have high latencies(>=5) (compared to other operations, such as
232// and/or, which are also associative and commutative, but have low latencies).
234 bool Invert) const {
235 if (Invert)
236 return false;
237 switch (Inst.getOpcode()) {
238 // Floating point:
239 // FP Add:
240 case PPC::FADD:
241 case PPC::FADDS:
242 // FP Multiply:
243 case PPC::FMUL:
244 case PPC::FMULS:
245 // Altivec Add:
246 case PPC::VADDFP:
247 // VSX Add:
248 case PPC::XSADDDP:
249 case PPC::XVADDDP:
250 case PPC::XVADDSP:
251 case PPC::XSADDSP:
252 // VSX Multiply:
253 case PPC::XSMULDP:
254 case PPC::XVMULDP:
255 case PPC::XVMULSP:
256 case PPC::XSMULSP:
259 // Fixed point:
260 // Multiply:
261 case PPC::MULHD:
262 case PPC::MULLD:
263 case PPC::MULHW:
264 case PPC::MULLW:
265 return true;
266 default:
267 return false;
268 }
269}
270
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5
// Array keeps info for FMA instructions:
// Index 0(InfoArrayIdxFMAInst): FMA instruction;
// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//                                second MUL operand index is plus 1;
// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
// NOTE: getFMAOpIdxInfo() looks rows up by the column-0 opcode, so any new row
// must keep the FMA opcode in column 0.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
293
294// Check if an opcode is a FMA instruction. If it is, return the index in array
295// FMAOpIdxInfo. Otherwise, return -1.
296int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
297 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
298 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
299 return I;
300 return -1;
301}
302
303// On PowerPC target, we have two kinds of patterns related to FMA:
304// 1: Improve ILP.
305// Try to reassociate FMA chains like below:
306//
307// Pattern 1:
308// A = FADD X, Y (Leaf)
309// B = FMA A, M21, M22 (Prev)
310// C = FMA B, M31, M32 (Root)
311// -->
312// A = FMA X, M21, M22
313// B = FMA Y, M31, M32
314// C = FADD A, B
315//
316// Pattern 2:
317// A = FMA X, M11, M12 (Leaf)
318// B = FMA A, M21, M22 (Prev)
319// C = FMA B, M31, M32 (Root)
320// -->
321// A = FMUL M11, M12
322// B = FMA X, M21, M22
323// D = FMA A, M31, M32
324// C = FADD B, D
325//
326// breaking the dependency between A and B, allowing FMA to be executed in
327// parallel (or back-to-back in a pipeline) instead of depending on each other.
328//
329// 2: Reduce register pressure.
330// Try to reassociate FMA with FSUB and a constant like below:
331// C is a floating point const.
332//
333// Pattern 1:
334// A = FSUB X, Y (Leaf)
335// D = FMA B, C, A (Root)
336// -->
337// A = FMA B, Y, -C
338// D = FMA A, X, C
339//
340// Pattern 2:
341// A = FSUB X, Y (Leaf)
342// D = FMA B, A, C (Root)
343// -->
344// A = FMA B, Y, -C
345// D = FMA A, X, C
346//
347// Before the transformation, A must be assigned with different hardware
348// register with D. After the transformation, A and D must be assigned with
349// same hardware register due to TIE attribute of FMA instructions.
350//
353 bool DoRegPressureReduce) const {
357
358 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
359 for (const auto &MO : Instr.explicit_operands())
360 if (!(MO.isReg() && MO.getReg().isVirtual()))
361 return false;
362 return true;
363 };
364
365 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
366 unsigned OpType) {
367 if (Instr.getOpcode() !=
368 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
369 return false;
370
371 // Instruction can be reassociated.
372 // fast math flags may prohibit reassociation.
373 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
374 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
375 return false;
376
377 // Instruction operands are virtual registers for reassociation.
378 if (!IsAllOpsVirtualReg(Instr))
379 return false;
380
381 // For register pressure reassociation, the FSub must have only one use as
382 // we want to delete the sub to save its def.
383 if (OpType == InfoArrayIdxFSubInst &&
384 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
385 return false;
386
387 return true;
388 };
389
390 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
391 int16_t &MulOpIdx, bool IsLeaf) {
392 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
393 if (Idx < 0)
394 return false;
395
396 // Instruction can be reassociated.
397 // fast math flags may prohibit reassociation.
398 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
399 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
400 return false;
401
402 // Instruction operands are virtual registers for reassociation.
403 if (!IsAllOpsVirtualReg(Instr))
404 return false;
405
407 if (IsLeaf)
408 return true;
409
411
412 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
413 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
414 // If 'add' operand's def is not in current block, don't do ILP related opt.
415 if (!MIAdd || MIAdd->getParent() != MBB)
416 return false;
417
418 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
419 // as this fma will be changed later.
420 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
421 };
422
423 int16_t AddOpIdx = -1;
424 int16_t MulOpIdx = -1;
425
426 bool IsUsedOnceL = false;
427 bool IsUsedOnceR = false;
428 MachineInstr *MULInstrL = nullptr;
429 MachineInstr *MULInstrR = nullptr;
430
431 auto IsRPReductionCandidate = [&]() {
432 // Currently, we only support float and double.
433 // FIXME: add support for other types.
434 unsigned Opcode = Root.getOpcode();
435 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
436 return false;
437
438 // Root must be a valid FMA like instruction.
439 // Treat it as leaf as we don't care its add operand.
440 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
441 assert((MulOpIdx >= 0) && "mul operand index not right!");
442 Register MULRegL = TRI->lookThruSingleUseCopyChain(
443 Root.getOperand(MulOpIdx).getReg(), MRI);
444 Register MULRegR = TRI->lookThruSingleUseCopyChain(
445 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
446 if (!MULRegL && !MULRegR)
447 return false;
448
449 if (MULRegL && !MULRegR) {
450 MULRegR =
451 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
452 IsUsedOnceL = true;
453 } else if (!MULRegL && MULRegR) {
454 MULRegL =
455 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
456 IsUsedOnceR = true;
457 } else {
458 IsUsedOnceL = true;
459 IsUsedOnceR = true;
460 }
461
462 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
463 return false;
464
465 MULInstrL = MRI->getVRegDef(MULRegL);
466 MULInstrR = MRI->getVRegDef(MULRegR);
467 return true;
468 }
469 return false;
470 };
471
472 // Register pressure fma reassociation patterns.
473 if (DoRegPressureReduce && IsRPReductionCandidate()) {
474 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
475 // Register pressure pattern 1
476 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
477 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
478 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
480 return true;
481 }
482
483 // Register pressure pattern 2
484 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
485 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
486 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
488 return true;
489 }
490 }
491
492 // ILP fma reassociation patterns.
493 // Root must be a valid FMA like instruction.
494 AddOpIdx = -1;
495 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
496 return false;
497
498 assert((AddOpIdx >= 0) && "add operand index not right!");
499
500 Register RegB = Root.getOperand(AddOpIdx).getReg();
501 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
502
503 // Prev must be a valid FMA like instruction.
504 AddOpIdx = -1;
505 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
506 return false;
507
508 assert((AddOpIdx >= 0) && "add operand index not right!");
509
510 Register RegA = Prev->getOperand(AddOpIdx).getReg();
511 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
512 AddOpIdx = -1;
513 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
515 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
516 return true;
517 }
518 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
520 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
521 return true;
522 }
523 return false;
524}
525
527 MachineInstr &Root, unsigned &Pattern,
528 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
529 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
530
531 MachineFunction *MF = Root.getMF();
535
536 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
537 if (Idx < 0)
538 return;
539
541
542 // For now we only need to fix up placeholder for register pressure reduce
543 // patterns.
544 Register ConstReg = 0;
545 switch (Pattern) {
547 ConstReg =
548 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
549 break;
551 ConstReg =
552 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
553 break;
554 default:
555 // Not register pressure reduce patterns.
556 return;
557 }
558
559 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
560 // Get const value from const pool.
561 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
562 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
563
564 // Get negative fp const.
565 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
566 F1.changeSign();
567 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
568 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
569
570 // Put negative fp const into constant pool.
571 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
572
573 MachineOperand *Placeholder = nullptr;
574 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
575 for (auto *Inst : InsInstrs) {
576 for (MachineOperand &Operand : Inst->explicit_operands()) {
577 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
578 if (Operand.getReg() == PPC::ZERO8) {
579 Placeholder = &Operand;
580 break;
581 }
582 }
583 }
584
585 assert(Placeholder && "Placeholder does not exist!");
586
587 // Generate instructions to load the const fp from constant pool.
588 // We only support PPC64 and medium code model.
589 Register LoadNewConst =
590 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
591
592 // Fill the placeholder with the new load from constant pool.
593 Placeholder->setReg(LoadNewConst);
594}
595
597 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
598
600 return false;
601
602 // Currently, we only enable register pressure reducing in machine combiner
603 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
604 // support.
605 //
606 // So we need following instructions to access a TOC entry:
607 //
608 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
609 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
610 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
611 //
612 // FIXME: add more supported targets, like Small and Large code model, PPC32,
613 // AIX.
614 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
616 return false;
617
619 const MachineFunction *MF = MBB->getParent();
620 const MachineRegisterInfo *MRI = &MF->getRegInfo();
621
622 auto GetMBBPressure =
623 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
624 RegionPressure Pressure;
625 RegPressureTracker RPTracker(Pressure);
626
627 // Initialize the register pressure tracker.
628 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
629 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
630
631 for (const auto &MI : reverse(*MBB)) {
632 if (MI.isDebugValue() || MI.isDebugLabel())
633 continue;
634 RegisterOperands RegOpers;
635 RegOpers.collect(MI, *TRI, *MRI, false, false);
636 RPTracker.recedeSkipDebugValues();
637 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
638 RPTracker.recede(RegOpers);
639 }
640
641 // Close the RPTracker to finalize live ins.
642 RPTracker.closeRegion();
643
644 return RPTracker.getPressure().MaxSetPressure;
645 };
646
647 // For now we only care about float and double type fma.
648 unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
649 *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
650
651 // Only reduce register pressure when pressure is high.
652 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
653 (float)VSSRCLimit * FMARPFactor;
654}
655
  // I has only one memory operand which is load from constant pool.
  if (!I->hasOneMemOperand())
    return false;

  // The single memory operand must be a load whose pseudo source value is
  // the constant pool.
  MachineMemOperand *Op = I->memoperands()[0];
  return Op->isLoad() && Op->getPseudoValue() &&
         Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
}
665
666Register PPCInstrInfo::generateLoadForNewConst(
667 unsigned Idx, MachineInstr *MI, Type *Ty,
668 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
669 // Now we only support PPC64, Medium code model and P9 with vector.
670 // We have immutable pattern to access const pool. See function
671 // shouldReduceRegisterPressure.
672 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
674 "Target not supported!\n");
675
676 MachineFunction *MF = MI->getMF();
678
679 // Generate ADDIStocHA8
680 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
681 MachineInstrBuilder TOCOffset =
682 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
683 .addReg(PPC::X2)
685
686 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
687 "Only float and double are supported!");
688
689 unsigned LoadOpcode;
690 // Should be float type or double type.
691 if (Ty->isFloatTy())
692 LoadOpcode = PPC::DFLOADf32;
693 else
694 LoadOpcode = PPC::DFLOADf64;
695
696 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
697 Register VReg2 = MRI->createVirtualRegister(RC);
701
702 // Generate Load from constant pool.
704 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
706 .addReg(VReg1, getKillRegState(true))
707 .addMemOperand(MMO);
708
709 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
710
711 // Insert the toc load instructions into InsInstrs.
712 InsInstrs.insert(InsInstrs.begin(), Load);
713 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
714 return VReg2;
715}
716
717// This function returns the const value in constant pool if the \p I is a load
718// from constant pool.
719const Constant *
721 MachineFunction *MF = I->getMF();
724 assert(I->mayLoad() && "Should be a load instruction.\n");
725 for (auto MO : I->uses()) {
726 if (!MO.isReg())
727 continue;
728 Register Reg = MO.getReg();
729 if (Reg == 0 || !Reg.isVirtual())
730 continue;
731 // Find the toc address.
732 MachineInstr *DefMI = MRI->getVRegDef(Reg);
733 for (auto MO2 : DefMI->uses())
734 if (MO2.isCPI())
735 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
736 }
737 return nullptr;
738}
739
741 switch (Pattern) {
748 default:
750 }
751}
752
755 bool DoRegPressureReduce) const {
756 // Using the machine combiner in this way is potentially expensive, so
757 // restrict to when aggressive optimizations are desired.
759 return false;
760
761 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
762 return true;
763
765 DoRegPressureReduce);
766}
767
769 MachineInstr &Root, unsigned Pattern,
772 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
773 switch (Pattern) {
778 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
779 break;
780 default:
781 // Reassociate default patterns.
783 DelInstrs, InstrIdxForVirtReg);
784 break;
785 }
786}
787
788void PPCInstrInfo::reassociateFMA(
789 MachineInstr &Root, unsigned Pattern,
792 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
793 MachineFunction *MF = Root.getMF();
796 MachineOperand &OpC = Root.getOperand(0);
797 Register RegC = OpC.getReg();
798 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
799 MRI.constrainRegClass(RegC, RC);
800
801 unsigned FmaOp = Root.getOpcode();
802 int16_t Idx = getFMAOpIdxInfo(FmaOp);
803 assert(Idx >= 0 && "Root must be a FMA instruction");
804
805 bool IsILPReassociate =
808
811
812 MachineInstr *Prev = nullptr;
813 MachineInstr *Leaf = nullptr;
814 switch (Pattern) {
815 default:
816 llvm_unreachable("not recognized pattern!");
819 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
820 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
821 break;
823 Register MULReg =
824 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
825 Leaf = MRI.getVRegDef(MULReg);
826 break;
827 }
829 Register MULReg = TRI->lookThruCopyLike(
830 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
831 Leaf = MRI.getVRegDef(MULReg);
832 break;
833 }
834 }
835
836 uint32_t IntersectedFlags = 0;
837 if (IsILPReassociate)
838 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
839 else
840 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
841
842 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
843 bool &KillFlag) {
844 Reg = Operand.getReg();
845 MRI.constrainRegClass(Reg, RC);
846 KillFlag = Operand.isKill();
847 };
848
849 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
850 Register &MulOp2, Register &AddOp,
851 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
852 bool &AddOpKillFlag) {
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
854 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
855 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
856 };
857
858 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
859 RegA21, RegB;
860 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
861 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
862 KillA11 = false, KillA21 = false, KillB = false;
863
864 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
865
866 if (IsILPReassociate)
867 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
868
870 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
871 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
874 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
875 } else {
876 // Get FSUB instruction info.
877 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
878 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
879 }
880
881 // Create new virtual registers for the new results instead of
882 // recycling legacy ones because the MachineCombiner's computation of the
883 // critical path requires a new register definition rather than an existing
884 // one.
885 // For register pressure reassociation, we only need create one virtual
886 // register for the new fma.
887 Register NewVRA = MRI.createVirtualRegister(RC);
888 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
889
890 Register NewVRB = 0;
891 if (IsILPReassociate) {
892 NewVRB = MRI.createVirtualRegister(RC);
893 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
894 }
895
896 Register NewVRD = 0;
898 NewVRD = MRI.createVirtualRegister(RC);
899 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
900 }
901
902 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
903 Register RegMul1, bool KillRegMul1,
904 Register RegMul2, bool KillRegMul2) {
905 MI->getOperand(AddOpIdx).setReg(RegAdd);
906 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
907 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
908 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
909 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
910 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
911 };
912
913 MachineInstrBuilder NewARegPressure, NewCRegPressure;
914 switch (Pattern) {
915 default:
916 llvm_unreachable("not recognized pattern!");
918 // Create new instructions for insertion.
919 MachineInstrBuilder MINewB =
920 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
921 .addReg(RegX, getKillRegState(KillX))
922 .addReg(RegM21, getKillRegState(KillM21))
923 .addReg(RegM22, getKillRegState(KillM22));
924 MachineInstrBuilder MINewA =
925 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
926 .addReg(RegY, getKillRegState(KillY))
927 .addReg(RegM31, getKillRegState(KillM31))
928 .addReg(RegM32, getKillRegState(KillM32));
929 // If AddOpIdx is not 1, adjust the order.
930 if (AddOpIdx != 1) {
931 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
932 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
933 }
934
935 MachineInstrBuilder MINewC =
936 BuildMI(*MF, Root.getDebugLoc(),
938 .addReg(NewVRB, getKillRegState(true))
939 .addReg(NewVRA, getKillRegState(true));
940
941 // Update flags for newly created instructions.
942 setSpecialOperandAttr(*MINewA, IntersectedFlags);
943 setSpecialOperandAttr(*MINewB, IntersectedFlags);
944 setSpecialOperandAttr(*MINewC, IntersectedFlags);
945
946 // Record new instructions for insertion.
947 InsInstrs.push_back(MINewA);
948 InsInstrs.push_back(MINewB);
949 InsInstrs.push_back(MINewC);
950 break;
951 }
953 assert(NewVRD && "new FMA register not created!");
954 // Create new instructions for insertion.
955 MachineInstrBuilder MINewA =
956 BuildMI(*MF, Leaf->getDebugLoc(),
958 .addReg(RegM11, getKillRegState(KillM11))
959 .addReg(RegM12, getKillRegState(KillM12));
960 MachineInstrBuilder MINewB =
961 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
962 .addReg(RegX, getKillRegState(KillX))
963 .addReg(RegM21, getKillRegState(KillM21))
964 .addReg(RegM22, getKillRegState(KillM22));
965 MachineInstrBuilder MINewD =
966 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
967 .addReg(NewVRA, getKillRegState(true))
968 .addReg(RegM31, getKillRegState(KillM31))
969 .addReg(RegM32, getKillRegState(KillM32));
970 // If AddOpIdx is not 1, adjust the order.
971 if (AddOpIdx != 1) {
972 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
973 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
974 KillM32);
975 }
976
977 MachineInstrBuilder MINewC =
978 BuildMI(*MF, Root.getDebugLoc(),
980 .addReg(NewVRB, getKillRegState(true))
981 .addReg(NewVRD, getKillRegState(true));
982
983 // Update flags for newly created instructions.
984 setSpecialOperandAttr(*MINewA, IntersectedFlags);
985 setSpecialOperandAttr(*MINewB, IntersectedFlags);
986 setSpecialOperandAttr(*MINewD, IntersectedFlags);
987 setSpecialOperandAttr(*MINewC, IntersectedFlags);
988
989 // Record new instructions for insertion.
990 InsInstrs.push_back(MINewA);
991 InsInstrs.push_back(MINewB);
992 InsInstrs.push_back(MINewD);
993 InsInstrs.push_back(MINewC);
994 break;
995 }
998 Register VarReg;
999 bool KillVarReg = false;
1001 VarReg = RegM31;
1002 KillVarReg = KillM31;
1003 } else {
1004 VarReg = RegM32;
1005 KillVarReg = KillM32;
1006 }
1007 // We don't want to get negative const from memory pool too early, as the
1008 // created entry will not be deleted even if it has no users. Since all
1009 // operand of Leaf and Root are virtual register, we use zero register
1010 // here as a placeholder. When the InsInstrs is selected in
1011 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1012 // with a virtual register which is a load from constant pool.
1013 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1014 .addReg(RegB, getKillRegState(RegB))
1015 .addReg(RegY, getKillRegState(KillY))
1016 .addReg(PPC::ZERO8);
1017 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1018 .addReg(NewVRA, getKillRegState(true))
1019 .addReg(RegX, getKillRegState(KillX))
1020 .addReg(VarReg, getKillRegState(KillVarReg));
1021 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
1022 // both at index 1, no need to adjust.
1023 // FIXME: when add more fma instructions support, like fma/fmas, adjust
1024 // the operand index here.
1025 break;
1026 }
1027 }
1028
1029 if (!IsILPReassociate) {
1030 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1031 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1032
1033 InsInstrs.push_back(NewARegPressure);
1034 InsInstrs.push_back(NewCRegPressure);
1035 }
1036
1037 assert(!InsInstrs.empty() &&
1038 "Insertion instructions set should not be empty!");
1039
1040 // Record old instructions for deletion.
1041 DelInstrs.push_back(Leaf);
1042 if (IsILPReassociate)
1043 DelInstrs.push_back(Prev);
1044 DelInstrs.push_back(&Root);
1045}
1046
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  // Report EXTSW-family sign extensions as coalescable: the low 32 bits of
  // the 64-bit result equal the 32-bit source, so the coalescer can fold the
  // source into the sub_32 sub-register of the destination.
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}
1062
                                           int &FrameIndex) const {
  // Recognize a reload from a stack slot: one of the spill-load opcodes whose
  // frame operands were attached by addFrameReference.
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  // Not a simple zero-offset stack-slot load.
  return 0;
}
1076
// For opcodes with the ReMaterializable flag set, this function is called to
// verify the instruction is really rematable.
                                                     const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // Let base implementation decide.
    break;
  // Immediate-materialization forms (LI/LIS and their prefixed/64-bit
  // variants).
  case PPC::LI:
  case PPC::LI8:
  case PPC::PLI:
  case PPC::PLI8:
  case PPC::LIS:
  case PPC::LIS8:
  // TOC-based address computations and fixed-address pseudos.
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL:
  case PPC::ADDItocL8:
  case PPC::LOAD_STACK_GUARD:
  case PPC::PPCLdFixedAddr:
  // Vector/VSX constant idioms: zeros, all-ones, and splatted immediates.
  case PPC::XXLXORz:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIW:
  case PPC::XXSPLTIDP:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  // CR-bit set/unset and accumulator-zeroing pseudos.
  case PPC::CRSET:
  case PPC::CRUNSET:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
}
1118
                                          int &FrameIndex) const {
  // Recognize a spill to a stack slot: one of the spill-store opcodes with
  // the (zero-offset immediate, frame-index) operand pair produced by
  // addFrameReference.
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  // Not a simple zero-offset stack-slot store.
  return 0;
}
1130
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)
  // i.e. swapping the register operands is compensated for by complementing
  // the wrapping mask.

  // Swap op1/op2
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction (i.e. op1 is tied to op0).
    assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    // Reg2 becomes both an input and the (tied) output, so it can no longer
    // carry a kill flag.
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  // In-place commutation: swap the register operands (and their subregs/kill
  // flags), then complement the mask.
  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}
1214
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted, however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.

  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  // -1 means MI is not a VSX A-type FMA; defer to the generic implementation.
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
1230
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  // Select the group-terminating nop variant for the target CPU; a plain NOP
  // is used for anything without a dedicated variant.
  switch (Directive) {
  default: Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
  // FIXME: Update when POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  // No meaningful source location for a scheduler-inserted nop.
  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}
1249
/// Return the noop instruction to use for a noop.
/// MC-layer counterpart of insertNoop: always a plain PPC::NOP, independent
/// of the CPU directive.
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}
1256
// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
// Returns false (success) with TBB/FBB/Cond filled in when the terminator
// sequence is understood; returns true when the block cannot be analyzed.
                                 MachineBasicBlock *&FBB,
                                 bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // We update iterator after deleting the last branch.
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      // CTR-decrement branch: encode as imm 1 (BDNZ) plus the CTR register.
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
        return true;
      // CTR-decrement branch: encode as imm 0 (BDZ) plus the CTR register.
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }

  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC:BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC:Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
1434
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  // Returns the number of branch instructions removed (0, 1, or 2).
  if (I == MBB.end())
    return 0;

  // Nothing to do if the block doesn't end in a branch we understand.
  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  // A preceding conditional branch (the first half of a two-way branch) may
  // remain; anything else means we're done.
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}
1466
                                    MachineBasicBlock *FBB,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");

  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    // CTR condition (see analyzeBranch): imm selects BDNZ (1) vs BDZ (0).
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
              (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
              (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way Conditional Branch: conditional branch to TBB followed by an
  // unconditional branch to FBB.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
            (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
            (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}
1518
// Select analysis.
// Decide whether the condition/true/false triple can be implemented with an
// isel instruction, and report its cost in cycles if so.
                                Register DstReg, Register TrueReg,
                                Register FalseReg, int &CondCycles,
                                int &TrueCycles, int &FalseCycles) const {
  if (!Subtarget.hasISEL())
    return false;

  if (Cond.size() != 2)
    return false;

  // If this is really a bdnz-like condition, then it cannot be turned into a
  // select.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    return false;

  // If the conditional branch uses a physical register, then it cannot be
  // turned into a select.
  if (Cond[1].getReg().isPhysical())
    return false;

  // Check register classes.
  const TargetRegisterClass *RC =
    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // isel is for regular integer GPRs only.
  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
      !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
      !PPC::G8RCRegClass.hasSubClassEq(RC) &&
      !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
    return false;

  // FIXME: These numbers are for the A2, how well they work for other cores is
  // an open question. On the A2, the isel instruction has a 2-cycle latency
  // but single-cycle throughput. These numbers are used in combination with
  // the MispredictPenalty setting from the active SchedMachineModel.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;

  return true;
}
1565
                              const DebugLoc &dl, Register DestReg,
                              Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  // Map the branch predicate onto the CR sub-bit to test (SubIdx) and
  // whether the true/false inputs must be swapped (negated predicates test
  // the same bit with the operands exchanged).
  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }

  Register FirstReg = SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
      MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
        &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
      .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
    .addReg(FirstReg).addReg(SecondReg)
    .addReg(Cond[1].getReg(), 0, SubIdx);
}
1650
1651static unsigned getCRBitValue(unsigned CRBit) {
1652 unsigned Ret = 4;
1653 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1654 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1655 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1656 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1657 Ret = 3;
1658 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1659 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1660 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1661 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1662 Ret = 2;
1663 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1664 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1665 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1666 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1667 Ret = 1;
1668 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1669 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1670 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1671 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1672 Ret = 0;
1673
1674 assert(Ret != 4 && "Invalid CR bit register");
1675 return Ret;
1676}
1677
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc) const {
  // We can end up with self copies and similar things as a result of VSX copy
  // legalization. Promote them here.
  if (PPC::F8RCRegClass.contains(DestReg) &&
      PPC::VSRCRegClass.contains(SrcReg)) {
    // Widen the FP destination to its containing VSX register so the copy
    // becomes a plain VSX-to-VSX copy.
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && SrcReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    DestReg = SuperReg;
  } else if (PPC::F8RCRegClass.contains(SrcReg) &&
             PPC::VSRCRegClass.contains(DestReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && DestReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    SrcReg = SuperReg;
  }

  // Different class register copy
  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
    MCRegister CRReg = getCRFromCRBit(SrcReg);
    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
    // NOTE(review): the result of getKillRegState is discarded on these
    // cross-class paths; the kill flag is not propagated — confirm intended.
    getKillRegState(KillSrc);
    // Rotate the CR bit in the CR fields to be the least significant bit and
    // then mask with 0x1 (MB = ME = 31).
    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
        .addImm(31)
        .addImm(31);
    return;
  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
             (PPC::G8RCRegClass.contains(DestReg) ||
              PPC::GPRCRegClass.contains(DestReg))) {
    bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
    unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
    unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
    unsigned CRNum = TRI->getEncodingValue(SrcReg);
    BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    // CR7's field is already in the low 4 bits; no shift needed.
    if (CRNum == 7)
      return;
    // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
    BuildMI(MBB, I, DL, get(ShCode), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(CRNum * 4 + 4)
        .addImm(28)
        .addImm(31);
    return;
  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
             PPC::VSFRCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
    NumGPRtoVSRSpill++;
    getKillRegState(KillSrc);
    return;
  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
             PPC::G8RCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::SPERCRegClass.contains(SrcReg) &&
             PPC::GPRCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::GPRCRegClass.contains(SrcReg) &&
             PPC::SPERCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  }

  // Same-class copies: pick the identity-move opcode for the register class.
  unsigned Opc;
  if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR;
  else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR8;
  else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::FMR;
  else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::MCRF;
  else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::VOR;
  else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
    // There are two different ways this can be done:
    //   1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
    //      issue in VSU pipeline 0.
    //   2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
    //      can go to either pipeline.
    // We'll always use xxlor here, because in practically all cases where
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (Subtarget.pairedVectorMemops() &&
           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
    // Copy a VSX register pair as two XXLORs on the underlying VSL/V
    // registers (pairs 16+ map onto the Altivec V registers).
    if (SrcReg > PPC::VSRp15)
      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
    else
      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
    if (DestReg > PPC::VSRp15)
      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
    else
      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
      addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
      addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
    return;
  }
  else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::EVOR;
  else if ((PPC::ACCRCRegClass.contains(DestReg) ||
            PPC::UACCRCRegClass.contains(DestReg)) &&
           (PPC::ACCRCRegClass.contains(SrcReg) ||
            PPC::UACCRCRegClass.contains(SrcReg))) {
    // If primed, de-prime the source register, copy the individual registers
    // and prime the destination if needed. The vector subregisters are
    // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
    // source is primed, we need to re-prime it after the copy as well.
    PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
    bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
    bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
    MCRegister VSLSrcReg =
        PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    MCRegister VSLDestReg =
        PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    if (SrcPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
    for (unsigned Idx = 0; Idx < 4; Idx++)
      BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
          .addReg(VSLSrcReg + Idx)
          .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
    if (DestPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
    if (SrcPrimed && !KillSrc)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
    return;
  } else if (PPC::G8pRCRegClass.contains(DestReg) &&
             PPC::G8pRCRegClass.contains(SrcReg)) {
    // TODO: Handle G8RC to G8pRC (and vice versa) copy.
    // Copy a GPR pair as two OR8s on the constituent X registers.
    unsigned DestRegIdx = DestReg - PPC::G8p0;
    MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
    MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
    unsigned SrcRegIdx = SrcReg - PPC::G8p0;
    MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
    MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
        .addReg(SrcRegSub0)
        .addReg(SrcRegSub0, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
        .addReg(SrcRegSub1)
        .addReg(SrcRegSub1, getKillRegState(KillSrc));
    return;
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  // OR-style moves take (dst, src, src); MCRF/FMR-style take (dst, src).
  const MCInstrDesc &MCID = get(Opc);
  if (MCID.getNumOperands() == 3)
    BuildMI(MBB, I, DL, MCID, DestReg)
      .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
  else
    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
1860
// Map a register class to the index used into the store/load spill-opcode
// tables (see getStoreOpcodeForSpill / getLoadOpcodeForSpill). Each branch
// selects the table slot for one register-class family.
unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
  int OpcodeIndex = 0;

  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
  } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
    // ACC/UACC/WACC/VSRp spills require paired vector memory operations.
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
  } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
  } else {
    llvm_unreachable("Unknown regclass!");
  }
  return OpcodeIndex;
}
1913
// Return the store (spill) opcode to use for the given register class.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1919
// Return the load (reload) opcode to use for the given register class.
unsigned
  ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}
1925
// Build (but do not insert) the spill-store instruction for SrcReg into
// FrameIdx, appending it to NewMIs, and record spill-related facts on the
// function info.
void PPCInstrInfo::StoreRegToStackSlot(
    MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    SmallVectorImpl<MachineInstr *> &NewMIs) const {
  unsigned Opcode = getStoreOpcodeForSpill(RC);
  DebugLoc DL;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

      BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
      FrameIdx));

  // Spilling any CR state must be flagged so frame lowering can preserve it.
  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  // X-form memory ops lack the register+immediate addressing frame lowering
  // prefers; note their presence.
  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}
1947
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();

  // Build the spill sequence, then insert it before MI.
  StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);

  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  // Attach a memory operand describing the stack-slot store for AA and
  // scheduling.
  const MachineFrameInfo &MFI = MF.getFrameInfo();
                                 MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}
1967
// Public spill entry point: canonicalize the register class via updatedRC()
// before delegating, so a value is never spilled with an Altivec instruction
// and reloaded with a VSX one (their doubleword orders differ).
// NOTE(review): the signature lines naming this member
// (PPCInstrInfo::storeRegToStackSlot(...), originals 1968-1969) are elided
// in this excerpt.
1970 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1971 const TargetRegisterInfo *TRI, Register VReg) const {
1972 // We need to avoid a situation in which the value from a VRRC register is
1973 // spilled using an Altivec instruction and reloaded into a VSRC register
1974 // using a VSX instruction. The issue with this is that the VSX
1975 // load/store instructions swap the doublewords in the vector and the Altivec
1976 // ones don't. The register classes on the spill/reload may be different if
1977 // the register is defined using an Altivec instruction and is then used by a
1978 // VSX instruction.
1979 RC = updatedRC(RC);
1980 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1981}
1982
// Build (but do not insert) the instruction(s) that reload DestReg of class
// RC from stack slot FrameIdx, appending them to NewMIs.
// NOTE(review): the parameter line declaring NewMIs
// (SmallVectorImpl<MachineInstr *> &NewMIs, original 1986) is elided in
// this excerpt.
1983void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1984 unsigned DestReg, int FrameIdx,
1985 const TargetRegisterClass *RC,
1987 const {
1988 unsigned Opcode = getLoadOpcodeForSpill(RC);
1989 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1990 FrameIdx));
1991}
1992
// Reload DestReg from FrameIdx using RC exactly as given (no register-class
// canonicalization): build the reload MIs, insert them before MI, and attach
// a stack-slot memory operand to the final instruction.
// NOTE(review): the signature lines naming this member
// (PPCInstrInfo::loadRegFromStackSlotNoUpd(...), originals 1993-1994), the
// NewMIs declaration (1998) and the MachineMemOperand creation (2008-2010)
// are elided in this excerpt.
1995 int FrameIdx, const TargetRegisterClass *RC,
1996 const TargetRegisterInfo *TRI) const {
1997 MachineFunction &MF = *MBB.getParent();
1999 DebugLoc DL;
// Borrow the debug location of the insertion point when one exists.
2000 if (MI != MBB.end()) DL = MI->getDebugLoc();
2001
2002 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2003
2004 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
2005 MBB.insert(MI, NewMIs[i]);
2006
2007 const MachineFrameInfo &MFI = MF.getFrameInfo();
2011 MFI.getObjectAlign(FrameIdx));
2012 NewMIs.back()->addMemOperand(MF, MMO);
2013}
2014
// Public reload entry point: canonicalize the register class via updatedRC()
// before delegating, mirroring storeRegToStackSlot (see comment below).
// NOTE(review): the signature lines naming this member
// (PPCInstrInfo::loadRegFromStackSlot(...), originals 2015-2016) are elided
// in this excerpt.
2017 Register DestReg, int FrameIdx,
2018 const TargetRegisterClass *RC,
2019 const TargetRegisterInfo *TRI,
2020 Register VReg) const {
2021 // We need to avoid a situation in which the value from a VRRC register is
2022 // spilled using an Altivec instruction and reloaded into a VSRC register
2023 // using a VSX instruction. The issue with this is that the VSX
2024 // load/store instructions swap the doublewords in the vector and the Altivec
2025 // ones don't. The register classes on the spill/reload may be different if
2026 // the register is defined using an Altivec instruction and is then used by a
2027 // VSX instruction.
2028 RC = updatedRC(RC);
2029
2030 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2031}
2032
// Invert a two-operand PPC branch condition in place. For CTR-based branches
// (BDNZ/BDZ style) the 0/1 immediate is flipped; otherwise the predicate is
// inverted while the tested CR register (Cond[1]) is left unchanged.
// Returns false to indicate the condition was successfully reversed.
// NOTE(review): the signature lines naming this member
// (PPCInstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand>&),
// originals 2033-2034) are elided in this excerpt.
2035 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2036 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2037 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2038 else
2039 // Leave the CR# the same, but invert the condition.
2040 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2041 return false;
2042}
2043
2044// For some instructions, it is legal to fold ZERO into the RA register field.
2045// This function performs that fold by replacing the operand with PPC::ZERO,
2046// it does not consider whether the load immediate zero is no longer in use.
// Returns true iff the register operand of UseMI that matches Reg was
// rewritten to PPC::ZERO/ZERO8. DefMI must be `li/li8 <reg>, 0`.
// NOTE(review): the first signature line of this member
// (PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// original 2047) is elided in this excerpt.
2048 Register Reg) const {
2049 // A zero immediate should always be loaded with a single li.
2050 unsigned DefOpc = DefMI.getOpcode();
2051 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2052 return false;
2053 if (!DefMI.getOperand(1).isImm())
2054 return false;
2055 if (DefMI.getOperand(1).getImm() != 0)
2056 return false;
2057
2058 // Note that we cannot here invert the arguments of an isel in order to fold
2059 // a ZERO into what is presented as the second argument. All we have here
2060 // is the condition bit, and that might come from a CR-logical bit operation.
2061
2062 const MCInstrDesc &UseMCID = UseMI.getDesc();
2063
2064 // Only fold into real machine instructions.
2065 if (UseMCID.isPseudo())
2066 return false;
2067
2068 // We need to find which of the User's operands is to be folded, that will be
2069 // the operand that matches the given register ID.
2070 unsigned UseIdx;
2071 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2072 if (UseMI.getOperand(UseIdx).isReg() &&
2073 UseMI.getOperand(UseIdx).getReg() == Reg)
2074 break;
2075
2076 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2077 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2078
2079 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2080
2081 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2082 // register (which might also be specified as a pointer class kind).
2083 if (UseInfo->isLookupPtrRegClass()) {
2084 if (UseInfo->RegClass /* Kind */ != 1)
2085 return false;
2086 } else {
2087 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2088 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2089 return false;
2090 }
2091
2092 // Make sure this is not tied to an output register (or otherwise
2093 // constrained). This is true for ST?UX registers, for example, which
2094 // are tied to their output registers.
2095 if (UseInfo->Constraints != 0)
2096 return false;
2097
// Pick the ZERO register whose width matches the operand's register class.
2098 MCRegister ZeroReg;
2099 if (UseInfo->isLookupPtrRegClass()) {
2100 bool isPPC64 = Subtarget.isPPC64();
2101 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2102 } else {
2103 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2104 PPC::ZERO8 : PPC::ZERO;
2105 }
2106
2107 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2108 LLVM_DEBUG(UseMI.dump());
2109 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2110 LLVM_DEBUG(dbgs() << "Into: ");
2111 LLVM_DEBUG(UseMI.dump());
2112 return true;
2113}
2114
2115// Folds zero into instructions which have a load immediate zero as an operand
2116// but also recognize zero as immediate zero. If the definition of the load
2117// has no more users it is deleted.
// Returns whether UseMI was changed; DefMI is erased whenever Reg has no
// remaining non-debug uses, independent of whether the fold succeeded.
// NOTE(review): the first signature line of this member
// (PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// original 2118) is elided in this excerpt.
2119 Register Reg, MachineRegisterInfo *MRI) const {
2120 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2121 if (MRI->use_nodbg_empty(Reg))
2122 DefMI.eraseFromParent();
2123 return Changed;
2124}
2125
// Return true if any instruction in MBB writes the CTR or CTR8 register.
// NOTE(review): the signature line of this file-local helper
// (static bool MBBDefinesCTR(MachineBasicBlock &MBB) {, original 2126) is
// elided in this excerpt.
2127 for (MachineInstr &MI : MBB)
2128 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2129 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2130 return true;
2131 return false;
2132}
2133
2134// We should make sure that, if we're going to predicate both sides of a
2135// condition (a diamond), that both sides don't define the counter register. We
2136// can predicate counter-decrement-based branches, but while that predicates
2137// the branching, it does not predicate the counter decrement. If we tried to
2138// merge the triangle into one predicated block, we'd decrement the counter
2139// twice.
// Diamond if-conversion profitability hook: allowed unless BOTH sides define
// CTR (see rationale above). The count/probability parameters are unused.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
// original 2140) is elided in this excerpt.
2141 unsigned NumT, unsigned ExtraT,
2142 MachineBasicBlock &FMBB,
2143 unsigned NumF, unsigned ExtraF,
2144 BranchProbability Probability) const {
2145 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2146}
2147
2148
// Deliberately always reports "not predicated" so if-conversion may predicate
// branches again; see the rationale below.
// NOTE(review): the signature line (presumably
// bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {,
// original 2149) is elided in this excerpt.
2150 // The predicated branches are identified by their type, not really by the
2151 // explicit presence of a predicate. Furthermore, some of them can be
2152 // predicated more than once. Because if conversion won't try to predicate
2153 // any instruction which already claims to be predicated (by returning true
2154 // here), always return false. In doing so, we let isPredicable() be the
2155 // final word on whether not the instruction can be (further) predicated.
2156
2157 return false;
2158}
2159
// Treat FPSCR-touching instructions (MFFS/MTFSF) and FENCE as scheduling
// boundaries so no code is moved across them.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// original 2160) and the fallthrough return before the closing brace
// (original 2174, presumably delegating to the TargetInstrInfo base
// implementation) are elided in this excerpt — confirm against the
// original file.
2161 const MachineBasicBlock *MBB,
2162 const MachineFunction &MF) const {
2163 switch (MI.getOpcode()) {
2164 default:
2165 break;
2166 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2167 // across them, since some FP operations may change content of FPSCR.
2168 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2169 case PPC::MFFS:
2170 case PPC::MTFSF:
2171 case PPC::FENCE:
2172 return true;
2173 }
2175}
2176
// Rewrite an unconditional control-flow instruction (BLR/BLR8, B,
// BCTR/BCTRL and variants) into its predicated form described by Pred:
// Pred[0] is the PPC::Predicate immediate (or a CTR-decrement selector when
// Pred[1] is CTR/CTR8), Pred[1] is the condition register operand. Returns
// true when the instruction was rewritten, false for opcodes this hook does
// not handle.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
// original 2177) is elided in this excerpt, as are the trailing implicit
// CTR-def lines of the two CTR branches (originals 2188, 2211) and the
// implicit-RM operand line of the _RM calls (original 2273).
2178 ArrayRef<MachineOperand> Pred) const {
2179 unsigned OpC = MI.getOpcode();
2180 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
// CTR-based predicate: turn blr into a decrement-and-branch-to-LR form.
2181 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2182 bool isPPC64 = Subtarget.isPPC64();
2183 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2184 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2185 // Need add Def and Use for CTR implicit operand.
2186 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2187 .addReg(Pred[1].getReg(), RegState::Implicit)
2189 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2190 MI.setDesc(get(PPC::BCLR));
2191 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2192 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2193 MI.setDesc(get(PPC::BCLRn));
2194 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2195 } else {
2196 MI.setDesc(get(PPC::BCCLR));
2197 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2198 .addImm(Pred[0].getImm())
2199 .add(Pred[1]);
2200 }
2201
2202 return true;
2203 } else if (OpC == PPC::B) {
2204 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2205 bool isPPC64 = Subtarget.isPPC64();
2206 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2207 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2208 // Need add Def and Use for CTR implicit operand.
2209 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2210 .addReg(Pred[1].getReg(), RegState::Implicit)
2212 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
// The branch target must be detached before changing the descriptor, then
// re-appended after the condition operand(s).
2213 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2214 MI.removeOperand(0);
2215
2216 MI.setDesc(get(PPC::BC));
2217 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2218 .add(Pred[1])
2219 .addMBB(MBB);
2220 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2221 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2222 MI.removeOperand(0);
2223
2224 MI.setDesc(get(PPC::BCn));
2225 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2226 .add(Pred[1])
2227 .addMBB(MBB);
2228 } else {
2229 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2230 MI.removeOperand(0);
2231
2232 MI.setDesc(get(PPC::BCC));
2233 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2234 .addImm(Pred[0].getImm())
2235 .add(Pred[1])
2236 .addMBB(MBB);
2237 }
2238
2239 return true;
2240 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2241 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2242 OpC == PPC::BCTRL8_RM) {
// bctr branches *through* CTR, so a CTR-decrement predicate is impossible.
2243 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2244 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2245
2246 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2247 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2248 bool isPPC64 = Subtarget.isPPC64();
2249
2250 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2251 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2252 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2254 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2255 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2256 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2257 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2258 } else {
2259 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2260 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2261 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2262 .addImm(Pred[0].getImm())
2263 .add(Pred[1]);
2264 }
2265
2266 // Need add Def and Use for LR implicit operand.
2267 if (setLR)
2268 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2269 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2270 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2271 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2272 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2274
2275 return true;
2276 }
2277
2278 return false;
2279}
2280
// Return true if predicate Pred1 subsumes Pred2, i.e. any instruction
// predicated on Pred2 may also execute under Pred1. CTR-based predicates
// never subsume; otherwise both must test the same CR register, and
// subsumption holds for identical predicates plus LE⊇{LT,EQ} and GE⊇{GT,EQ}.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
// original 2281) is elided in this excerpt.
2282 ArrayRef<MachineOperand> Pred2) const {
2283 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2284 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2285
2286 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2287 return false;
2288 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2289 return false;
2290
2291 // P1 can only subsume P2 if they test the same condition register.
2292 if (Pred1[1].getReg() != Pred2[1].getReg())
2293 return false;
2294
2295 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2296 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2297
2298 if (P1 == P2)
2299 return true;
2300
2301 // Does P1 subsume P2, e.g. GE subsumes GT.
2302 if (P1 == PPC::PRED_LE &&
2303 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2304 return true;
2305 if (P1 == PPC::PRED_GE &&
2306 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2307 return true;
2308
2309 return false;
2310}
2311
// Report whether MI defines (or clobbers via a regmask) any predicate
// register: CR, CR-bit, CTR or CTR8 classes. Matching operands are pushed
// onto Pred; returns true if any was found.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, original 2312)
// is elided in this excerpt.
2313 std::vector<MachineOperand> &Pred,
2314 bool SkipDead) const {
2315 // Note: At the present time, the contents of Pred from this function is
2316 // unused by IfConversion. This implementation follows ARM by pushing the
2317 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2318 // predicate, instructions defining CTR or CTR8 are also included as
2319 // predicate-defining instructions.
2320
2321 const TargetRegisterClass *RCs[] =
2322 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2323 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2324
2325 bool Found = false;
2326 for (const MachineOperand &MO : MI.operands()) {
2327 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2328 const TargetRegisterClass *RC = RCs[c];
2329 if (MO.isReg()) {
2330 if (MO.isDef() && RC->contains(MO.getReg())) {
2331 Pred.push_back(MO);
2332 Found = true;
2333 }
2334 } else if (MO.isRegMask()) {
// A register mask clobbers every register it does not preserve.
2335 for (MCPhysReg R : *RC)
2336 if (MO.clobbersPhysReg(R)) {
2337 Pred.push_back(MO);
2338 Found = true;
2339 }
2340 }
2341 }
2342 }
2343
2344 return Found;
2345}
2346
// Decompose a PPC compare instruction: for compare-with-immediate forms,
// SrcReg2 is 0, Value holds the immediate and Mask is 0xFFFF; for
// register-register (and FP) compares, both source registers are returned
// with Value/Mask zero. Returns false for non-compare opcodes.
// NOTE(review): the first signature line
// (bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
// original 2347) is elided in this excerpt.
2348 Register &SrcReg2, int64_t &Mask,
2349 int64_t &Value) const {
2350 unsigned Opc = MI.getOpcode();
2351
2352 switch (Opc) {
2353 default: return false;
2354 case PPC::CMPWI:
2355 case PPC::CMPLWI:
2356 case PPC::CMPDI:
2357 case PPC::CMPLDI:
2358 SrcReg = MI.getOperand(1).getReg();
2359 SrcReg2 = 0;
2360 Value = MI.getOperand(2).getImm();
2361 Mask = 0xFFFF;
2362 return true;
2363 case PPC::CMPW:
2364 case PPC::CMPLW:
2365 case PPC::CMPD:
2366 case PPC::CMPLD:
2367 case PPC::FCMPUS:
2368 case PPC::FCMPUD:
2369 SrcReg = MI.getOperand(1).getReg();
2370 SrcReg2 = MI.getOperand(2).getReg();
2371 Value = 0;
2372 Mask = 0;
2373 return true;
2374 }
2375}
2376
// Try to eliminate CmpInstr by converting the instruction that defines its
// source register into a record form (Rc=1), which sets CR0 from a signed
// compare of the result against zero; CRReg is then copied from CR0 and the
// predicates/subregisters of the compare's users are rewritten as needed.
// Returns true iff the compare was removed.
// NOTE(review): this excerpt is missing several lines of the original:
// the first signature line (original 2377), a blank line (2392), the
// use_instr_iterator declaration lines of three loops (2440, 2466, 2629),
// the Pred initializations reading operand 0 of BCC users (2445, 2510,
// 2634), an early-exit when no use of the compare was found (2478-2479),
// the declarations of PredsToUpdate/SubRegsToUpdate (circa 2510), the
// getSwappedPredicate argument line (2641), and the MII iterator
// declaration (2667). Confirm against the original file before editing.
2378 Register SrcReg2, int64_t Mask,
2379 int64_t Value,
2380 const MachineRegisterInfo *MRI) const {
2381 if (DisableCmpOpt)
2382 return false;
2383
2384 int OpC = CmpInstr.getOpcode();
2385 Register CRReg = CmpInstr.getOperand(0).getReg();
2386
2387 // FP record forms set CR1 based on the exception status bits, not a
2388 // comparison with zero.
2389 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2390 return false;
2391
2393 // The record forms set the condition register based on a signed comparison
2394 // with zero (so says the ISA manual). This is not as straightforward as it
2395 // seems, however, because this is always a 64-bit comparison on PPC64, even
2396 // for instructions that are 32-bit in nature (like slw for example).
2397 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2398 // for equality checks (as those don't depend on the sign). On PPC64,
2399 // we are restricted to equality for unsigned 64-bit comparisons and for
2400 // signed 32-bit comparisons the applicability is more restricted.
2401 bool isPPC64 = Subtarget.isPPC64();
2402 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2403 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2404 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2405
2406 // Look through copies unless that gets us to a physical register.
2407 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2408 if (ActualSrc.isVirtual())
2409 SrcReg = ActualSrc;
2410
2411 // Get the unique definition of SrcReg.
2412 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2413 if (!MI) return false;
2414
2415 bool equalityOnly = false;
2416 bool noSub = false;
2417 if (isPPC64) {
2418 if (is32BitSignedCompare) {
2419 // We can perform this optimization only if SrcReg is sign-extending.
2420 if (isSignExtended(SrcReg, MRI))
2421 noSub = true;
2422 else
2423 return false;
2424 } else if (is32BitUnsignedCompare) {
2425 // We can perform this optimization, equality only, if SrcReg is
2426 // zero-extending.
2427 if (isZeroExtended(SrcReg, MRI)) {
2428 noSub = true;
2429 equalityOnly = true;
2430 } else
2431 return false;
2432 } else
2433 equalityOnly = is64BitUnsignedCompare;
2434 } else
2435 equalityOnly = is32BitUnsignedCompare;
2436
2437 if (equalityOnly) {
2438 // We need to check the uses of the condition register in order to reject
2439 // non-equality comparisons.
2441 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2442 I != IE; ++I) {
2443 MachineInstr *UseMI = &*I;
2444 if (UseMI->getOpcode() == PPC::BCC) {
2446 unsigned PredCond = PPC::getPredicateCondition(Pred);
2447 // We ignore hint bits when checking for non-equality comparisons.
2448 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2449 return false;
2450 } else if (UseMI->getOpcode() == PPC::ISEL ||
2451 UseMI->getOpcode() == PPC::ISEL8) {
2452 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2453 if (SubIdx != PPC::sub_eq)
2454 return false;
2455 } else
2456 return false;
2457 }
2458 }
2459
2460 MachineBasicBlock::iterator I = CmpInstr;
2461
2462 // Scan forward to find the first use of the compare.
2463 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2464 ++I) {
2465 bool FoundUse = false;
2467 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2468 J != JE; ++J)
2469 if (&*J == &*I) {
2470 FoundUse = true;
2471 break;
2472 }
2473
2474 if (FoundUse)
2475 break;
2476 }
2477
2480
2481 // There are two possible candidates which can be changed to set CR[01].
2482 // One is MI, the other is a SUB instruction.
2483 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2484 MachineInstr *Sub = nullptr;
2485 if (SrcReg2 != 0)
2486 // MI is not a candidate for CMPrr.
2487 MI = nullptr;
2488 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2489 // same BB as the comparison. This is to allow the check below to avoid calls
2490 // (and other explicit clobbers); instead we should really check for these
2491 // more explicitly (in at least a few predecessors).
2492 else if (MI->getParent() != CmpInstr.getParent())
2493 return false;
2494 else if (Value != 0) {
2495 // The record-form instructions set CR bit based on signed comparison
2496 // against 0. We try to convert a compare against 1 or -1 into a compare
2497 // against 0 to exploit record-form instructions. For example, we change
2498 // the condition "greater than -1" into "greater than or equal to 0"
2499 // and "less than 1" into "less than or equal to 0".
2500
2501 // Since we optimize comparison based on a specific branch condition,
2502 // we don't optimize if condition code is used by more than once.
2503 if (equalityOnly || !MRI->hasOneUse(CRReg))
2504 return false;
2505
2506 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2507 if (UseMI->getOpcode() != PPC::BCC)
2508 return false;
2509
2511 unsigned PredCond = PPC::getPredicateCondition(Pred);
2512 unsigned PredHint = PPC::getPredicateHint(Pred);
2513 int16_t Immed = (int16_t)Value;
2514
2515 // When modifying the condition in the predicate, we propagate hint bits
2516 // from the original predicate to the new one.
2517 if (Immed == -1 && PredCond == PPC::PRED_GT)
2518 // We convert "greater than -1" into "greater than or equal to 0",
2519 // since we are assuming signed comparison by !equalityOnly
2520 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2521 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2522 // We convert "less than or equal to -1" into "less than 0".
2523 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2524 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2525 // We convert "less than 1" into "less than or equal to 0".
2526 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2527 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2528 // We convert "greater than or equal to 1" into "greater than 0".
2529 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2530 else
2531 return false;
2532
2533 // Convert the comparison and its user to a compare against zero with the
2534 // appropriate predicate on the branch. Zero comparison might provide
2535 // optimization opportunities post-RA (see optimization in
2536 // PPCPreEmitPeephole.cpp).
2537 UseMI->getOperand(0).setImm(Pred);
2538 CmpInstr.getOperand(2).setImm(0);
2539 }
2540
2541 // Search for Sub.
2542 --I;
2543
2544 // Get ready to iterate backward from CmpInstr.
2545 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2546
2547 for (; I != E && !noSub; --I) {
2548 const MachineInstr &Instr = *I;
2549 unsigned IOpC = Instr.getOpcode();
2550
2551 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2552 Instr.readsRegister(PPC::CR0, TRI)))
2553 // This instruction modifies or uses the record condition register after
2554 // the one we want to change. While we could do this transformation, it
2555 // would likely not be profitable. This transformation removes one
2556 // instruction, and so even forcing RA to generate one move probably
2557 // makes it unprofitable.
2558 return false;
2559
2560 // Check whether CmpInstr can be made redundant by the current instruction.
2561 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2562 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2563 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2564 ((Instr.getOperand(1).getReg() == SrcReg &&
2565 Instr.getOperand(2).getReg() == SrcReg2) ||
2566 (Instr.getOperand(1).getReg() == SrcReg2 &&
2567 Instr.getOperand(2).getReg() == SrcReg))) {
2568 Sub = &*I;
2569 break;
2570 }
2571
2572 if (I == B)
2573 // The 'and' is below the comparison instruction.
2574 return false;
2575 }
2576
2577 // Return false if no candidates exist.
2578 if (!MI && !Sub)
2579 return false;
2580
2581 // The single candidate is called MI.
2582 if (!MI) MI = Sub;
2583
2584 int NewOpC = -1;
2585 int MIOpC = MI->getOpcode();
2586 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2587 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2588 NewOpC = MIOpC;
2589 else {
2590 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2591 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2592 NewOpC = MIOpC;
2593 }
2594
2595 // FIXME: On the non-embedded POWER architectures, only some of the record
2596 // forms are fast, and we should use only the fast ones.
2597
2598 // The defining instruction has a record form (or is already a record
2599 // form). It is possible, however, that we'll need to reverse the condition
2600 // code of the users.
2601 if (NewOpC == -1)
2602 return false;
2603
2604 // This transformation should not be performed if `nsw` is missing and is not
2605 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2606 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2607 // CRReg can reflect if compared values are equal, this optz is still valid.
2608 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2609 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2610 return false;
2611
2612 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2613 // needs to be updated to be based on SUB. Push the condition code
2614 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2615 // condition code of these operands will be modified.
2616 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2617 // comparison against 0, which may modify predicate.
2618 bool ShouldSwap = false;
2619 if (Sub && Value == 0) {
2620 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2621 Sub->getOperand(2).getReg() == SrcReg;
2622
2623 // The operands to subf are the opposite of sub, so only in the fixed-point
2624 // case, invert the order.
2625 ShouldSwap = !ShouldSwap;
2626 }
2627
2628 if (ShouldSwap)
2630 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2631 I != IE; ++I) {
2632 MachineInstr *UseMI = &*I;
2633 if (UseMI->getOpcode() == PPC::BCC) {
2635 unsigned PredCond = PPC::getPredicateCondition(Pred);
2636 assert((!equalityOnly ||
2637 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2638 "Invalid predicate for equality-only optimization");
2639 (void)PredCond; // To suppress warning in release build.
2640 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2642 } else if (UseMI->getOpcode() == PPC::ISEL ||
2643 UseMI->getOpcode() == PPC::ISEL8) {
2644 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2645 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2646 "Invalid CR bit for equality-only optimization");
2647
// Swapping the compare operands exchanges the meaning of LT and GT.
2648 if (NewSubReg == PPC::sub_lt)
2649 NewSubReg = PPC::sub_gt;
2650 else if (NewSubReg == PPC::sub_gt)
2651 NewSubReg = PPC::sub_lt;
2652
2653 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2654 NewSubReg));
2655 } else // We need to abort on a user we don't understand.
2656 return false;
2657 }
2658 assert(!(Value != 0 && ShouldSwap) &&
2659 "Non-zero immediate support and ShouldSwap"
2660 "may conflict in updating predicate");
2661
2662 // Create a new virtual register to hold the value of the CR set by the
2663 // record-form instruction. If the instruction was not previously in
2664 // record form, then set the kill flag on the CR.
2665 CmpInstr.eraseFromParent();
2666
2668 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2669 get(TargetOpcode::COPY), CRReg)
2670 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2671
2672 // Even if CR0 register were dead before, it is alive now since the
2673 // instruction we just built uses it.
2674 MI->clearRegisterDeads(PPC::CR0);
2675
2676 if (MIOpC != NewOpC) {
2677 // We need to be careful here: we're replacing one instruction with
2678 // another, and we need to make sure that we get all of the right
2679 // implicit uses and defs. On the other hand, the caller may be holding
2680 // an iterator to this instruction, and so we can't delete it (this is
2681 // specifically the case if this is the instruction directly after the
2682 // compare).
2683
2684 // Rotates are expensive instructions. If we're emitting a record-form
2685 // rotate that can just be an andi/andis, we should just emit that.
2686 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2687 Register GPRRes = MI->getOperand(0).getReg();
2688 int64_t SH = MI->getOperand(2).getImm();
2689 int64_t MB = MI->getOperand(3).getImm();
2690 int64_t ME = MI->getOperand(4).getImm();
2691 // We can only do this if both the start and end of the mask are in the
2692 // same halfword.
2693 bool MBInLoHWord = MB >= 16;
2694 bool MEInLoHWord = ME >= 16;
2695 uint64_t Mask = ~0LLU;
2696
2697 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2698 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2699 // The mask value needs to shift right 16 if we're emitting andis.
2700 Mask >>= MBInLoHWord ? 0 : 16;
2701 NewOpC = MIOpC == PPC::RLWINM
2702 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2703 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2704 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2705 (ME - MB + 1 == SH) && (MB >= 16)) {
2706 // If we are rotating by the exact number of bits as are in the mask
2707 // and the mask is in the least significant bits of the register,
2708 // that's just an andis. (as long as the GPR result has no uses).
2709 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2710 Mask >>= 16;
2711 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2712 }
2713 // If we've set the mask, we can transform.
2714 if (Mask != ~0LLU) {
2715 MI->removeOperand(4);
2716 MI->removeOperand(3);
2717 MI->getOperand(2).setImm(Mask);
2718 NumRcRotatesConvertedToRcAnd++;
2719 }
2720 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2721 int64_t MB = MI->getOperand(3).getImm();
2722 if (MB >= 48) {
2723 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2724 NewOpC = PPC::ANDI8_rec;
2725 MI->removeOperand(3);
2726 MI->getOperand(2).setImm(Mask);
2727 NumRcRotatesConvertedToRcAnd++;
2728 }
2729 }
2730
2731 const MCInstrDesc &NewDesc = get(NewOpC);
2732 MI->setDesc(NewDesc);
2733
// Replay the implicit operands the new descriptor requires but the old
// instruction did not carry.
2734 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2735 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2736 MI->addOperand(*MI->getParent()->getParent(),
2737 MachineOperand::CreateReg(ImpDef, true, true));
2738 }
2739 }
2740 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2741 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2742 MI->addOperand(*MI->getParent()->getParent(),
2743 MachineOperand::CreateReg(ImpUse, false, true));
2744 }
2745 }
2746 }
2747 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2748 "Record-form instruction does not define cr0?");
2749
2750 // Modify the condition code of operands in OperandsToUpdate.
2751 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2752 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2753 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2754 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2755
2756 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2757 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2758
2759 return true;
2760}
2761
// Post-RA variant of compare elimination: if CmpMI is a signed compare of
// SrcReg against 0 writing CR0, and SrcReg's defining instruction has a
// record form, convert that definition to the record form so CmpMI becomes
// redundant (the caller deletes it when this returns true).
// NOTE(review): the signature lines of this member
// (bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const,
// originals 2762-2763) and the implicit CR0-def line appended after
// setDesc (original 2824, the argument of the MachineInstrBuilder below)
// are elided in this excerpt.
// This pass only runs after register allocation (non-SSA MRI).
2764 if (MRI->isSSA())
2765 return false;
2766
2767 Register SrcReg, SrcReg2;
2768 int64_t CmpMask, CmpValue;
2769 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2770 return false;
2771
2772 // Try to optimize the comparison against 0.
2773 if (CmpValue || !CmpMask || SrcReg2)
2774 return false;
2775
2776 // The record forms set the condition register based on a signed comparison
2777 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2778 // equality checks in post-RA, we are more restricted on a unsigned
2779 // comparison.
2780 unsigned Opc = CmpMI.getOpcode();
2781 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2782 return false;
2783
2784 // The record forms are always based on a 64-bit comparison on PPC64
2785 // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2786 // comparison. Since we can't do the equality checks in post-RA, we bail out
2787 // the case.
2788 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2789 return false;
2790
2791 // CmpMI can't be deleted if it has implicit def.
2792 if (CmpMI.hasImplicitDef())
2793 return false;
2794
2795 bool SrcRegHasOtherUse = false;
2796 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2797 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2798 return false;
2799
2800 MachineOperand RegMO = CmpMI.getOperand(0);
2801 Register CRReg = RegMO.getReg();
// Record forms set CR0 specifically, so only compares into CR0 qualify.
2802 if (CRReg != PPC::CR0)
2803 return false;
2804
2805 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2806 bool SeenUseOfCRReg = false;
2807 bool IsCRRegKilled = false;
2808 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2809 SeenUseOfCRReg) ||
2810 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2811 return false;
2812
2813 int SrcMIOpc = SrcMI->getOpcode();
2814 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2815 if (NewOpC == -1)
2816 return false;
2817
2818 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2819 LLVM_DEBUG(SrcMI->dump());
2820
2821 const MCInstrDesc &NewDesc = get(NewOpC);
2822 SrcMI->setDesc(NewDesc);
2823 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2825 SrcMI->clearRegisterDeads(CRReg);
2826
2827 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2828 "Record-form instruction does not define cr0?");
2829
2830 LLVM_DEBUG(dbgs() << "with: ");
2831 LLVM_DEBUG(SrcMI->dump());
2832 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2833 LLVM_DEBUG(CmpMI.dump());
2834 return true;
2835}
2836
// Plural-form hook: wraps getMemOperandWithOffsetWidth() and reports the
// single PPC base operand through the BaseOps list. Returns false when the
// base and offset of LdSt cannot be determined.
2839 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2840 const TargetRegisterInfo *TRI) const {
2841 const MachineOperand *BaseOp;
  // PPC memory offsets are fixed byte immediates, never scaled by a
  // runtime (vscale) factor.
2842 OffsetIsScalable = false;
2843 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2844 return false;
2845 BaseOps.push_back(BaseOp);
2846 return true;
2847}
2848
2849static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2850 const TargetRegisterInfo *TRI) {
2851 // If this is a volatile load/store, don't mess with it.
2852 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2853 return false;
2854
2855 if (LdSt.getOperand(2).isFI())
2856 return true;
2857
2858 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2859 // Can't cluster if the instruction modifies the base register
2860 // or it is update form. e.g. ld r2,3(r2)
2861 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2862 return false;
2863
2864 return true;
2865}
2866
2867// Only cluster instruction pair that have the same opcode, and they are
2868// clusterable according to PowerPC specification.
2869static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2870 const PPCSubtarget &Subtarget) {
2871 switch (FirstOpc) {
2872 default:
2873 return false;
2874 case PPC::STD:
2875 case PPC::STFD:
2876 case PPC::STXSD:
2877 case PPC::DFSTOREf64:
2878 return FirstOpc == SecondOpc;
2879 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2880 // 32bit and 64bit instruction selection. They are clusterable pair though
2881 // they are different opcode.
2882 case PPC::STW:
2883 case PPC::STW8:
2884 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2885 }
2886}
2887
// Decide whether the two memory operations reached through BaseOps1/BaseOps2
// should be scheduled back-to-back: same base reg/FI, a PowerPC-clusterable
// opcode pair, safe (unordered, non-update-form) accesses, equal access
// widths, and exactly adjacent offsets. A cluster is capped at two ops.
2889 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2890 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2891 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2892 unsigned NumBytes) const {
2893
2894 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2895 const MachineOperand &BaseOp1 = *BaseOps1.front();
2896 const MachineOperand &BaseOp2 = *BaseOps2.front();
2897 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2898 "Only base registers and frame indices are supported.");
2899
2900 // ClusterSize means the number of memory operations that will have been
2901 // clustered if this hook returns true.
2902 // Don't cluster memory op if there are already two ops clustered at least.
2903 if (ClusterSize > 2)
2904 return false;
2905
2906 // Cluster the load/store only when they have the same base
2907 // register or FI.
2908 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2909 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2910 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2911 return false;
2912
2913 // Check if the load/store are clusterable according to the PowerPC
2914 // specification.
2915 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2916 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2917 unsigned FirstOpc = FirstLdSt.getOpcode();
2918 unsigned SecondOpc = SecondLdSt.getOpcode();
2920 // Cluster the load/store only when they have the same opcode, and they are
2921 // clusterable opcode according to PowerPC specification.
2922 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2923 return false;
2924
2925 // Can't cluster load/store that have ordered or volatile memory reference.
2926 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2927 !isLdStSafeToCluster(SecondLdSt, TRI))
2928 return false;
2929
  // Require equal access widths and (below) strict adjacency: the second
  // access must begin exactly where the first one ends.
2930 int64_t Offset1 = 0, Offset2 = 0;
2931 LocationSize Width1 = 0, Width2 = 0;
2932 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2933 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2934 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2935 Width1 != Width2)
2936 return false;
2937
2938 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2939 "getMemOperandWithOffsetWidth return incorrect base op");
2940 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2941 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2942 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2943}
2944
2945 /// GetInstSize - Return the number of bytes of code the specified
2946 /// instruction may be. This returns the maximum number of bytes.
2947 ///
2949 unsigned Opcode = MI.getOpcode();
2950
2951 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
  // Inline asm: compute a conservative upper bound from the asm string.
2952 const MachineFunction *MF = MI.getParent()->getParent();
2953 const char *AsmStr = MI.getOperand(0).getSymbolName();
2954 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2955 } else if (Opcode == TargetOpcode::STACKMAP) {
  // STACKMAP/PATCHPOINT reserve exactly the requested number of patch bytes.
2956 StackMapOpers Opers(&MI);
2957 return Opers.getNumPatchBytes();
2958 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2959 PatchPointOpers Opers(&MI);
2960 return Opers.getNumPatchBytes();
2961 } else {
  // Everything else has a fixed size recorded in its MCInstrDesc.
2962 return get(Opcode).getSize();
2963 }
2964}
2965
// Split an operand's target flags into a (direct, bitmask) pair for
// serialization. PPC target flags are a single enumerated (direct) value,
// so the bitmask half is always zero.
2966 std::pair<unsigned, unsigned>
2968 // PPC always uses a direct mask.
2969 return std::make_pair(TF, 0u);
2970}
2971
// Table mapping each direct machine-operand target flag to the name used
// when printing/parsing MIR.
2974 using namespace PPCII;
2975 static const std::pair<unsigned, const char *> TargetFlags[] = {
2976 {MO_PLT, "ppc-plt"},
2977 {MO_PIC_FLAG, "ppc-pic"},
2978 {MO_PCREL_FLAG, "ppc-pcrel"},
2979 {MO_GOT_FLAG, "ppc-got"},
2980 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2981 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2982 {MO_TPREL_FLAG, "ppc-tprel"},
2983 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2984 {MO_TLSLD_FLAG, "ppc-tlsld"},
2985 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2986 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2987 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2988 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2989 {MO_LO, "ppc-lo"},
2990 {MO_HA, "ppc-ha"},
2991 {MO_TPREL_LO, "ppc-tprel-lo"},
2992 {MO_TPREL_HA, "ppc-tprel-ha"},
2993 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2994 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2995 {MO_TOC_LO, "ppc-toc-lo"},
2996 {MO_TLS, "ppc-tls"},
2997 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2998 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2999 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3000 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3001 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3002 };
3003 return ArrayRef(TargetFlags);
3004}
3005
3006 // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3007 // The VSX versions have the advantage of a full 64-register target whereas
3008 // the FP ones have the advantage of lower latency and higher throughput. So
3009 // what we are after is using the faster instructions in low register pressure
3010 // situations and using the larger register file in high register pressure
3011 // situations.
  // For each pseudo, UpperOpcode is the VSX replacement and LowerOpcode is
  // the legacy FP replacement; the choice is made from the register the
  // allocator assigned (see the range test at the bottom).
3013 unsigned UpperOpcode, LowerOpcode;
3014 switch (MI.getOpcode()) {
3015 case PPC::DFLOADf32:
3016 UpperOpcode = PPC::LXSSP;
3017 LowerOpcode = PPC::LFS;
3018 break;
3019 case PPC::DFLOADf64:
3020 UpperOpcode = PPC::LXSD;
3021 LowerOpcode = PPC::LFD;
3022 break;
3023 case PPC::DFSTOREf32:
3024 UpperOpcode = PPC::STXSSP;
3025 LowerOpcode = PPC::STFS;
3026 break;
3027 case PPC::DFSTOREf64:
3028 UpperOpcode = PPC::STXSD;
3029 LowerOpcode = PPC::STFD;
3030 break;
3031 case PPC::XFLOADf32:
3032 UpperOpcode = PPC::LXSSPX;
3033 LowerOpcode = PPC::LFSX;
3034 break;
3035 case PPC::XFLOADf64:
3036 UpperOpcode = PPC::LXSDX;
3037 LowerOpcode = PPC::LFDX;
3038 break;
3039 case PPC::XFSTOREf32:
3040 UpperOpcode = PPC::STXSSPX;
3041 LowerOpcode = PPC::STFSX;
3042 break;
3043 case PPC::XFSTOREf64:
3044 UpperOpcode = PPC::STXSDX;
3045 LowerOpcode = PPC::STFDX;
3046 break;
3047 case PPC::LIWAX:
3048 UpperOpcode = PPC::LXSIWAX;
3049 LowerOpcode = PPC::LFIWAX;
3050 break;
3051 case PPC::LIWZX:
3052 UpperOpcode = PPC::LXSIWZX;
3053 LowerOpcode = PPC::LFIWZX;
3054 break;
3055 case PPC::STIWX:
3056 UpperOpcode = PPC::STXSIWX;
3057 LowerOpcode = PPC::STFIWX;
3058 break;
3059 default:
3060 llvm_unreachable("Unknown Operation!");
3061 }
3062
3063 Register TargetReg = MI.getOperand(0).getReg();
3064 unsigned Opcode;
  // If the assigned register is an FPR (F0-F31) or one of the overlapping
  // low VSX halves (VSL0-VSL31), the legacy FP opcode can reach it; any
  // other register requires the VSX form.
3065 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3066 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3067 Opcode = LowerOpcode;
3068 else
3069 Opcode = UpperOpcode;
3070 MI.setDesc(get(Opcode));
3071 return true;
3072}
3073
3074static bool isAnImmediateOperand(const MachineOperand &MO) {
3075 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3076}
3077
// Expand a post-RA pseudo (BUILD_UACC, KILL_PAIR, LOAD_STACK_GUARD,
// PPCLdFixedAddr, the VSX memory pseudos, SPILLTOVSR_*, CFENCE/CFENCE8)
// into real machine instructions in place. Returns true iff MI was
// rewritten; false means the pseudo is not handled here.
3079 auto &MBB = *MI.getParent();
3080 auto DL = MI.getDebugLoc();
3081
3082 switch (MI.getOpcode()) {
3083 case PPC::BUILD_UACC: {
3084 MCRegister ACC = MI.getOperand(0).getReg();
3085 MCRegister UACC = MI.getOperand(1).getReg();
3086 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
  // Each ACC/UACC register covers four consecutive VSRs; copy all four
  // when the source and destination accumulator numbers differ.
3087 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3088 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3089 // FIXME: This can easily be improved to look up to the top of the MBB
3090 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3091 // we can just re-target any such XXLOR's to DstVSR + offset.
3092 for (int VecNo = 0; VecNo < 4; VecNo++)
3093 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3094 .addReg(SrcVSR + VecNo)
3095 .addReg(SrcVSR + VecNo);
3096 }
3097 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3098 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3099 // with a NOP.
3100 [[fallthrough]];
3101 }
3102 case PPC::KILL_PAIR: {
  // Degrade to an unencoded NOP, dropping both register operands.
3103 MI.setDesc(get(PPC::UNENCODED_NOP));
3104 MI.removeOperand(1);
3105 MI.removeOperand(0);
3106 return true;
3107 }
3108 case TargetOpcode::LOAD_STACK_GUARD: {
3109 assert(Subtarget.isTargetLinux() &&
3110 "Only Linux target is expected to contain LOAD_STACK_GUARD");
  // The guard value is loaded from a fixed negative offset off the thread
  // pointer (X13 on PPC64, R2 on PPC32) — presumably glibc's TCB
  // stack-guard slot; confirm against the ABI before changing.
3111 const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3112 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3113 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3114 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3115 .addImm(Offset)
3116 .addReg(Reg);
3117 return true;
3118 }
3119 case PPC::PPCLdFixedAddr: {
3120 assert(Subtarget.getTargetTriple().isOSGlibc() &&
3121 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3122 int64_t Offset = 0;
3123 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3124 MI.setDesc(get(PPC::LWZ));
3125 uint64_t FAType = MI.getOperand(1).getImm();
  // Pull in the PPC_*_OFFSET_* constants used in the selection below.
3126#undef PPC_LNX_FEATURE
3127#undef PPC_LNX_CPU
3128#define PPC_LNX_DEFINE_OFFSETS
3129#include "llvm/TargetParser/PPCTargetParser.def"
3130 bool IsLE = Subtarget.isLittleEndian();
3131 bool Is64 = Subtarget.isPPC64();
  // Select the fixed-address word (hwcap/hwcap2/cpuid) by endianness and
  // word size; the offsets come from PPCTargetParser.def.
3132 if (FAType == PPC_FAWORD_HWCAP) {
3133 if (IsLE)
3134 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3135 else
3136 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3137 } else if (FAType == PPC_FAWORD_HWCAP2) {
3138 if (IsLE)
3139 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3140 else
3141 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3142 } else if (FAType == PPC_FAWORD_CPUID) {
3143 if (IsLE)
3144 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3145 else
3146 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3147 }
3148 assert(Offset && "Do not know the offset for this fixed addr load");
3149 MI.removeOperand(1);
3151 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3152 .addImm(Offset)
3153 .addReg(Reg);
3154 return true;
3155#define PPC_TGT_PARSER_UNDEF_MACROS
3156#include "llvm/TargetParser/PPCTargetParser.def"
3157#undef PPC_TGT_PARSER_UNDEF_MACROS
3158 }
3159 case PPC::DFLOADf32:
3160 case PPC::DFLOADf64:
3161 case PPC::DFSTOREf32:
3162 case PPC::DFSTOREf64: {
3163 assert(Subtarget.hasP9Vector() &&
3164 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3165 assert(MI.getOperand(2).isReg() &&
3166 isAnImmediateOperand(MI.getOperand(1)) &&
3167 "D-form op must have register and immediate operands");
3168 return expandVSXMemPseudo(MI);
3169 }
3170 case PPC::XFLOADf32:
3171 case PPC::XFSTOREf32:
3172 case PPC::LIWAX:
3173 case PPC::LIWZX:
3174 case PPC::STIWX: {
3175 assert(Subtarget.hasP8Vector() &&
3176 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3177 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3178 "X-form op must have register and register operands");
3179 return expandVSXMemPseudo(MI);
3180 }
3181 case PPC::XFLOADf64:
3182 case PPC::XFSTOREf64: {
3183 assert(Subtarget.hasVSX() &&
3184 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3185 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3186 "X-form op must have register and register operands");
3187 return expandVSXMemPseudo(MI);
3188 }
3189 case PPC::SPILLTOVSR_LD: {
3190 Register TargetReg = MI.getOperand(0).getReg();
3191 if (PPC::VSFRCRegClass.contains(TargetReg)) {
  // Landed in a VSX register: recurse to expand the D-form VSX load.
3192 MI.setDesc(get(PPC::DFLOADf64));
3193 return expandPostRAPseudo(MI);
3194 }
3195 else
3196 MI.setDesc(get(PPC::LD));
3197 return true;
3198 }
3199 case PPC::SPILLTOVSR_ST: {
3200 Register SrcReg = MI.getOperand(0).getReg();
3201 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3202 NumStoreSPILLVSRRCAsVec++;
3203 MI.setDesc(get(PPC::DFSTOREf64));
3204 return expandPostRAPseudo(MI);
3205 } else {
3206 NumStoreSPILLVSRRCAsGpr++;
3207 MI.setDesc(get(PPC::STD));
3208 }
3209 return true;
3210 }
3211 case PPC::SPILLTOVSR_LDX: {
3212 Register TargetReg = MI.getOperand(0).getReg();
3213 if (PPC::VSFRCRegClass.contains(TargetReg))
3214 MI.setDesc(get(PPC::LXSDX));
3215 else
3216 MI.setDesc(get(PPC::LDX));
3217 return true;
3218 }
3219 case PPC::SPILLTOVSR_STX: {
3220 Register SrcReg = MI.getOperand(0).getReg();
3221 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3222 NumStoreSPILLVSRRCAsVec++;
3223 MI.setDesc(get(PPC::STXSDX));
3224 } else {
3225 NumStoreSPILLVSRRCAsGpr++;
3226 MI.setDesc(get(PPC::STDX));
3227 }
3228 return true;
3229 }
3230
3231 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3232 case PPC::CFENCE:
3233 case PPC::CFENCE8: {
  // Expand to a self-compare of Val into CR7, a CR7-dependent CTRL_DEP,
  // and an isync; MI itself is recycled as the ISYNC.
3234 auto Val = MI.getOperand(0).getReg();
3235 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3236 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3237 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3239 .addReg(PPC::CR7)
3240 .addImm(1);
3241 MI.setDesc(get(PPC::ISYNC));
3242 MI.removeOperand(0);
3243 return true;
3244 }
3245 }
3246 return false;
3247}
3248
3249// Essentially a compile-time implementation of a compare->isel sequence.
3250// It takes two constants to compare, along with the true/false registers
3251// and the comparison type (as a subreg to a CR field) and returns one
3252// of the true/false registers, depending on the comparison results.
3253static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3254 unsigned TrueReg, unsigned FalseReg,
3255 unsigned CRSubReg) {
3256 // Signed comparisons. The immediates are assumed to be sign-extended.
3257 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3258 switch (CRSubReg) {
3259 default: llvm_unreachable("Unknown integer comparison type.");
3260 case PPC::sub_lt:
3261 return Imm1 < Imm2 ? TrueReg : FalseReg;
3262 case PPC::sub_gt:
3263 return Imm1 > Imm2 ? TrueReg : FalseReg;
3264 case PPC::sub_eq:
3265 return Imm1 == Imm2 ? TrueReg : FalseReg;
3266 }
3267 }
3268 // Unsigned comparisons.
3269 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3270 switch (CRSubReg) {
3271 default: llvm_unreachable("Unknown integer comparison type.");
3272 case PPC::sub_lt:
3273 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3274 case PPC::sub_gt:
3275 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3276 case PPC::sub_eq:
3277 return Imm1 == Imm2 ? TrueReg : FalseReg;
3278 }
3279 }
3280 return PPC::NoRegister;
3281}
3282
// Rewrite operand OpNo of MI from a register into the immediate Imm, then
// drop any leftover implicit use of that register so the instruction stays
// consistent.
3284 unsigned OpNo,
3285 int64_t Imm) const {
3286 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3287 // Replace the REG with the Immediate.
3288 Register InUseReg = MI.getOperand(OpNo).getReg();
3289 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3290
3291 // We need to make sure that the MI didn't have any implicit use
3292 // of this REG any more. We don't call MI.implicit_operands().empty() to
3293 // return early, since MI's MCID might be changed in calling context, as a
3294 // result its number of explicit operands may be changed, thus the begin of
3295 // implicit operand is changed.
3297 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3298 if (UseOpIdx >= 0) {
3299 MachineOperand &MO = MI.getOperand(UseOpIdx);
3300 if (MO.isImplicit())
3301 // The operands must always be in the following order:
3302 // - explicit reg defs,
3303 // - other explicit operands (reg uses, immediates, etc.),
3304 // - implicit reg defs
3305 // - implicit reg uses
3306 // Therefore, removing the implicit operand won't change the explicit
3307 // operands layout.
3308 MI.removeOperand(UseOpIdx);
3309 }
3310}
3311
3312 // Replace an instruction with one that materializes a constant (and sets
3313 // CR0 if the original instruction was a record-form instruction).
3315 const LoadImmediateInfo &LII) const {
3316 // Remove existing operands.
  // Keep only the def operand (and, for record forms, operand 1 which the
  // replacement ANDI reads).
3317 int OperandToKeep = LII.SetCR ? 1 : 0;
3318 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3319 MI.removeOperand(i);
3320
3321 // Replace the instruction.
3322 if (LII.SetCR) {
  // Record-form ANDI both produces the value and sets CR0 implicitly.
3323 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3324 // Set the immediate.
3325 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3326 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3327 return;
3328 }
3329 else
3330 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3331
3332 // Set the immediate.
3333 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3334 .addImm(LII.Imm);
3335}
3336
// Scan backwards from MI within its basic block for the instruction that
// defines Reg. Returns nullptr if no def is found in the block; sets
// SeenIntermediateUse when a read of Reg occurs between MI and that def.
3338 bool &SeenIntermediateUse) const {
3339 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3340 "Should be called after register allocation.");
3342 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
  // Step past MI itself before scanning.
3343 It++;
3344 SeenIntermediateUse = false;
3345 for (; It != E; ++It) {
3346 if (It->modifiesRegister(Reg, TRI))
3347 return &*It;
3348 if (It->readsRegister(Reg, TRI))
3349 SeenIntermediateUse = true;
3350 }
  // No def of Reg within this block.
3351 return nullptr;
3352}
3353
// Materialize the immediate Imm into physical register Reg before MBBI:
// a single li for int16, lis+ori for int32, and a five-instruction
// lis/ori/rldicr/oris/ori sequence for full 64-bit values (PPC64 only).
3356 const DebugLoc &DL, Register Reg,
3357 int64_t Imm) const {
3359 "Register should be in non-SSA form after RA");
3360 bool isPPC64 = Subtarget.isPPC64();
3361 // FIXME: Materialization here is not optimal.
3362 // For some special bit patterns we can use less instructions.
3363 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3364 if (isInt<16>(Imm)) {
3365 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3366 } else if (isInt<32>(Imm)) {
  // lis loads the high halfword; or in the low halfword if non-zero.
3367 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3368 .addImm(Imm >> 16);
3369 if (Imm & 0xFFFF)
3370 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3371 .addReg(Reg, RegState::Kill)
3372 .addImm(Imm & 0xFFFF);
3373 } else {
3374 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3375 "only supported in PPC64");
  // Build the upper 32 bits, shift them into place with rldicr, then or
  // in the two low halfwords.
3376 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3377 if ((Imm >> 32) & 0xFFFF)
3378 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3379 .addReg(Reg, RegState::Kill)
3380 .addImm((Imm >> 32) & 0xFFFF);
3381 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3382 .addReg(Reg, RegState::Kill)
3383 .addImm(32)
3384 .addImm(31);
3385 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3386 .addReg(Reg, RegState::Kill)
3387 .addImm((Imm >> 16) & 0xFFFF);
3388 if (Imm & 0xFFFF)
3389 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3390 .addReg(Reg, RegState::Kill)
3391 .addImm(Imm & 0xFFFF);
3392 }
3393}
3394
// Locate a def whose immediate might be forwarded into MI. In SSA, look
// through COPY-like instructions for an LI/LI8/ADDI/ADDI8 def (preferring
// LI over ADDI). Post-RA, scan backwards within the block for an
// add-/load-immediate def of one of MI's explicit register uses. On
// success, OpNoForForwarding is the operand index in MI that the def feeds.
3395 MachineInstr *PPCInstrInfo::getForwardingDefMI(
3397 unsigned &OpNoForForwarding,
3398 bool &SeenIntermediateUse) const {
3399 OpNoForForwarding = ~0U;
3400 MachineInstr *DefMI = nullptr;
3401 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3403 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3404 // within the basic block to see if the register is defined using an
3405 // LI/LI8/ADDI/ADDI8.
3406 if (MRI->isSSA()) {
3407 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3408 if (!MI.getOperand(i).isReg())
3409 continue;
3410 Register Reg = MI.getOperand(i).getReg();
3411 if (!Reg.isVirtual())
3412 continue;
3413 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3414 if (TrueReg.isVirtual()) {
3415 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3416 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3417 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3418 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3419 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3420 OpNoForForwarding = i;
3421 DefMI = DefMIForTrueReg;
3422 // The ADDI and LI operand maybe exist in one instruction at same
3423 // time. we prefer to fold LI operand as LI only has one Imm operand
3424 // and is more possible to be converted. So if current DefMI is
3425 // ADDI/ADDI8, we continue to find possible LI/LI8.
3426 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3427 break;
3428 }
3429 }
3430 }
3431 } else {
3432 // Looking back through the definition for each operand could be expensive,
3433 // so exit early if this isn't an instruction that either has an immediate
3434 // form or is already an immediate form that we can handle.
3435 ImmInstrInfo III;
3436 unsigned Opc = MI.getOpcode();
3437 bool ConvertibleImmForm =
3438 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3439 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3440 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3441 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3442 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3443 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3444 Opc == PPC::RLWINM8_rec;
3445 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3446 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3447 : false;
3448 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3449 return nullptr;
3450
3451 // Don't convert or %X, %Y, %Y since that's just a register move.
3452 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3453 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3454 return nullptr;
3455 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3456 MachineOperand &MO = MI.getOperand(i);
3457 SeenIntermediateUse = false;
3458 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3459 Register Reg = MI.getOperand(i).getReg();
3460 // If we see another use of this reg between the def and the MI,
3461 // we want to flag it so the def isn't deleted.
3462 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3463 if (DefMI) {
3464 // Is this register defined by some form of add-immediate (including
3465 // load-immediate) within this basic block?
3466 switch (DefMI->getOpcode()) {
3467 default:
3468 break;
3469 case PPC::LI:
3470 case PPC::LI8:
3471 case PPC::ADDItocL8:
3472 case PPC::ADDI:
3473 case PPC::ADDI8:
3474 OpNoForForwarding = i;
3475 return DefMI;
3476 }
3477 }
3478 }
3479 }
3480 }
3481 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3482}
3483
3484unsigned PPCInstrInfo::getSpillTarget() const {
3485 // With P10, we may need to spill paired vector registers or accumulator
3486 // registers. MMA implies paired vectors, so we can just check that.
3487 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3488 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3489 2 : Subtarget.hasP9Vector() ?
3490 1 : 0;
3491}
3492
3493ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3494 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3495}
3496
3497ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3498 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3499}
3500
3501 // This opt tries to convert the following imm form to an index form to save an
3502 // add for stack variables.
3503 // Return false if no such pattern found.
3504 //
3505 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3506 // ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3507 // Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3508 //
3509 // can be converted to:
3510 //
3511 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3512 // Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3513 //
3514 // In order to eliminate ADD instr, make sure that:
3515 // 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3516 // new ADDI instr and ADDI can only take int16 Imm.
3517 // 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3518 // between ADDI and ADD instr since its original def in ADDI will be changed
3519 // in new ADDI instr. And also there should be no new def for it between
3520 // ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3521 // 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3522 // between ADD and Imm instr since ADD instr will be eliminated.
3523 // 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3524 // moved to Index instr.
3526 MachineFunction *MF = MI.getParent()->getParent();
3528 bool PostRA = !MRI->isSSA();
3529 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3530 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3531 // frame base(OffsetAddi) are determined.
3532 if (!PostRA)
3533 return false;
3534 unsigned ToBeDeletedReg = 0;
3535 int64_t OffsetImm = 0;
3536 unsigned XFormOpcode = 0;
3537 ImmInstrInfo III;
3538
3539 // Check if Imm instr meets requirement.
3540 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3541 III))
3542 return false;
3543
3544 bool OtherIntermediateUse = false;
3545 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3546
3547 // Exit if there is other use between ADD and Imm instr or no def found.
3548 if (OtherIntermediateUse || !ADDMI)
3549 return false;
3550
3551 // Check if ADD instr meets requirement.
3552 if (!isADDInstrEligibleForFolding(*ADDMI))
3553 return false;
3554
3555 unsigned ScaleRegIdx = 0;
3556 int64_t OffsetAddi = 0;
3557 MachineInstr *ADDIMI = nullptr;
3558
3559 // Check if there is a valid ToBeChangedReg in ADDMI.
3560 // 1: It must be killed.
3561 // 2: Its definition must be a valid ADDIMI.
3562 // 3: It must satisfy int16 offset requirement.
  // ADD is commutative: try operand 1 as ToBeChangedReg first, then
  // operand 2; the other operand becomes ScaleReg.
3563 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3564 ScaleRegIdx = 2;
3565 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3566 ScaleRegIdx = 1;
3567 else
3568 return false;
3569
3570 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3571 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3572 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3573 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3575 for (auto It = ++Start; It != End; It++)
3576 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3577 return true;
3578 return false;
3579 };
3580
3581 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3582 // treated as special zero when ScaleReg is R0/X0 register.
3583 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3584 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3585 return false;
3586
3587 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3588 // and Imm Instr.
3589 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3590 return false;
3591
3592 // Now start to do the transformation.
3593 LLVM_DEBUG(dbgs() << "Replace instruction: "
3594 << "\n");
3595 LLVM_DEBUG(ADDIMI->dump());
3596 LLVM_DEBUG(ADDMI->dump());
3597 LLVM_DEBUG(MI.dump());
3598 LLVM_DEBUG(dbgs() << "with: "
3599 << "\n");
3600
3601 // Update ADDI instr.
3602 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3603
3604 // Update Imm instr.
  // Switch MI to the index form and swap its immediate operand for
  // ScaleReg, propagating ScaleReg's kill state from the deleted ADD.
3605 MI.setDesc(get(XFormOpcode));
3606 MI.getOperand(III.ImmOpNo)
3607 .ChangeToRegister(ScaleReg, false, false,
3608 ADDMI->getOperand(ScaleRegIdx).isKill());
3609
3610 MI.getOperand(III.OpNoForForwarding)
3611 .ChangeToRegister(ToBeChangedReg, false, false, true);
3612
3613 // Eliminate ADD instr.
3614 ADDMI->eraseFromParent();
3615
3616 LLVM_DEBUG(ADDIMI->dump());
3617 LLVM_DEBUG(MI.dump());
3618
3619 return true;
3620}
3621
// Check whether ADDIMI is an ADDI/ADDI8 whose offset operand is a plain
// immediate (not a relocation); on success Imm receives that immediate.
3623 int64_t &Imm) const {
3624 unsigned Opc = ADDIMI.getOpcode();
3625
3626 // Exit if the instruction is not ADDI.
3627 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3628 return false;
3629
3630 // The operand may not necessarily be an immediate - it could be a relocation.
3631 if (!ADDIMI.getOperand(2).isImm())
3632 return false;
3633
3634 Imm = ADDIMI.getOperand(2).getImm();
3635
3636 return true;
3637}
3638
// True when ADDMI is a plain GPR add (ADD4/ADD8), the only form the frame
// offset folding handles.
3640 unsigned Opc = ADDMI.getOpcode();
3641
3642 // Exit if the instruction is not ADD.
3643 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3644}
3645
// Check whether the "Imm instr" of the foldFrameOffset() pattern is
// foldable: a load/store with an index-form twin whose operands are summed,
// a plain immediate offset, and a killed base register. On success the
// outputs are ToBeDeletedReg (the killed base), XFormOpcode, OffsetImm,
// and the immediate-form info III.
3647 unsigned &ToBeDeletedReg,
3648 unsigned &XFormOpcode,
3649 int64_t &OffsetImm,
3650 ImmInstrInfo &III) const {
3651 // Only handle load/store.
3652 if (!MI.mayLoadOrStore())
3653 return false;
3654
3655 unsigned Opc = MI.getOpcode();
3656
3657 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3658
3659 // Exit if instruction has no index form.
3660 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3661 return false;
3662
3663 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3664 if (!instrHasImmForm(XFormOpcode,
3665 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3666 return false;
3667
3668 if (!III.IsSummingOperands)
3669 return false;
3670
3671 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3672 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3673 // Only support imm operands, not relocation slots or others.
3674 if (!ImmOperand.isImm())
3675 return false;
3676
3677 assert(RegOperand.isReg() && "Instruction format is not right");
3678
3679 // If there are other uses of ToBeDeletedReg after the Imm instr, we cannot
3680 // delete it.
3680 if (!RegOperand.isKill())
3681 return false;
3682
3683 ToBeDeletedReg = RegOperand.getReg();
3684 OffsetImm = ImmOperand.getImm();
3685
3686 return true;
3687}
3688
// Check operand Index (1 or 2) of ADDMI as the ToBeChangedReg of the
// foldFrameOffset() pattern: it must be killed, defined by an eligible
// ADDI/ADDI8 (returned through ADDIMI, its offset through OffsetAddi) with
// no intermediate use, and OffsetAddi + OffsetImm must still fit in int16.
3690 MachineInstr *&ADDIMI,
3691 int64_t &OffsetAddi,
3692 int64_t OffsetImm) const {
3693 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3694 MachineOperand &MO = ADDMI->getOperand(Index);
3695
3696 if (!MO.isKill())
3697 return false;
3698
3699 bool OtherIntermediateUse = false;
3700
3701 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3702 // Currently handle only one "add + Imminstr" pair case, exit if other
3703 // intermediate use for ToBeChangedReg found.
3704 // TODO: handle the cases where there are other "add + Imminstr" pairs
3705 // with same offset in Imminstr which is like:
3706 //
3707 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3708 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3709 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3710 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3711 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3712 //
3713 // can be converted to:
3714 //
3715 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3716 // (OffsetAddi + OffsetImm)
3717 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3718 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3719
3720 if (OtherIntermediateUse || !ADDIMI)
3721 return false;
3722 // Check if ADDI instr meets requirement.
3723 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3724 return false;
3725
  // The combined offset must still be encodable in the ADDI's int16 field.
3726 if (isInt<16>(OffsetAddi + OffsetImm))
3727 return true;
3728 return false;
3729}
3730
3731// If this instruction has an immediate form and one of its operands is a
3732// result of a load-immediate or an add-immediate, convert it to
3733// the immediate form if the constant is in range.
                                          SmallSet<Register, 4> &RegsToUpdate,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  // Report DefMI as killable only when its sole remaining use is the
  // (killed) forwarding operand of MI.
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is an imm instruction and its register operand is produced by an
  // ADDI, put the imm into the imm instruction directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simplified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3795
3797 MachineInstr **ToErase) const {
3798 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3799 Register FoldingReg = MI.getOperand(1).getReg();
3800 if (!FoldingReg.isVirtual())
3801 return false;
3802 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3803 if (SrcMI->getOpcode() != PPC::RLWINM &&
3804 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3805 SrcMI->getOpcode() != PPC::RLWINM8 &&
3806 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3807 return false;
3808 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3809 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3810 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3811 "Invalid PPC::RLWINM Instruction!");
3812 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3813 uint64_t SHMI = MI.getOperand(2).getImm();
3814 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3815 uint64_t MBMI = MI.getOperand(3).getImm();
3816 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3817 uint64_t MEMI = MI.getOperand(4).getImm();
3818
3819 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3820 "Invalid PPC::RLWINM Instruction!");
3821 // If MBMI is bigger than MEMI, we always can not get run of ones.
3822 // RotatedSrcMask non-wrap:
3823 // 0........31|32........63
3824 // RotatedSrcMask: B---E B---E
3825 // MaskMI: -----------|--E B------
3826 // Result: ----- --- (Bad candidate)
3827 //
3828 // RotatedSrcMask wrap:
3829 // 0........31|32........63
3830 // RotatedSrcMask: --E B----|--E B----
3831 // MaskMI: -----------|--E B------
3832 // Result: --- -----|--- ----- (Bad candidate)
3833 //
3834 // One special case is RotatedSrcMask is a full set mask.
3835 // RotatedSrcMask full:
3836 // 0........31|32........63
3837 // RotatedSrcMask: ------EB---|-------EB---
3838 // MaskMI: -----------|--E B------
3839 // Result: -----------|--- ------- (Good candidate)
3840
3841 // Mark special case.
3842 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3843
3844 // For other MBMI > MEMI cases, just return.
3845 if ((MBMI > MEMI) && !SrcMaskFull)
3846 return false;
3847
3848 // Handle MBMI <= MEMI cases.
3849 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3850 // In MI, we only need low 32 bits of SrcMI, just consider about low 32
3851 // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3852 // while in PowerPC ISA, lowerest bit is at index 63.
3853 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3854
3855 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3856 APInt FinalMask = RotatedSrcMask & MaskMI;
3857 uint32_t NewMB, NewME;
3858 bool Simplified = false;
3859
3860 // If final mask is 0, MI result should be 0 too.
3861 if (FinalMask.isZero()) {
3862 bool Is64Bit =
3863 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3864 Simplified = true;
3865 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3866 LLVM_DEBUG(MI.dump());
3867
3868 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3869 // Replace MI with "LI 0"
3870 MI.removeOperand(4);
3871 MI.removeOperand(3);
3872 MI.removeOperand(2);
3873 MI.getOperand(1).ChangeToImmediate(0);
3874 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3875 } else {
3876 // Replace MI with "ANDI_rec reg, 0"
3877 MI.removeOperand(4);
3878 MI.removeOperand(3);
3879 MI.getOperand(2).setImm(0);
3880 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3881 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3882 if (SrcMI->getOperand(1).isKill()) {
3883 MI.getOperand(1).setIsKill(true);
3884 SrcMI->getOperand(1).setIsKill(false);
3885 } else
3886 // About to replace MI.getOperand(1), clear its kill flag.
3887 MI.getOperand(1).setIsKill(false);
3888 }
3889
3890 LLVM_DEBUG(dbgs() << "With: ");
3891 LLVM_DEBUG(MI.dump());
3892
3893 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3894 NewMB <= NewME) ||
3895 SrcMaskFull) {
3896 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3897 // than NewME. Otherwise we get a 64 bit value after folding, but MI
3898 // return a 32 bit value.
3899 Simplified = true;
3900 LLVM_DEBUG(dbgs() << "Converting Instr: ");
3901 LLVM_DEBUG(MI.dump());
3902
3903 uint16_t NewSH = (SHSrc + SHMI) % 32;
3904 MI.getOperand(2).setImm(NewSH);
3905 // If SrcMI mask is full, no need to update MBMI and MEMI.
3906 if (!SrcMaskFull) {
3907 MI.getOperand(3).setImm(NewMB);
3908 MI.getOperand(4).setImm(NewME);
3909 }
3910 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3911 if (SrcMI->getOperand(1).isKill()) {
3912 MI.getOperand(1).setIsKill(true);
3913 SrcMI->getOperand(1).setIsKill(false);
3914 } else
3915 // About to replace MI.getOperand(1), clear its kill flag.
3916 MI.getOperand(1).setIsKill(false);
3917
3918 LLVM_DEBUG(dbgs() << "To: ");
3919 LLVM_DEBUG(MI.dump());
3920 }
3921 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
3922 !SrcMI->hasImplicitDef()) {
3923 // If FoldingReg has no non-debug use and it has no implicit def (it
3924 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3925 // Otherwise keep it.
3926 *ToErase = SrcMI;
3927 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3928 LLVM_DEBUG(SrcMI->dump());
3929 }
3930 return Simplified;
3931}
3932
// Populate III with the properties of the reg+imm (D-form) equivalent of the
// reg+reg (X-form) opcode Opc, returning false when no immediate form exists
// (or, for the Power9 vector cases, when the subtarget lacks P9 vector
// support). IsVFReg/PostRA select between VSR- and FPR-based D-forms for the
// P9 load/store cases.
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  // Defaults below; individual cases override them as needed.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  // 32-bit rotates and shifts with a register shift amount.
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  // 64-bit rotates and shifts with a register shift amount.
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    // The displacement replaces operand 1; the base register stays at 2.
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  // Update-form loads/stores: operands are shifted by one relative to the
  // non-update forms above.
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4353
// Utility function for swapping two arbitrary operands of an instruction.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  // Take value copies before the operands are removed from MI.
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher index first so the lower index stays valid.
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    // Pop trailing operands (from the end down to MinOp) into MOps.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4392
4393// Check if the 'MI' that has the index OpNoForForwarding
4394// meets the requirement described in the ImmInstrInfo.
4395bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4396 const ImmInstrInfo &III,
4397 unsigned OpNoForForwarding
4398 ) const {
4399 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4400 // would not work pre-RA, we can only do the check post RA.
4401 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4402 if (MRI.isSSA())
4403 return false;
4404
4405 // Cannot do the transform if MI isn't summing the operands.
4406 if (!III.IsSummingOperands)
4407 return false;
4408
4409 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4410 if (!III.ZeroIsSpecialOrig)
4411 return false;
4412
4413 // We cannot do the transform if the operand we are trying to replace
4414 // isn't the same as the operand the instruction allows.
4415 if (OpNoForForwarding != III.OpNoForForwarding)
4416 return false;
4417
4418 // Check if the instruction we are trying to transform really has
4419 // the special zero register as its operand.
4420 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4421 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4422 return false;
4423
4424 // This machine instruction is convertible if it is,
4425 // 1. summing the operands.
4426 // 2. one of the operands is special zero register.
4427 // 3. the operand we are trying to replace is allowed by the MI.
4428 return true;
4429}
4430
4431// Check if the DefMI is the add inst and set the ImmMO and RegMO
4432// accordingly.
4433bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4434 const ImmInstrInfo &III,
4435 MachineOperand *&ImmMO,
4436 MachineOperand *&RegMO) const {
4437 unsigned Opc = DefMI.getOpcode();
4438 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4439 return false;
4440
4441 assert(DefMI.getNumOperands() >= 3 &&
4442 "Add inst must have at least three operands");
4443 RegMO = &DefMI.getOperand(1);
4444 ImmMO = &DefMI.getOperand(2);
4445
4446 // Before RA, ADDI first operand could be a frame index.
4447 if (!RegMO->isReg())
4448 return false;
4449
4450 // This DefMI is elgible for forwarding if it is:
4451 // 1. add inst
4452 // 2. one of the operands is Imm/CPI/Global.
4453 return isAnImmediateOperand(*ImmMO);
4454}
4455
// Check that RegMO's register can be safely forwarded from DefMI down to MI:
// there must be no redefinition of the register between DefMI and MI. The
// bool out-params report whether the register was killed or read by an
// intermediate instruction.
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x -> z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    // Any clobber of Reg by an instruction other than DefMI blocks the
    // forwarding entirely.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4496
// Check that ImmMO (an immediate, CPI or global from the feeding add) can be
// folded into the immediate form described by III; on success the resolved
// (sign-extended, possibly truncated) value is returned in Imm. BaseImm is an
// extra displacement already carried by the imm-form instruction.
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    // The immediate form may require natural alignment of the displacement.
    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in ImmInstrInfo.
  return true;
}
4548
4549bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4550 unsigned OpNoForForwarding,
4551 MachineInstr **KilledDef) const {
4552 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4553 !DefMI.getOperand(1).isImm())
4554 return false;
4555
4556 MachineFunction *MF = MI.getParent()->getParent();
4558 bool PostRA = !MRI->isSSA();
4559
4560 int64_t Immediate = DefMI.getOperand(1).getImm();
4561 // Sign-extend to 64-bits.
4562 int64_t SExtImm = SignExtend64<16>(Immediate);
4563
4564 bool ReplaceWithLI = false;
4565 bool Is64BitLI = false;
4566 int64_t NewImm = 0;
4567 bool SetCR = false;
4568 unsigned Opc = MI.getOpcode();
4569 switch (Opc) {
4570 default:
4571 return false;
4572
4573 // FIXME: Any branches conditional on such a comparison can be made
4574 // unconditional. At this time, this happens too infrequently to be worth
4575 // the implementation effort, but if that ever changes, we could convert
4576 // such a pattern here.
4577 case PPC::CMPWI:
4578 case PPC::CMPLWI:
4579 case PPC::CMPDI:
4580 case PPC::CMPLDI: {
4581 // Doing this post-RA would require dataflow analysis to reliably find uses
4582 // of the CR register set by the compare.
4583 // No need to fixup killed/dead flag since this transformation is only valid
4584 // before RA.
4585 if (PostRA)
4586 return false;
4587 // If a compare-immediate is fed by an immediate and is itself an input of
4588 // an ISEL (the most common case) into a COPY of the correct register.
4589 bool Changed = false;
4590 Register DefReg = MI.getOperand(0).getReg();
4591 int64_t Comparand = MI.getOperand(2).getImm();
4592 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4593 ? (Comparand | 0xFFFFFFFFFFFF0000)
4594 : Comparand;
4595
4596 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4597 unsigned UseOpc = CompareUseMI.getOpcode();
4598 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4599 continue;
4600 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4601 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4602 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4603 unsigned RegToCopy =
4604 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4605 if (RegToCopy == PPC::NoRegister)
4606 continue;
4607 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4608 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4609 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4610 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4611 CompareUseMI.removeOperand(3);
4612 CompareUseMI.removeOperand(2);
4613 continue;
4614 }
4615 LLVM_DEBUG(
4616 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4617 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4618 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4619 // Convert to copy and remove unneeded operands.
4620 CompareUseMI.setDesc(get(PPC::COPY));
4621 CompareUseMI.removeOperand(3);
4622 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4623 CmpIselsConverted++;
4624 Changed = true;
4625 LLVM_DEBUG(CompareUseMI.dump());
4626 }
4627 if (Changed)
4628 return true;
4629 // This may end up incremented multiple times since this function is called
4630 // during a fixed-point transformation, but it is only meant to indicate the
4631 // presence of this opportunity.
4632 MissedConvertibleImmediateInstrs++;
4633 return false;
4634 }
4635
4636 // Immediate forms - may simply be convertable to an LI.
4637 case PPC::ADDI:
4638 case PPC::ADDI8: {
4639 // Does the sum fit in a 16-bit signed field?
4640 int64_t Addend = MI.getOperand(2).getImm();
4641 if (isInt<16>(Addend + SExtImm)) {
4642 ReplaceWithLI = true;
4643 Is64BitLI = Opc == PPC::ADDI8;
4644 NewImm = Addend + SExtImm;
4645 break;
4646 }
4647 return false;
4648 }
4649 case PPC::SUBFIC:
4650 case PPC::SUBFIC8: {
4651 // Only transform this if the CARRY implicit operand is dead.
4652 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4653 return false;
4654 int64_t Minuend = MI.getOperand(2).getImm();
4655 if (isInt<16>(Minuend - SExtImm)) {
4656 ReplaceWithLI = true;
4657 Is64BitLI = Opc == PPC::SUBFIC8;
4658 NewImm = Minuend - SExtImm;
4659 break;
4660 }
4661 return false;
4662 }
4663 case PPC::RLDICL:
4664 case PPC::RLDICL_rec:
4665 case PPC::RLDICL_32:
4666 case PPC::RLDICL_32_64: {
4667 // Use APInt's rotate function.
4668 int64_t SH = MI.getOperand(2).getImm();
4669 int64_t MB = MI.getOperand(3).getImm();
4670 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4671 SExtImm, true);
4672 InVal = InVal.rotl(SH);
4673 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4674 InVal &= Mask;
4675 // Can't replace negative values with an LI as that will sign-extend
4676 // and not clear the left bits. If we're setting the CR bit, we will use
4677 // ANDI_rec which won't sign extend, so that's safe.
4678 if (isUInt<15>(InVal.getSExtValue()) ||
4679 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4680 ReplaceWithLI = true;
4681 Is64BitLI = Opc != PPC::RLDICL_32;
4682 NewImm = InVal.getSExtValue();
4683 SetCR = Opc == PPC::RLDICL_rec;
4684 break;
4685 }
4686 return false;
4687 }
4688 case PPC::RLWINM:
4689 case PPC::RLWINM8:
4690 case PPC::RLWINM_rec:
4691 case PPC::RLWINM8_rec: {
4692 int64_t SH = MI.getOperand(2).getImm();
4693 int64_t MB = MI.getOperand(3).getImm();
4694 int64_t ME = MI.getOperand(4).getImm();
4695 APInt InVal(32, SExtImm, true);
4696 InVal = InVal.rotl(SH);
4697 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4698 InVal &= Mask;
4699 // Can't replace negative values with an LI as that will sign-extend
4700 // and not clear the left bits. If we're setting the CR bit, we will use
4701 // ANDI_rec which won't sign extend, so that's safe.
4702 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4703 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4704 isUInt<16>(InVal.getSExtValue()));
4705 if (ValueFits) {
4706 ReplaceWithLI = true;
4707 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4708 NewImm = InVal.getSExtValue();
4709 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4710 break;
4711 }
4712 return false;
4713 }
4714 case PPC::ORI:
4715 case PPC::ORI8:
4716 case PPC::XORI:
4717 case PPC::XORI8: {
4718 int64_t LogicalImm = MI.getOperand(2).getImm();
4719 int64_t Result = 0;
4720 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4721 Result = LogicalImm | SExtImm;
4722 else
4723 Result = LogicalImm ^ SExtImm;
4724 if (isInt<16>(Result)) {
4725 ReplaceWithLI = true;
4726 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4727 NewImm = Result;
4728 break;
4729 }
4730 return false;
4731 }
4732 }
4733
4734 if (ReplaceWithLI) {
4735 // We need to be careful with CR-setting instructions we're replacing.
4736 if (SetCR) {
4737 // We don't know anything about uses when we're out of SSA, so only
4738 // replace if the new immediate will be reproduced.
4739 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4740 if (PostRA && ImmChanged)
4741 return false;
4742
4743 if (!PostRA) {
4744 // If the defining load-immediate has no other uses, we can just replace
4745 // the immediate with the new immediate.
4746 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4747 DefMI.getOperand(1).setImm(NewImm);
4748
4749 // If we're not using the GPR result of the CR-setting instruction, we
4750 // just need to and with zero/non-zero depending on the new immediate.
4751 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4752 if (NewImm) {
4753 assert(Immediate && "Transformation converted zero to non-zero?");
4754 NewImm = Immediate;
4755 }
4756 } else if (ImmChanged)
4757 return false;
4758 }
4759 }
4760
4761 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4762 LLVM_DEBUG(MI.dump());
4763 LLVM_DEBUG(dbgs() << "Fed by:\n");
4764 LLVM_DEBUG(DefMI.dump());
4766 LII.Imm = NewImm;
4767 LII.Is64Bit = Is64BitLI;
4768 LII.SetCR = SetCR;
4769 // If we're setting the CR, the original load-immediate must be kept (as an
4770 // operand to ANDI_rec/ANDI8_rec).
4771 if (KilledDef && SetCR)
4772 *KilledDef = nullptr;
4773 replaceInstrWithLI(MI, LII);
4774
4775 if (PostRA)
4776 recomputeLivenessFlags(*MI.getParent());
4777
4778 LLVM_DEBUG(dbgs() << "With:\n");
4779 LLVM_DEBUG(MI.dump());
4780 return true;
4781 }
4782 return false;
4783}
4784
// Fold an add-immediate (DefMI) feeding a D-form (reg+imm) load/store (MI)
// into the load/store itself: keep the imm-form opcode but replace the base
// register with the addi's source register and fold the addi's immediate
// into MI's displacement. Returns true iff MI was rewritten in place.
// Pre-RA (SSA) only; see the FIXME below for the post-RA limitation.
4785 bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4786 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4787 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4788 bool PostRA = !MRI->isSSA();
4789 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4790 // for post-ra.
4791 if (PostRA)
4792 return false;
4793
4794 // Only handle load/store.
4795 if (!MI.mayLoadOrStore())
4796 return false;
4797
// MI is an indexed-form (X-form) memory op is implied by this mapping; the
// imm-form eligibility below is queried against the mapped X-form opcode.
4798 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4799
4800 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4801 "MI must have x-form opcode");
4802
4803 // get Imm Form info.
4804 ImmInstrInfo III;
4805 bool IsVFReg = MI.getOperand(0).isReg()
4806 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4807 : false;
4808
4809 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4810 return false;
4811
// Only profitable when the instruction sums its register and immediate
// operands to form the address; otherwise folding the addi is invalid.
4812 if (!III.IsSummingOperands)
4813 return false;
4814
4815 if (OpNoForForwarding != III.OpNoForForwarding)
4816 return false;
4817
// Copy (by value) the current displacement operand before MI is mutated;
// its immediate is used as the base for the new displacement below.
4818 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4819 if (!ImmOperandMI.isImm())
4820 return false;
4821
4822 // Check DefMI.
4823 MachineOperand *ImmMO = nullptr;
4824 MachineOperand *RegMO = nullptr;
4825 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4826 return false;
4827 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4828
4829 // Check Imm.
4830 // Set ImmBase from imm instruction as base and get new Imm inside
4831 // isImmElgibleForForwarding.
4832 int64_t ImmBase = ImmOperandMI.getImm();
4833 int64_t Imm = 0;
4834 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4835 return false;
4836
4837 // Do the transform
4838 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4839 LLVM_DEBUG(MI.dump());
4840 LLVM_DEBUG(dbgs() << "Fed by:\n");
4841 LLVM_DEBUG(DefMI.dump());
4842
// Rewrite MI in place: forward the addi's source register and install the
// combined displacement. The opcode is unchanged (still the imm form).
4843 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4844 MI.getOperand(III.ImmOpNo).setImm(Imm);
4845
4846 LLVM_DEBUG(dbgs() << "With:\n");
4847 LLVM_DEBUG(MI.dump());
4848 return true;
4849 }
4850
4851 // If an X-Form instruction is fed by an add-immediate and one of its operands
4852 // is the literal zero, attempt to forward the source of the add-immediate to
4853 // the corresponding D-Form instruction with the displacement coming from
4854 // the immediate being added.
// Returns true iff MI was rewritten in place to III.ImmOpcode.
4855 bool PPCInstrInfo::transformToImmFormFedByAdd(
4856 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4857 MachineInstr &DefMI, bool KillDefMI) const {
4858 // RegMO ImmMO
4859 // | |
4860 // x = addi reg, imm <----- DefMI
4861 // y = op 0 , x <----- MI
4862 // |
4863 // OpNoForForwarding
4864 // Check if the MI meet the requirement described in the III.
4865 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4866 return false;
4867
4868 // Check if the DefMI meet the requirement
4869 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4870 MachineOperand *ImmMO = nullptr;
4871 MachineOperand *RegMO = nullptr;
4872 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4873 return false;
4874 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4875
4876 // As we get the Imm operand now, we need to check if the ImmMO meet
4877 // the requirement described in the III. If yes set the Imm.
4878 int64_t Imm = 0;
4879 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4880 return false;
4881
4882 bool IsFwdFeederRegKilled = false;
4883 bool SeenIntermediateUse = false;
4884 // Check if the RegMO can be forwarded to MI.
4885 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4886 IsFwdFeederRegKilled, SeenIntermediateUse))
4887 return false;
4888
4889 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4890 bool PostRA = !MRI.isSSA();
4891
4892 // We know that, the MI and DefMI both meet the pattern, and
4893 // the Imm also meet the requirement with the new Imm-form.
4894 // It is safe to do the transformation now.
4895 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4896 LLVM_DEBUG(MI.dump());
4897 LLVM_DEBUG(dbgs() << "Fed by:\n");
4898 LLVM_DEBUG(DefMI.dump());
4899
4900 // Update the base reg first.
4901 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4902 false, false,
4903 RegMO->isKill());
4904
4905 // Then, update the imm.
4906 if (ImmMO->isImm()) {
4907 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4908 // directly.
// NOTE(review): a statement appears to be missing here in this copy
// (expected: replace the ZeroIsSpecialOrig operand with Imm) — confirm
// against the upstream source.
4910 }
4911 else {
4912 // Otherwise, it is Constant Pool Index(CPI) or Global,
4913 // which is relocation in fact. We need to replace the special zero
4914 // register with ImmMO.
4915 // Before that, we need to fixup the target flags for imm.
4916 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4917 if (DefMI.getOpcode() == PPC::ADDItocL8)
// NOTE(review): the body of this 'if' (setting ImmMO's target flags) is
// missing in this copy — confirm against the upstream source.
4919
4920 // MI didn't have the interface such as MI.setOperand(i) though
4921 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4922 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4923 // and, add the ImmMO, then, move back all the operands behind ZERO.
// NOTE(review): the declaration of the 'MOps' operand scratch vector is
// missing in this copy — confirm against the upstream source.
4925 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4926 MOps.push_back(MI.getOperand(i));
4927 MI.removeOperand(i);
4928 }
4929
4930 // Remove the last MO in the list, which is ZERO operand in fact.
4931 MOps.pop_back();
4932 // Add the imm operand.
4933 MI.addOperand(*ImmMO);
4934 // Now add the rest back.
4935 for (auto &MO : MOps)
4936 MI.addOperand(MO);
4937 }
4938
4939 // Update the opcode.
4940 MI.setDesc(get(III.ImmOpcode));
4941
// Operand rewriting above may have invalidated kill/dead flags post-RA.
4942 if (PostRA)
4943 recomputeLivenessFlags(*MI.getParent());
4944 LLVM_DEBUG(dbgs() << "With:\n");
4945 LLVM_DEBUG(MI.dump());
4946
4947 return true;
4948 }
4949
// Convert a reg+reg instruction (MI) whose ConstantOpNo operand is fed by a
// load-immediate (DefMI, LI/LI8) into its immediate form (III.ImmOpcode).
// Handles the shift instructions specially (out-of-range shift amounts
// become LI 0; zero shifts become COPY pre-RA). Returns true iff MI was
// rewritten in place.
4950 bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4951 const ImmInstrInfo &III,
4952 unsigned ConstantOpNo,
4953 MachineInstr &DefMI) const {
4954 // DefMI must be LI or LI8.
4955 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4956 !DefMI.getOperand(1).isImm())
4957 return false;
4958
4959 // Get Imm operand and Sign-extend to 64-bits.
4960 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4961
4962 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4963 bool PostRA = !MRI.isSSA();
4964 // Exit early if we can't convert this.
4965 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4966 return false;
4967 if (Imm % III.ImmMustBeMultipleOf)
4968 return false;
4969 if (III.TruncateImmTo)
4970 Imm &= ((1 << III.TruncateImmTo) - 1);
// Range-check the (possibly truncated) immediate against the imm-form's
// encodable width, signed or unsigned as the instruction requires.
4971 if (III.SignedImm) {
4972 APInt ActualValue(64, Imm, true);
4973 if (!ActualValue.isSignedIntN(III.ImmWidth))
4974 return false;
4975 } else {
4976 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4977 if ((uint64_t)Imm > UnsignedMax)
4978 return false;
4979 }
4980
4981 // If we're post-RA, the instructions don't agree on whether register zero is
4982 // special, we can transform this as long as the register operand that will
4983 // end up in the location where zero is special isn't R0.
4984 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4985 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4986 III.ZeroIsSpecialNew + 1;
4987 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
4988 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
4989 // If R0 is in the operand where zero is special for the new instruction,
4990 // it is unsafe to transform if the constant operand isn't that operand.
4991 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
4992 ConstantOpNo != III.ZeroIsSpecialNew)
4993 return false;
4994 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
4995 ConstantOpNo != PosForOrigZero)
4996 return false;
4997 }
4998
// Classify the shift opcodes that need the special three-way handling
// described below (zero-result, copy, or rotate-and-mask encoding).
4999 unsigned Opc = MI.getOpcode();
5000 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5001 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5002 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5003 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5004 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5005 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5006 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5007 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5008 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5009 Opc == PPC::SRD_rec;
5010
5011 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5012 LLVM_DEBUG(MI.dump());
5013 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5014 LLVM_DEBUG(DefMI.dump());
5015 MI.setDesc(get(III.ImmOpcode));
5016 if (ConstantOpNo == III.OpNoForForwarding) {
5017 // Converting shifts to immediate form is a bit tricky since they may do
5018 // one of three things:
5019 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5020 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5021 // setting CR0)
5022 // 3. If the shift amount is in [1, OpSize), it's just a shift
5023 if (SpecialShift32 || SpecialShift64) {
// NOTE(review): the declaration of the LoadImmediateInfo 'LII' used below
// is missing in this copy — confirm against the upstream source.
5025 LII.Imm = 0;
5026 LII.SetCR = SetCR;
5027 LII.Is64Bit = SpecialShift64;
5028 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1 above: shift amount >= OpSize yields a zero result.
5029 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5030 replaceInstrWithLI(MI, LII);
5031 // Shifts by zero don't change the value. If we don't need to set CR0,
5032 // just convert this to a COPY. Can't do this post-RA since we've already
5033 // cleaned up the copies.
5034 else if (!SetCR && ShAmt == 0 && !PostRA) {
5035 MI.removeOperand(2);
5036 MI.setDesc(get(PPC::COPY));
5037 } else {
5038 // The 32 bit and 64 bit instructions are quite different.
5039 if (SpecialShift32) {
5040 // Left shifts use (N, 0, 31-N).
5041 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5042 // use (0, 0, 31) if N == 0.
5043 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5044 uint64_t MB = RightShift ? ShAmt : 0;
5045 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
// NOTE(review): the statement installing the SH operand appears to be
// missing here in this copy — confirm against the upstream source.
5047 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5048 .addImm(ME);
5049 } else {
5050 // Left shifts use (N, 63-N).
5051 // Right shifts use (64-N, N) if 0 < N < 64.
5052 // use (0, 0) if N == 0.
5053 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5054 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
// NOTE(review): the statement installing the SH operand appears to be
// missing here in this copy — confirm against the upstream source.
5056 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5057 }
5058 }
5059 } else
5060 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5061 }
5062 // Convert commutative instructions (switch the operands and convert the
5063 // desired one to an immediate.
5064 else if (III.IsCommutative) {
5065 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5066 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5067 } else
5068 llvm_unreachable("Should have exited early!");
5069
5070 // For instructions for which the constant register replaces a different
5071 // operand than where the immediate goes, we need to swap them.
5072 if (III.OpNoForForwarding != III.ImmOpNo)
// NOTE(review): the swapMIOperands call for this 'if' is missing in this
// copy — confirm against the upstream source.
5074
5075 // If the special R0/X0 register index are different for original instruction
5076 // and new instruction, we need to fix up the register class in new
5077 // instruction.
5078 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5079 if (III.ZeroIsSpecialNew) {
5080 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5081 // need to fix up register class.
5082 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5083 if (RegToModify.isVirtual()) {
5084 const TargetRegisterClass *NewRC =
5085 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5086 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5087 MRI.setRegClass(RegToModify, NewRC);
5088 }
5089 }
5090 }
5091
5092 if (PostRA)
5093 recomputeLivenessFlags(*MI.getParent());
5094
5095 LLVM_DEBUG(dbgs() << "With: ");
5096 LLVM_DEBUG(MI.dump());
5097 LLVM_DEBUG(dbgs() << "\n");
5098 return true;
5099 }
5100
// Map a register class to the larger legal class preferred by this
// subtarget: VRRC is widened to VSRC when VSX is available; everything else
// is returned unchanged.
// NOTE(review): the signature line of this definition is missing in this
// copy — confirm against the upstream source.
5101 const TargetRegisterClass *
5103 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5104 return &PPC::VSRCRegClass;
5105 return RC;
5106 }
5107
// Thin wrapper over the TableGen-generated record-form opcode mapping.
// NOTE(review): the signature line of this definition is missing in this
// copy — confirm against the upstream source.
5109 return PPC::getRecordFormOpcode(Opcode);
5110 }
5111
5112static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5113 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5114 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5115 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5116 Opcode == PPC::LHZUX8);
5117}
5118
5119 // This function checks for sign extension from 32 bits to 64 bits.
// Returns true if the virtual register \p Reg is defined by an instruction
// whose 64-bit result is known to be the sign extension of its low 32 bits.
5120 static bool definedBySignExtendingOp(const unsigned Reg,
5121 const MachineRegisterInfo *MRI) {
// NOTE(review): the guard condition for this early return is missing in
// this copy (presumably a virtual-register check) — confirm against the
// upstream source.
5123 return false;
5124
5125 MachineInstr *MI = MRI->getVRegDef(Reg);
5126 if (!MI)
5127 return false;
5128
5129 int Opcode = MI->getOpcode();
5130 const PPCInstrInfo *TII =
5131 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
// Fast path: TableGen already flags opcodes that always sign-extend.
5132 if (TII->isSExt32To64(Opcode))
5133 return true;
5134
5135 // The first def of LBZU/LHZU is sign extended.
5136 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5137 return true;
5138
5139 // RLDICL generates sign-extended output if it clears at least
5140 // 33 bits from the left (MSB).
5141 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5142 return true;
5143
5144 // If at least one bit from left in a lower word is masked out,
5145 // all of 0 to 32-th bits of the output are cleared.
5146 // Hence the output is already sign extended.
5147 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5148 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5149 MI->getOperand(3).getImm() > 0 &&
5150 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5151 return true;
5152
5153 // If the most significant bit of immediate in ANDIS is zero,
5154 // all of 0 to 32-th bits are cleared.
5155 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5156 uint16_t Imm = MI->getOperand(2).getImm();
5157 if ((Imm & 0x8000) == 0)
5158 return true;
5159 }
5160
5161 return false;
5162 }
5163
5164 // This function checks the machine instruction that defines the input register
5165 // Reg. If that machine instruction always outputs a value that has only zeros
5166 // in the higher 32 bits then this function will return true.
5167 static bool definedByZeroExtendingOp(const unsigned Reg,
5168 const MachineRegisterInfo *MRI) {
// NOTE(review): the guard condition for this early return is missing in
// this copy (presumably a virtual-register check) — confirm against the
// upstream source.
5170 return false;
5171
5172 MachineInstr *MI = MRI->getVRegDef(Reg);
5173 if (!MI)
5174 return false;
5175
5176 int Opcode = MI->getOpcode();
5177 const PPCInstrInfo *TII =
5178 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
// Fast path: TableGen already flags opcodes that always zero-extend.
5179 if (TII->isZExt32To64(Opcode))
5180 return true;
5181
5182 // The first def of LBZU/LHZU/LWZU are zero extended.
5183 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5184 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5185 MI->getOperand(0).getReg() == Reg)
5186 return true;
5187
5188 // The 16-bit immediate is sign-extended in li/lis.
5189 // If the most significant bit is zero, all higher bits are zero.
5190 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5191 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5192 int64_t Imm = MI->getOperand(1).getImm();
5193 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5194 return true;
5195 }
5196
5197 // We have some variations of rotate-and-mask instructions
5198 // that clear higher 32-bits.
5199 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5200 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5201 Opcode == PPC::RLDICL_32_64) &&
5202 MI->getOperand(3).getImm() >= 32)
5203 return true;
5204
5205 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5206 MI->getOperand(3).getImm() >= 32 &&
5207 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5208 return true;
5209
// Word rotates with a non-wrapping mask (MB <= ME) leave the high word zero.
5210 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5211 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5212 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5213 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5214 return true;
5215
5216 return false;
5217 }
5218
5219 // This function returns true if the input MachineInstr is a TOC save
5220 // instruction.
// A TOC save is a store of the TOC pointer to the dedicated slot: the store
// must use the stack pointer as its base register and the subtarget's TOC
// save offset as its displacement.
// NOTE(review): the signature line of this definition is missing in this
// copy — confirm against the upstream source.
5222 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5223 return false;
5224 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5225 unsigned StackOffset = MI.getOperand(1).getImm();
5226 Register StackReg = MI.getOperand(2).getReg();
5227 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5228 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5229 return true;
5230
5231 return false;
5232 }
5233
5234 // We limit the max depth to track incoming values of PHIs or binary ops
5235 // (e.g. AND) to avoid excessive cost.
5236 const unsigned MAX_BINOP_DEPTH = 1;
5237 // The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5238 // does not count all of the recursions. The parameter BinOpDepth is incremented
5239 // only when isSignOrZeroExtended calls itself more than once. This is done to
5240 // prevent expontential recursion. There is no parameter to track linear
5241 // recursion.
// Returns {IsSExt, IsZExt}: whether the 64-bit value in \p Reg is known to
// be the sign extension and/or zero extension of its low 32 bits.
5242 std::pair<bool, bool>
// NOTE(review): the line carrying the qualified function name and the Reg
// parameter is missing in this copy — confirm against the upstream source.
5244 const unsigned BinOpDepth,
5245 const MachineRegisterInfo *MRI) const {
// NOTE(review): the guard condition for this early return is missing in
// this copy (presumably a virtual-register check) — confirm upstream.
5247 return std::pair<bool, bool>(false, false);
5248
5249 MachineInstr *MI = MRI->getVRegDef(Reg);
5250 if (!MI)
5251 return std::pair<bool, bool>(false, false);
5252
// Start from what the defining opcode alone guarantees, then refine by
// looking through copies, immediate logic ops, and PHI/AND/OR/ISEL inputs.
5253 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5254 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5255
5256 // If we know the instruction always returns sign- and zero-extended result,
5257 // return here.
5258 if (IsSExt && IsZExt)
5259 return std::pair<bool, bool>(IsSExt, IsZExt);
5260
5261 switch (MI->getOpcode()) {
5262 case PPC::COPY: {
5263 Register SrcReg = MI->getOperand(1).getReg();
5264
5265 // In both ELFv1 and v2 ABI, method parameters and the return value
5266 // are sign- or zero-extended.
5267 const MachineFunction *MF = MI->getMF();
5268
5269 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5270 // If this is a copy from another register, we recursively check source.
5271 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5272 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5273 SrcExt.second || IsZExt);
5274 }
5275
5276 // From here on everything is SVR4ABI
5277 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5278 // We check the ZExt/SExt flags for a method parameter.
5279 if (MI->getParent()->getBasicBlock() ==
5280 &MF->getFunction().getEntryBlock()) {
5281 Register VReg = MI->getOperand(0).getReg();
5282 if (MF->getRegInfo().isLiveIn(VReg)) {
5283 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5284 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5285 return std::pair<bool, bool>(IsSExt, IsZExt);
5286 }
5287 }
5288
5289 if (SrcReg != PPC::X3) {
5290 // If this is a copy from another register, we recursively check source.
5291 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5292 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5293 SrcExt.second || IsZExt);
5294 }
5295
5296 // For a method return value, we check the ZExt/SExt flags in attribute.
5297 // We assume the following code sequence for method call.
5298 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5299 // BL8_NOP @func,...
5300 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5301 // %5 = COPY %x3; G8RC:%5
5302 const MachineBasicBlock *MBB = MI->getParent();
5303 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
// NOTE(review): the lines initializing the reverse iterator 'II' used
// below are missing in this copy — confirm against the upstream source.
5306 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5307 return IsExtendPair;
5308
5309 const MachineInstr &CallMI = *(--II);
5310 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5311 return IsExtendPair;
5312
5313 const Function *CalleeFn =
5314 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5315 if (!CalleeFn)
5316 return IsExtendPair;
// Only sub-word/word integer returns carry meaningful SExt/ZExt attributes.
5317 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5318 if (IntTy && IntTy->getBitWidth() <= 32) {
5319 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5320 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5321 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5322 return std::pair<bool, bool>(IsSExt, IsZExt);
5323 }
5324
5325 return IsExtendPair;
5326 }
5327
5328 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5329 // So, we track the operand register as we do for register copy.
5330 case PPC::ORI:
5331 case PPC::XORI:
5332 case PPC::ORI8:
5333 case PPC::XORI8: {
5334 Register SrcReg = MI->getOperand(1).getReg();
5335 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5336 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5337 SrcExt.second || IsZExt);
5338 }
5339
5340 // OR, XOR with shifted 16-bit immediate does not change the upper
5341 // 32 bits. So, we track the operand register for zero extension.
5342 // For sign extension when the MSB of the immediate is zero, we also
5343 // track the operand register since the upper 33 bits are unchanged.
5344 case PPC::ORIS:
5345 case PPC::XORIS:
5346 case PPC::ORIS8:
5347 case PPC::XORIS8: {
5348 Register SrcReg = MI->getOperand(1).getReg();
5349 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5350 uint16_t Imm = MI->getOperand(2).getImm();
5351 if (Imm & 0x8000)
5352 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5353 else
5354 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5355 SrcExt.second || IsZExt);
5356 }
5357
5358 // If all incoming values are sign-/zero-extended,
5359 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5360 case PPC::OR:
5361 case PPC::OR8:
5362 case PPC::ISEL:
5363 case PPC::PHI: {
5364 if (BinOpDepth >= MAX_BINOP_DEPTH)
5365 return std::pair<bool, bool>(false, false);
5366
5367 // The input registers for PHI are operand 1, 3, ...
5368 // The input registers for others are operand 1 and 2.
5369 unsigned OperandEnd = 3, OperandStride = 1;
5370 if (MI->getOpcode() == PPC::PHI) {
5371 OperandEnd = MI->getNumOperands();
5372 OperandStride = 2;
5373 }
5374
// Conjunction over all inputs: every incoming value must be extended for
// the result to be considered extended.
5375 IsSExt = true;
5376 IsZExt = true;
5377 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5378 if (!MI->getOperand(I).isReg())
5379 return std::pair<bool, bool>(false, false);
5380
5381 Register SrcReg = MI->getOperand(I).getReg();
5382 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5383 IsSExt &= SrcExt.first;
5384 IsZExt &= SrcExt.second;
5385 }
5386 return std::pair<bool, bool>(IsSExt, IsZExt);
5387 }
5388
5389 // If at least one of the incoming values of an AND is zero extended
5390 // then the output is also zero-extended. If both of the incoming values
5391 // are sign-extended then the output is also sign extended.
5392 case PPC::AND:
5393 case PPC::AND8: {
5394 if (BinOpDepth >= MAX_BINOP_DEPTH)
5395 return std::pair<bool, bool>(false, false);
5396
5397 Register SrcReg1 = MI->getOperand(1).getReg();
5398 Register SrcReg2 = MI->getOperand(2).getReg();
5399 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5400 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5401 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5402 Src1Ext.second || Src2Ext.second);
5403 }
5404
5405 default:
5406 break;
5407 }
5408 return std::pair<bool, bool>(IsSExt, IsZExt);
5409 }
5410
5411bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5412 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5413}
5414
5415 namespace {
// Software-pipeliner loop description for PPC hardware (CTR) loops.
// Loop is the MTCTR[8]loop setup in the preheader, EndLoop the BDNZ[8]
// terminator, and LoopCount the instruction defining the trip count.
5416 class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5417 MachineInstr *Loop, *EndLoop, *LoopCount;
5418 MachineFunction *MF;
5419 const TargetInstrInfo *TII;
// Compile-time trip count when LoopCount is LI/LI8, otherwise -1 (unknown).
5420 int64_t TripCount;
5421
5422 public:
5423 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5424 MachineInstr *LoopCount)
5425 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5426 MF(Loop->getParent()->getParent()),
5427 TII(MF->getSubtarget().getInstrInfo()) {
5428 // Inspect the Loop instruction up-front, as it may be deleted when we call
5429 // createTripCountGreaterCondition.
5430 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5431 TripCount = LoopCount->getOperand(1).getImm();
5432 else
5433 TripCount = -1;
5434 }
5435
5436 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5437 // Only ignore the terminator.
5438 return MI == EndLoop;
5439 }
5440
// Returns the static answer when the trip count is known; otherwise emits
// a CTR-based condition (empty optional) for the caller to branch on.
5441 std::optional<bool> createTripCountGreaterCondition(
5442 int TC, MachineBasicBlock &MBB,
// NOTE(review): the Cond parameter of this override is missing in this
// copy — confirm against the upstream source.
5444 if (TripCount == -1) {
5445 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5446 // so we don't need to generate any thing here.
5447 Cond.push_back(MachineOperand::CreateImm(0));
// NOTE(review): the MachineOperand::CreateReg call wrapping the CTR
// register push is missing in this copy — confirm upstream.
5449 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5450 true));
5451 return {};
5452 }
5453
5454 return TripCount > TC;
5455 }
5456
5457 void setPreheader(MachineBasicBlock *NewPreheader) override {
5458 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5459 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5460 }
5461
5462 void adjustTripCount(int TripCountAdjust) override {
5463 // If the loop trip count is a compile-time value, then just change the
5464 // value.
5465 if (LoopCount->getOpcode() == PPC::LI8 ||
5466 LoopCount->getOpcode() == PPC::LI) {
5467 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5468 LoopCount->getOperand(1).setImm(TripCount);
5469 return;
5470 }
5471
5472 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5473 // so we don't need to generate any thing here.
5474 }
5475
5476 void disposed() override {
5477 Loop->eraseFromParent();
5478 // Ensure the loop setup instruction is deleted too.
5479 LoopCount->eraseFromParent();
5480 }
5481 };
5482 } // namespace
5483
// Recognize a CTR hardware loop: a block terminated by BDNZ[8] whose
// preheader contains the MTCTR[8]loop setup. Returns a PPCPipelinerLoopInfo
// describing it, or nullptr if the loop is not a hardware loop.
5484 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5486 // We really "analyze" only hardware loops right now.
// NOTE(review): the signature line and the initialization of the iterator
// 'I' (presumably the block's first terminator) are missing in this copy —
// confirm against the upstream source.
5488 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
// A single-block loop predecessor may be the loop itself; take the other.
5489 if (Preheader == LoopBB)
5490 Preheader = *std::next(LoopBB->pred_begin());
5491 MachineFunction *MF = Preheader->getParent();
5492
5493 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
// NOTE(review): the declaration of the 'Visited' set passed below is
// missing in this copy — confirm against the upstream source.
5495 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5496 Register LoopCountReg = LoopInst->getOperand(0).getReg();
// NOTE(review): the declaration of 'MRI' used below is missing in this
// copy — confirm against the upstream source.
5498 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5499 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5500 }
5501 }
5502 return nullptr;
5503 }
5504
5506 MachineBasicBlock &PreHeader,
5507 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5508
5509 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5510
5511 // The loop set-up instruction should be in preheader
5512 for (auto &I : PreHeader.instrs())
5513 if (I.getOpcode() == LOOPi)
5514 return &I;
5515 return nullptr;
5516}
5517
5518// Return true if get the base operand, byte offset of an instruction and the
5519// memory width. Width is the size of memory that is being loaded/stored.
5521 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5522 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5523 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5524 return false;
5525
5526 // Handle only loads/stores with base register followed by immediate offset.
5527 if (!LdSt.getOperand(1).isImm() ||
5528 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5529 return false;
5530 if (!LdSt.getOperand(1).isImm() ||
5531 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5532 return false;
5533
5534 if (!LdSt.hasOneMemOperand())
5535 return false;
5536
5537 Width = (*LdSt.memoperands_begin())->getSize();
5538 Offset = LdSt.getOperand(1).getImm();
5539 BaseReg = &LdSt.getOperand(2);
5540 return true;
5541}
5542
// Return true when MIa and MIb provably access disjoint memory: same base
// operand and non-overlapping [offset, offset+width) ranges. Conservatively
// returns false whenever either access cannot be decomposed.
// NOTE(review): the line carrying the qualified function name is missing in
// this copy — confirm against the upstream source.
5544 const MachineInstr &MIa, const MachineInstr &MIb) const {
5545 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5546 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5547
// NOTE(review): the guard condition for this early return is missing in
// this copy (presumably bailing on unmodeled side effects / ordered memory
// refs) — confirm against the upstream source.
5550 return false;
5551
5552 // Retrieve the base register, offset from the base register and width. Width
5553 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5554 // base registers are identical, and the offset of a lower memory access +
5555 // the width doesn't overlap the offset of a higher memory access,
5556 // then the memory accesses are different.
// NOTE(review): the declaration of 'TRI' used below is missing in this
// copy — confirm against the upstream source.
5558 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5559 int64_t OffsetA = 0, OffsetB = 0;
5560 LocationSize WidthA = 0, WidthB = 0;
5561 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5562 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5563 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5564 int LowOffset = std::min(OffsetA, OffsetB);
5565 int HighOffset = std::max(OffsetA, OffsetB);
5566 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
// Disjoint only if the lower access has a known size that ends at or
// before the start of the higher access.
5567 if (LowWidth.hasValue() &&
5568 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5569 return true;
5570 }
5571 }
5572 return false;
5573 }
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the subtype a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1158
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1111
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
const BasicBlock & getEntryBlock() const
Definition: Function.h:787
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:558
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:341
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:939
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:391
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:722
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:632
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:804
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:674
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:789
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:487
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:386
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:276
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:274
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:623
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:506
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:617
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:208
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and record live-ins and live-outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeModel::Model getCodeModel() const
Returns the code model.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:698
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions such that condition register is set by MI(b,a).
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:96
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:91
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:92
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:73
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:78
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:68
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:79
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:76
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:84
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:77
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:70
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:71
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:74
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_SPESpill
Definition: PPCInstrInfo.h:83
@ SOK_CRSpill
Definition: PPCInstrInfo.h:72
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:80
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:69
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:85
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:55
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:45
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:35
uint64_t IsCommutative
Definition: PPCInstrInfo.h:43
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:41
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:53
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:38
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.