LLVM 19.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
38#include "llvm/MC/MCAsmInfo.h"
39#include "llvm/MC/MCInst.h"
42#include "llvm/Support/Debug.h"
45
46using namespace llvm;
47
48#define DEBUG_TYPE "ppc-instr-info"
49
50#define GET_INSTRMAP_INFO
51#define GET_INSTRINFO_CTOR_DTOR
52#include "PPCGenInstrInfo.inc"
53
// Backend statistics (visible with -stats) counting spill strategy choices
// and machine-combiner / compare-optimization outcomes.
54STATISTIC(NumStoreSPILLVSRRCAsVec,
 55 "Number of spillvsrrc spilled to stack as vec");
 56STATISTIC(NumStoreSPILLVSRRCAsGpr,
 57 "Number of spillvsrrc spilled to stack as gpr");
 58STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
 59STATISTIC(CmpIselsConverted,
 60 "Number of ISELs that depend on comparison of constants converted");
 61STATISTIC(MissedConvertibleImmediateInstrs,
 62 "Number of compare-immediate instructions fed by constants");
 63STATISTIC(NumRcRotatesConvertedToRcAnd,
 64 "Number of record-form rotates converted to record-form andi");
65
// Command-line knobs for this file's transformations.
// NOTE(review): this extraction is missing original lines 75 (the terminator
// of the VSXSelfCopyCrash declaration, presumably "cl::Hidden);") and 85 (the
// declaration line for the option whose arguments appear below as
// "ppc-fma-rp-reduction") — TODO confirm against upstream before building.
66static cl::
 67opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
 68 cl::desc("Disable analysis for CTR loops"));
 69
 70static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
 71cl::desc("Disable compare instruction optimization"), cl::Hidden);
 72
 73static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
 74cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
 76
 77static cl::opt<bool>
 78UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
 79 cl::desc("Use the old (incorrect) instruction latency calculation"));
 80
 81static cl::opt<float>
 82 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
 83 cl::desc("register pressure factor for the transformations."));
 84
 86 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
 87 cl::desc("enable register pressure reduce in machine combiner pass."));
88
89// Pin the vtable to this file.
// Defining one out-of-line virtual method gives the class's vtable a single
// home translation unit, avoiding duplicate vtable emission.
 90void PPCInstrInfo::anchor() {}
91
// Constructor initializer list: wires the generated base class with the
// call-frame pseudo opcodes and the subtarget-appropriate return opcode.
// NOTE(review): the signature line (original line 92) is missing from this
// extraction — TODO confirm it reads
// "PPCInstrInfo::PPCInstrInfo(const PPCSubtarget &STI)" upstream.
93 : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
 94 /* CatchRetOpcode */ -1,
 95 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
 96 Subtarget(STI), RI(STI.getTargetMachine()) {}
97
98/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
 99/// this target when scheduling the DAG.
// NOTE(review): original lines 100-101 (the function signature), 105-106 (a
// conditional guarding the itinerary-based path), and 112 (the fallback
// return) are missing from this extraction — verify against upstream.
 102 const ScheduleDAG *DAG) const {
 103 unsigned Directive =
 104 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
// Itinerary-driven path: build a scoreboard recognizer from the subtarget's
// instruction itineraries.
 107 const InstrItineraryData *II =
 108 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
 109 return new ScoreboardHazardRecognizer(II, DAG);
 110 }
 111
 113}
114
115/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
 116/// to use for this target when scheduling the DAG.
// NOTE(review): original lines 117-118 (signature), 124 (a directive check
// selecting the dispatch-group recognizer), and 128-129 (the PPC970 branch
// condition) are missing from this extraction — verify against upstream.
 119 const ScheduleDAG *DAG) const {
 120 unsigned Directive =
 121 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
 122
 123 // FIXME: Leaving this as-is until we have POWER9 scheduling info
 125 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
 126
 127 // Most subtargets use a PPC970 recognizer.
 130 assert(DAG->TII && "No InstrInfo?");
 131
 132 return new PPCHazardRecognizer970(*DAG);
 133 }
 134
// Default: generic scoreboard recognizer driven by the itineraries.
 135 return new ScoreboardHazardRecognizer(II, DAG);
 136}
137
// Compute an instruction's latency from the itinerary's output-operand cycle
// numbers rather than the full stage latency (see the comment below).
// NOTE(review): the signature line (original line 138) is missing from this
// extraction — presumably
// "unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,"
// — confirm upstream.
139 const MachineInstr &MI,
 140 unsigned *PredCost) const {
// Fall back to the (acknowledged-incorrect) generated implementation when
// there is no itinerary or the user asked for the old behavior.
 141 if (!ItinData || UseOldLatencyCalc)
 142 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
 143
 144 // The default implementation of getInstrLatency calls getStageLatency, but
 145 // getStageLatency does not do the right thing for us. While we have
 146 // itinerary, most cores are fully pipelined, and so the itineraries only
 147 // express the first part of the pipeline, not every stage. Instead, we need
 148 // to use the listed output operand cycle number (using operand 0 here, which
 149 // is an output).
 150
 151 unsigned Latency = 1;
 152 unsigned DefClass = MI.getDesc().getSchedClass();
 153 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 154 const MachineOperand &MO = MI.getOperand(i);
// Only explicit register defs contribute an operand cycle.
 155 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
 156 continue;
 157
 158 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
 159 if (!Cycle)
 160 continue;
 161
// Latency is the worst (largest) def-operand cycle seen.
 162 Latency = std::max(Latency, *Cycle);
 163 }
 164
 165 return Latency;
 166}
167
168std::optional<unsigned> PPCInstrInfo::getOperandLatency(
169 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
170 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
171 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
172 ItinData, DefMI, DefIdx, UseMI, UseIdx);
173
174 if (!DefMI.getParent())
175 return Latency;
176
177 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
178 Register Reg = DefMO.getReg();
179
180 bool IsRegCR;
181 if (Reg.isVirtual()) {
182 const MachineRegisterInfo *MRI =
183 &DefMI.getParent()->getParent()->getRegInfo();
184 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
185 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
186 } else {
187 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
188 PPC::CRBITRCRegClass.contains(Reg);
189 }
190
191 if (UseMI.isBranch() && IsRegCR) {
192 if (!Latency)
193 Latency = getInstrLatency(ItinData, DefMI);
194
195 // On some cores, there is an additional delay between writing to a condition
196 // register, and using it from a branch.
197 unsigned Directive = Subtarget.getCPUDirective();
198 switch (Directive) {
199 default: break;
200 case PPC::DIR_7400:
201 case PPC::DIR_750:
202 case PPC::DIR_970:
203 case PPC::DIR_E5500:
204 case PPC::DIR_PWR4:
205 case PPC::DIR_PWR5:
206 case PPC::DIR_PWR5X:
207 case PPC::DIR_PWR6:
208 case PPC::DIR_PWR6X:
209 case PPC::DIR_PWR7:
210 case PPC::DIR_PWR8:
211 // FIXME: Is this needed for POWER9?
212 Latency = *Latency + 2;
213 break;
214 }
215 }
216
217 return Latency;
218}
219
// Copy the given MI flags wholesale onto \p MI.
// NOTE(review): the signature line (original line 220) and lines 223-225 are
// missing from this extraction — confirm the full definition upstream.
221 uint32_t Flags) const {
 222 MI.setFlags(Flags);
 226}
227
228// This function does not list all associative and commutative operations, but
 229// only those worth feeding through the machine combiner in an attempt to
 230// reduce the critical path. Mostly, this means floating-point operations,
 231// because they have high latencies(>=5) (compared to other operations, such as
 232// and/or, which are also associative and commutative, but have low latencies).
// NOTE(review): the signature line (original line 233) and lines 257-258 are
// missing from this extraction — confirm the full definition upstream.
 234 bool Invert) const {
// Inverted (e.g. negated) forms are never treated as reassociable here.
 235 if (Invert)
 236 return false;
 237 switch (Inst.getOpcode()) {
 238 // Floating point:
 239 // FP Add:
 240 case PPC::FADD:
 241 case PPC::FADDS:
 242 // FP Multiply:
 243 case PPC::FMUL:
 244 case PPC::FMULS:
 245 // Altivec Add:
 246 case PPC::VADDFP:
 247 // VSX Add:
 248 case PPC::XSADDDP:
 249 case PPC::XVADDDP:
 250 case PPC::XVADDSP:
 251 case PPC::XSADDSP:
 252 // VSX Multiply:
 253 case PPC::XSMULDP:
 254 case PPC::XVMULDP:
 255 case PPC::XVMULSP:
 256 case PPC::XSMULSP:
 259 // Fixed point:
 260 // Multiply:
 261 case PPC::MULHD:
 262 case PPC::MULLD:
 263 case PPC::MULHW:
 264 case PPC::MULLW:
 265 return true;
 266 default:
 267 return false;
 268 }
 269}
270
// Column indices into FMAOpIdxInfo rows (documented line by line below).
271#define InfoArrayIdxFMAInst 0
 272#define InfoArrayIdxFAddInst 1
 273#define InfoArrayIdxFMULInst 2
 274#define InfoArrayIdxAddOpIdx 3
 275#define InfoArrayIdxMULOpIdx 4
 276#define InfoArrayIdxFSubInst 5
 277// Array keeps info for FMA instructions:
 278// Index 0(InfoArrayIdxFMAInst): FMA instruction;
 279// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
 280// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
 281// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
 282// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
 283// second MUL operand index is plus 1;
 284// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
 285static const uint16_t FMAOpIdxInfo[][6] = {
 286 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
 287 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
 288 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
 289 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
 290 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
 291 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
 292 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
293
294// Check if an opcode is a FMA instruction. If it is, return the index in array
295// FMAOpIdxInfo. Otherwise, return -1.
296int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
297 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
298 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
299 return I;
300 return -1;
301}
302
303// On PowerPC target, we have two kinds of patterns related to FMA:
304// 1: Improve ILP.
305// Try to reassociate FMA chains like below:
306//
307// Pattern 1:
308// A = FADD X, Y (Leaf)
309// B = FMA A, M21, M22 (Prev)
310// C = FMA B, M31, M32 (Root)
311// -->
312// A = FMA X, M21, M22
313// B = FMA Y, M31, M32
314// C = FADD A, B
315//
316// Pattern 2:
317// A = FMA X, M11, M12 (Leaf)
318// B = FMA A, M21, M22 (Prev)
319// C = FMA B, M31, M32 (Root)
320// -->
321// A = FMUL M11, M12
322// B = FMA X, M21, M22
323// D = FMA A, M31, M32
324// C = FADD B, D
325//
326// breaking the dependency between A and B, allowing FMA to be executed in
327// parallel (or back-to-back in a pipeline) instead of depending on each other.
328//
329// 2: Reduce register pressure.
330// Try to reassociate FMA with FSUB and a constant like below:
331// C is a floating point const.
332//
333// Pattern 1:
334// A = FSUB X, Y (Leaf)
335// D = FMA B, C, A (Root)
336// -->
337// A = FMA B, Y, -C
338// D = FMA A, X, C
339//
340// Pattern 2:
341// A = FSUB X, Y (Leaf)
342// D = FMA B, A, C (Root)
343// -->
344// A = FMA B, Y, -C
345// D = FMA A, X, C
346//
347// Before the transformation, A must be assigned with different hardware
348// register with D. After the transformation, A and D must be assigned with
349// same hardware register due to TIE attribute of FMA instructions.
350//
// Recognize FMA reassociation opportunities rooted at \p Root: the ILP chain
// patterns and the register-pressure FSUB+constant patterns described in the
// large comment above. Returns true when a pattern was recorded.
// NOTE(review): this extraction is missing the function signature (original
// lines 351-352) and several body lines (354-356, 406, 410, 479, 487, 514,
// 519 — presumably the Patterns.push_back(...) lines and operand-index
// assignments) — confirm against upstream before building.
353 bool DoRegPressureReduce) const {
 357
// All explicit operands must be virtual registers for reassociation to be
// legal at this point of the pipeline.
 358 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
 359 for (const auto &MO : Instr.explicit_operands())
 360 if (!(MO.isReg() && MO.getReg().isVirtual()))
 361 return false;
 362 return true;
 363 };
 364
// Is \p Instr the ADD/SUB partner (per FMAOpIdxInfo column \p OpType) of
// Root's FMA opcode, with flags and uses that permit reassociation?
 365 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
 366 unsigned OpType) {
 367 if (Instr.getOpcode() !=
 368 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
 369 return false;
 370
 371 // Instruction can be reassociated.
 372 // fast math flags may prohibit reassociation.
 373 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
 374 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
 375 return false;
 376
 377 // Instruction operands are virtual registers for reassociation.
 378 if (!IsAllOpsVirtualReg(Instr))
 379 return false;
 380
 381 // For register pressure reassociation, the FSub must have only one use as
 382 // we want to delete the sub to save its def.
 383 if (OpType == InfoArrayIdxFSubInst &&
 384 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
 385 return false;
 386
 387 return true;
 388 };
 389
// Is \p Instr a reassociable FMA? On success the add/mul operand indices are
// reported through \p AddOpIdx / \p MulOpIdx (assignment lines appear to be
// among the missing lines noted above).
 390 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
 391 int16_t &MulOpIdx, bool IsLeaf) {
 392 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
 393 if (Idx < 0)
 394 return false;
 395
 396 // Instruction can be reassociated.
 397 // fast math flags may prohibit reassociation.
 398 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
 399 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
 400 return false;
 401
 402 // Instruction operands are virtual registers for reassociation.
 403 if (!IsAllOpsVirtualReg(Instr))
 404 return false;
 405
 407 if (IsLeaf)
 408 return true;
 409
 411
 412 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
 413 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
 414 // If 'add' operand's def is not in current block, don't do ILP related opt.
 415 if (!MIAdd || MIAdd->getParent() != MBB)
 416 return false;
 417
 418 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
 419 // as this fma will be changed later.
 420 return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
 421 };
 422
 423 int16_t AddOpIdx = -1;
 424 int16_t MulOpIdx = -1;
 425
 426 bool IsUsedOnceL = false;
 427 bool IsUsedOnceR = false;
 428 MachineInstr *MULInstrL = nullptr;
 429 MachineInstr *MULInstrR = nullptr;
 430
// Decide whether Root qualifies for the register-pressure patterns: a scalar
// single/double FMA whose two mul operands resolve (through copies) to
// virtual-register defs, at least one of which is single-use.
 431 auto IsRPReductionCandidate = [&]() {
 432 // Currently, we only support float and double.
 433 // FIXME: add support for other types.
 434 unsigned Opcode = Root.getOpcode();
 435 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
 436 return false;
 437
 438 // Root must be a valid FMA like instruction.
 439 // Treat it as leaf as we don't care its add operand.
 440 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
 441 assert((MulOpIdx >= 0) && "mul operand index not right!");
 442 Register MULRegL = TRI->lookThruSingleUseCopyChain(
 443 Root.getOperand(MulOpIdx).getReg(), MRI);
 444 Register MULRegR = TRI->lookThruSingleUseCopyChain(
 445 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
 446 if (!MULRegL && !MULRegR)
 447 return false;
 448
 449 if (MULRegL && !MULRegR) {
 450 MULRegR =
 451 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
 452 IsUsedOnceL = true;
 453 } else if (!MULRegL && MULRegR) {
 454 MULRegL =
 455 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
 456 IsUsedOnceR = true;
 457 } else {
 458 IsUsedOnceL = true;
 459 IsUsedOnceR = true;
 460 }
 461
 462 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
 463 return false;
 464
 465 MULInstrL = MRI->getVRegDef(MULRegL);
 466 MULInstrR = MRI->getVRegDef(MULRegR);
 467 return true;
 468 }
 469 return false;
 470 };
 471
 472 // Register pressure fma reassociation patterns.
 473 if (DoRegPressureReduce && IsRPReductionCandidate()) {
 474 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
 475 // Register pressure pattern 1
 476 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
 477 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
 478 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
 480 return true;
 481 }
 482
 483 // Register pressure pattern 2
 484 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
 485 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
 486 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
 488 return true;
 489 }
 490 }
 491
 492 // ILP fma reassociation patterns.
 493 // Root must be a valid FMA like instruction.
 494 AddOpIdx = -1;
 495 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
 496 return false;
 497
 498 assert((AddOpIdx >= 0) && "add operand index not right!");
 499
 500 Register RegB = Root.getOperand(AddOpIdx).getReg();
 501 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
 502
 503 // Prev must be a valid FMA like instruction.
 504 AddOpIdx = -1;
 505 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
 506 return false;
 507
 508 assert((AddOpIdx >= 0) && "add operand index not right!");
 509
 510 Register RegA = Prev->getOperand(AddOpIdx).getReg();
 511 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
 512 AddOpIdx = -1;
 513 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
 515 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
 516 return true;
 517 }
 518 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
 520 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
 521 return true;
 522 }
 523 return false;
 524}
525
// After MachineCombiner selects a register-pressure pattern, replace the
// PPC::ZERO8 placeholder planted by reassociateFMA with a freshly generated
// load of the negated constant from the constant pool.
// NOTE(review): this extraction is missing the function's opening line
// (original line 526), several declarations (532-534, 540 — presumably MRI /
// MCP / TRI / FirstMulOpIdx), and the two case labels at 546 and 550
// (presumably REASSOC_XY_BCA / REASSOC_XY_BAC) — confirm upstream.
527 MachineInstr &Root, unsigned &Pattern,
 528 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
 529 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
 530
 531 MachineFunction *MF = Root.getMF();
 535
 536 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
 537 if (Idx < 0)
 538 return;
 539
 541
 542 // For now we only need to fix up placeholder for register pressure reduce
 543 // patterns.
 544 Register ConstReg = 0;
 545 switch (Pattern) {
 547 ConstReg =
 548 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
 549 break;
 551 ConstReg =
 552 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
 553 break;
 554 default:
 555 // Not register pressure reduce patterns.
 556 return;
 557 }
 558
 559 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
 560 // Get const value from const pool.
 561 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
 562 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
 563
 564 // Get negative fp const.
 565 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
 566 F1.changeSign();
 567 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
 568 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
 569
 570 // Put negative fp const into constant pool.
 571 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
 572
 573 MachineOperand *Placeholder = nullptr;
 574 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
 575 for (auto *Inst : InsInstrs) {
 576 for (MachineOperand &Operand : Inst->explicit_operands()) {
 577 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
 578 if (Operand.getReg() == PPC::ZERO8) {
 579 Placeholder = &Operand;
 580 break;
 581 }
 582 }
 583 }
 584
 585 assert(Placeholder && "Placeholder does not exist!");
 586
 587 // Generate instructions to load the const fp from constant pool.
 588 // We only support PPC64 and medium code model.
 589 Register LoadNewConst =
 590 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
 591
 592 // Fill the placeholder with the new load from constant pool.
 593 Placeholder->setReg(LoadNewConst);
 594}
595
// Gate for the register-pressure FMA reassociation: only fire on supported
// targets and only when VSSRC pressure in \p MBB exceeds the scaled limit.
// NOTE(review): this extraction is missing the function's opening line
// (original line 596), line 599 (presumably the EnableFMARegPressureReduction
// early-out condition), line 615 (presumably the code-model check closing the
// target condition), and line 618 (presumably the TRI declaration) — confirm
// upstream.
597 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
 598
 600 return false;
 601
 602 // Currently, we only enable register pressure reducing in machine combiner
 603 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
 604 // support.
 605 //
 606 // So we need following instructions to access a TOC entry:
 607 //
 608 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
 609 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
 610 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
 611 //
 612 // FIXME: add more supported targets, like Small and Large code model, PPC32,
 613 // AIX.
 614 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
 616 return false;
 617
 619 const MachineFunction *MF = MBB->getParent();
 620 const MachineRegisterInfo *MRI = &MF->getRegInfo();
 621
// Walk the block bottom-up with a pressure tracker and return the max set
// pressure observed.
 622 auto GetMBBPressure =
 623 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
 624 RegionPressure Pressure;
 625 RegPressureTracker RPTracker(Pressure);
 626
 627 // Initialize the register pressure tracker.
 628 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
 629 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
 630
 631 for (const auto &MI : reverse(*MBB)) {
 632 if (MI.isDebugValue() || MI.isDebugLabel())
 633 continue;
 634 RegisterOperands RegOpers;
 635 RegOpers.collect(MI, *TRI, *MRI, false, false);
 636 RPTracker.recedeSkipDebugValues();
 637 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
 638 RPTracker.recede(RegOpers);
 639 }
 640
 641 // Close the RPTracker to finalize live ins.
 642 RPTracker.closeRegion();
 643
 644 return RPTracker.getPressure().MaxSetPressure;
 645 };
 646
 647 // For now we only care about float and double type fma.
 648 unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
 649 *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);
 650
 651 // Only reduce register pressure when pressure is high.
 652 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
 653 (float)VSSRCLimit * FMARPFactor;
 654}
655
// True when \p I is a single-memoperand load whose pseudo source value is a
// constant-pool entry.
// NOTE(review): the signature line (original line 656) is missing from this
// extraction — presumably
// "bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {"
// — confirm upstream.
657 // I has only one memory operand which is load from constant pool.
 658 if (!I->hasOneMemOperand())
 659 return false;
 660
 661 MachineMemOperand *Op = I->memoperands()[0];
 662 return Op->isLoad() && Op->getPseudoValue() &&
 663 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
 664}
665
// Build the two-instruction TOC access (ADDIStocHA8 + DFLOADf32/f64) that
// loads constant-pool entry \p Idx, prepend both to \p InsInstrs, and return
// the virtual register holding the loaded value.
// NOTE(review): this extraction is missing original lines 673 (presumably the
// medium-code-model clause of the assert), 677 (MRI declaration), 684/705
// (the .addConstantPoolIndex(Idx) operands), 698-700 (the MachineMemOperand
// creation), and 703 (the "MachineInstr *Load =" line) — confirm upstream.
666Register PPCInstrInfo::generateLoadForNewConst(
 667 unsigned Idx, MachineInstr *MI, Type *Ty,
 668 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
 669 // Now we only support PPC64, Medium code model and P9 with vector.
 670 // We have immutable pattern to access const pool. See function
 671 // shouldReduceRegisterPressure.
 672 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
 674 "Target not supported!\n");
 675
 676 MachineFunction *MF = MI->getMF();
 678
 679 // Generate ADDIStocHA8
 680 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
 681 MachineInstrBuilder TOCOffset =
 682 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
 683 .addReg(PPC::X2)
 685
 686 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
 687 "Only float and double are supported!");
 688
 689 unsigned LoadOpcode;
 690 // Should be float type or double type.
 691 if (Ty->isFloatTy())
 692 LoadOpcode = PPC::DFLOADf32;
 693 else
 694 LoadOpcode = PPC::DFLOADf64;
 695
 696 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
 697 Register VReg2 = MRI->createVirtualRegister(RC);
 701
 702 // Generate Load from constant pool.
 704 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
 706 .addReg(VReg1, getKillRegState(true))
 707 .addMemOperand(MMO);
 708
 709 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
 710
 711 // Insert the toc load instructions into InsInstrs.
 712 InsInstrs.insert(InsInstrs.begin(), Load);
 713 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
 714 return VReg2;
 715}
716
717// This function returns the const value in constant pool if the \p I is a load
 718// from constant pool.
// NOTE(review): this extraction is missing original line 720 (presumably
// "PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {") and
// lines 722-723 (presumably the MRI and MCP declarations) — confirm upstream.
 719const Constant *
 721 MachineFunction *MF = I->getMF();
 724 assert(I->mayLoad() && "Should be a load instruction.\n");
// Walk the load's register uses to the TOC-address def, then look for a
// constant-pool-index operand on that def.
 725 for (auto MO : I->uses()) {
 726 if (!MO.isReg())
 727 continue;
 728 Register Reg = MO.getReg();
 729 if (Reg == 0 || !Reg.isVirtual())
 730 continue;
 731 // Find the toc address.
 732 MachineInstr *DefMI = MRI->getVRegDef(Reg);
 733 for (auto MO2 : DefMI->uses())
 734 if (MO2.isCPI())
 735 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
 736 }
 737 return nullptr;
739
// Fragment of a pattern-classification switch.
// NOTE(review): this extraction is missing the enclosing function's signature
// (original line 740) and the case labels / return statements at lines
// 742-747 and 749 — too little survives to document the behavior; recover the
// full definition from upstream.
741 switch (Pattern) {
 748 default:
 750 }
 751}
752
// Collect machine-combiner patterns for \p Root: FMA patterns first, then the
// target-independent fallback.
// NOTE(review): this extraction is missing the function signature (original
// lines 753-754), line 758 (presumably the optimization-level guard condition)
// and line 764 (presumably the TargetInstrInfo::getMachineCombinerPatterns
// call being returned) — confirm upstream.
755 bool DoRegPressureReduce) const {
 756 // Using the machine combiner in this way is potentially expensive, so
 757 // restrict to when aggressive optimizations are desired.
 759 return false;
 760
 761 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
 762 return true;
 763
 765 DoRegPressureReduce);
 766}
767
// Dispatch a selected combiner pattern: FMA reassociation patterns go through
// reassociateFMA, everything else through the base-class handler.
// NOTE(review): this extraction is missing the function's opening line
// (original line 768), parameter lines 770-771, the FMA-pattern case labels at
// 774-777, and line 782 (presumably the
// TargetInstrInfo::genAlternativeCodeSequence call) — confirm upstream.
769 MachineInstr &Root, unsigned Pattern,
 772 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
 773 switch (Pattern) {
 778 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
 779 break;
 780 default:
 781 // Reassociate default patterns.
 783 DelInstrs, InstrIdxForVirtReg);
 784 break;
 785 }
 786}
787
// Materialize the instruction sequence for a recognized FMA reassociation
// pattern: builds the replacement FMAs/ADD into InsInstrs and records Root,
// Prev and Leaf for deletion. See the pattern diagrams above getFMAPatterns.
// NOTE(review): this extraction is missing many lines (790-791, 794-795,
// 806-807, 809-810, 817-818, 822, 828, 869, 872, 897, 917, 937, 952, 957,
// 979, 996-997, 1000) — mostly case labels of the pattern switches, the
// MRI/TRI declarations, and the AddOpIdx/FirstMulOpIdx initializers. Confirm
// the full definition upstream before building.
788void PPCInstrInfo::reassociateFMA(
 789 MachineInstr &Root, unsigned Pattern,
 792 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
 793 MachineFunction *MF = Root.getMF();
 796 MachineOperand &OpC = Root.getOperand(0);
 797 Register RegC = OpC.getReg();
 798 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
 799 MRI.constrainRegClass(RegC, RC);
 800
 801 unsigned FmaOp = Root.getOpcode();
 802 int16_t Idx = getFMAOpIdxInfo(FmaOp);
 803 assert(Idx >= 0 && "Root must be a FMA instruction");
 804
 805 bool IsILPReassociate =
 808
 811
// Locate Prev and Leaf according to the selected pattern.
 812 MachineInstr *Prev = nullptr;
 813 MachineInstr *Leaf = nullptr;
 814 switch (Pattern) {
 815 default:
 816 llvm_unreachable("not recognized pattern!");
 819 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
 820 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
 821 break;
 823 Register MULReg =
 824 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
 825 Leaf = MRI.getVRegDef(MULReg);
 826 break;
 827 }
 829 Register MULReg = TRI->lookThruCopyLike(
 830 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
 831 Leaf = MRI.getVRegDef(MULReg);
 832 break;
 833 }
 834 }
 835
// Only flags common to every participating instruction survive onto the
// replacements.
 836 uint32_t IntersectedFlags = 0;
 837 if (IsILPReassociate)
 838 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
 839 else
 840 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
 841
 842 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
 843 bool &KillFlag) {
 844 Reg = Operand.getReg();
 845 MRI.constrainRegClass(Reg, RC);
 846 KillFlag = Operand.isKill();
 847 };
 848
 849 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
 850 Register &MulOp2, Register &AddOp,
 851 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
 852 bool &AddOpKillFlag) {
 853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
 854 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
 855 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
 856 };
 857
 858 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
 859 RegA21, RegB;
 860 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
 861 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
 862 KillA11 = false, KillA21 = false, KillB = false;
 863
 864 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
 865
 866 if (IsILPReassociate)
 867 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
 868
 870 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
 871 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
 873 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
 874 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
 875 } else {
 876 // Get FSUB instruction info.
 877 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
 878 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
 879 }
 880
 881 // Create new virtual registers for the new results instead of
 882 // recycling legacy ones because the MachineCombiner's computation of the
 883 // critical path requires a new register definition rather than an existing
 884 // one.
 885 // For register pressure reassociation, we only need create one virtual
 886 // register for the new fma.
 887 Register NewVRA = MRI.createVirtualRegister(RC);
 888 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
 889
 890 Register NewVRB = 0;
 891 if (IsILPReassociate) {
 892 NewVRB = MRI.createVirtualRegister(RC);
 893 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
 894 }
 895
 896 Register NewVRD = 0;
 898 NewVRD = MRI.createVirtualRegister(RC);
 899 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
 900 }
 901
// FMADD/FMADDS carry the add operand at index 3 instead of 1; this lambda
// rewrites the operands of an already-built FMA for that layout.
 902 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
 903 Register RegMul1, bool KillRegMul1,
 904 Register RegMul2, bool KillRegMul2) {
 905 MI->getOperand(AddOpIdx).setReg(RegAdd);
 906 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
 907 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
 908 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
 909 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
 910 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
 911 };
 912
 913 MachineInstrBuilder NewARegPressure, NewCRegPressure;
 914 switch (Pattern) {
 915 default:
 916 llvm_unreachable("not recognized pattern!");
 918 // Create new instructions for insertion.
 919 MachineInstrBuilder MINewB =
 920 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
 921 .addReg(RegX, getKillRegState(KillX))
 922 .addReg(RegM21, getKillRegState(KillM21))
 923 .addReg(RegM22, getKillRegState(KillM22));
 924 MachineInstrBuilder MINewA =
 925 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
 926 .addReg(RegY, getKillRegState(KillY))
 927 .addReg(RegM31, getKillRegState(KillM31))
 928 .addReg(RegM32, getKillRegState(KillM32));
 929 // If AddOpIdx is not 1, adjust the order.
 930 if (AddOpIdx != 1) {
 931 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
 932 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
 933 }
 934
 935 MachineInstrBuilder MINewC =
 936 BuildMI(*MF, Root.getDebugLoc(),
 938 .addReg(NewVRB, getKillRegState(true))
 939 .addReg(NewVRA, getKillRegState(true));
 940
 941 // Update flags for newly created instructions.
 942 setSpecialOperandAttr(*MINewA, IntersectedFlags);
 943 setSpecialOperandAttr(*MINewB, IntersectedFlags);
 944 setSpecialOperandAttr(*MINewC, IntersectedFlags);
 945
 946 // Record new instructions for insertion.
 947 InsInstrs.push_back(MINewA);
 948 InsInstrs.push_back(MINewB);
 949 InsInstrs.push_back(MINewC);
 950 break;
 951 }
 953 assert(NewVRD && "new FMA register not created!");
 954 // Create new instructions for insertion.
 955 MachineInstrBuilder MINewA =
 956 BuildMI(*MF, Leaf->getDebugLoc(),
 958 .addReg(RegM11, getKillRegState(KillM11))
 959 .addReg(RegM12, getKillRegState(KillM12));
 960 MachineInstrBuilder MINewB =
 961 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
 962 .addReg(RegX, getKillRegState(KillX))
 963 .addReg(RegM21, getKillRegState(KillM21))
 964 .addReg(RegM22, getKillRegState(KillM22));
 965 MachineInstrBuilder MINewD =
 966 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
 967 .addReg(NewVRA, getKillRegState(true))
 968 .addReg(RegM31, getKillRegState(KillM31))
 969 .addReg(RegM32, getKillRegState(KillM32));
 970 // If AddOpIdx is not 1, adjust the order.
 971 if (AddOpIdx != 1) {
 972 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
 973 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
 974 KillM32);
 975 }
 976
 977 MachineInstrBuilder MINewC =
 978 BuildMI(*MF, Root.getDebugLoc(),
 980 .addReg(NewVRB, getKillRegState(true))
 981 .addReg(NewVRD, getKillRegState(true));
 982
 983 // Update flags for newly created instructions.
 984 setSpecialOperandAttr(*MINewA, IntersectedFlags);
 985 setSpecialOperandAttr(*MINewB, IntersectedFlags);
 986 setSpecialOperandAttr(*MINewD, IntersectedFlags);
 987 setSpecialOperandAttr(*MINewC, IntersectedFlags);
 988
 989 // Record new instructions for insertion.
 990 InsInstrs.push_back(MINewA);
 991 InsInstrs.push_back(MINewB);
 992 InsInstrs.push_back(MINewD);
 993 InsInstrs.push_back(MINewC);
 994 break;
 995 }
 998 Register VarReg;
 999 bool KillVarReg = false;
 1001 VarReg = RegM31;
 1002 KillVarReg = KillM31;
 1003 } else {
 1004 VarReg = RegM32;
 1005 KillVarReg = KillM32;
 1006 }
 1007 // We don't want to get negative const from memory pool too early, as the
 1008 // created entry will not be deleted even if it has no users. Since all
 1009 // operand of Leaf and Root are virtual register, we use zero register
 1010 // here as a placeholder. When the InsInstrs is selected in
 1011 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
 1012 // with a virtual register which is a load from constant pool.
// NOTE(review): getKillRegState(RegB) passes a Register where a bool kill
// flag is expected — looks like getKillRegState(KillB) was intended, since a
// nonzero register always reads as "kill". TODO confirm against upstream.
 1013 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
 1014 .addReg(RegB, getKillRegState(RegB))
 1015 .addReg(RegY, getKillRegState(KillY))
 1016 .addReg(PPC::ZERO8);
 1017 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
 1018 .addReg(NewVRA, getKillRegState(true))
 1019 .addReg(RegX, getKillRegState(KillX))
 1020 .addReg(VarReg, getKillRegState(KillVarReg))
 1021 // For now, we only support xsmaddadp/xsmaddasp, their add operand are
 1022 // both at index 1, no need to adjust.
 1023 // FIXME: when add more fma instructions support, like fma/fmas, adjust
 1024 // the operand index here.
 1025 break;
 1026 }
 1027 }
 1028
 1029 if (!IsILPReassociate) {
 1030 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
 1031 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
 1032
 1033 InsInstrs.push_back(NewARegPressure);
 1034 InsInstrs.push_back(NewCRegPressure);
 1035 }
 1036
 1037 assert(!InsInstrs.empty() &&
 1038 "Insertion instructions set should not be empty!");
 1039
 1040 // Record old instructions for deletion.
 1041 DelInstrs.push_back(Leaf);
 1042 if (IsILPReassociate)
 1043 DelInstrs.push_back(Prev);
 1044 DelInstrs.push_back(&Root);
 1045}
1046
1047 // Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
// Returns true (filling in SrcReg/DstReg/SubIdx) when MI is a sign-extend
// whose 32-bit source can be coalesced with the low half of the 64-bit
// destination via the sub_32 sub-register index.
1049 Register &SrcReg, Register &DstReg,
1050 unsigned &SubIdx) const {
1051 switch (MI.getOpcode()) {
// Anything other than the EXTSW family is not a coalescable extension.
1052 default: return false;
1053 case PPC::EXTSW:
1054 case PPC::EXTSW_32:
1055 case PPC::EXTSW_32_64:
// Operand 0 is the def, operand 1 the use; the source maps onto the
// low 32 bits (sub_32) of the destination.
1056 SrcReg = MI.getOperand(1).getReg();
1057 DstReg = MI.getOperand(0).getReg();
1058 SubIdx = PPC::sub_32;
1059 return true;
1060 }
1061 }
1062
// Identify a stack-slot reload: if MI is one of the spill-reload opcodes and
// its addressing operands are the canonical (imm 0, frame-index) pair created
// by addFrameReference, return the loaded register and set FrameIndex.
// Returns 0 (no register) otherwise.
1064 int &FrameIndex) const {
1065 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1066 // Check for the operands added by addFrameReference (the immediate is the
1067 // offset which defaults to 0).
1068 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1069 MI.getOperand(2).isFI()) {
1070 FrameIndex = MI.getOperand(2).getIndex();
1071 return MI.getOperand(0).getReg();
1072 }
1073 }
1074 return 0;
1075 }
1076
1077 // For opcodes with the ReMaterializable flag set, this function is called to
1078 // verify the instruction is really rematable.
// The opcodes listed below are immediate-materializing or constant-splat
// instructions whose result depends only on their encoded operands, so they
// can always be re-executed at the point of use instead of spilled.
1080 const MachineInstr &MI) const {
1081 switch (MI.getOpcode()) {
1082 default:
1083 // Let base implementaion decide.
1084 break;
// Immediate loads (LI/LIS families, prefixed PLI variants).
1085 case PPC::LI:
1086 case PPC::LI8:
1087 case PPC::PLI:
1088 case PPC::PLI8:
1089 case PPC::LIS:
1090 case PPC::LIS8:
// TOC-relative address materialization.
1091 case PPC::ADDIStocHA:
1092 case PPC::ADDIStocHA8:
1093 case PPC::ADDItocL:
1094 case PPC::ADDItocL8:
1095 case PPC::LOAD_STACK_GUARD:
1096 case PPC::PPCLdFixedAddr:
// Vector all-zero / all-one / splat-immediate idioms.
1097 case PPC::XXLXORz:
1098 case PPC::XXLXORspz:
1099 case PPC::XXLXORdpz:
1100 case PPC::XXLEQVOnes:
1101 case PPC::XXSPLTI32DX:
1102 case PPC::XXSPLTIW:
1103 case PPC::XXSPLTIDP:
1104 case PPC::V_SET0B:
1105 case PPC::V_SET0H:
1106 case PPC::V_SET0:
1107 case PPC::V_SETALLONESB:
1108 case PPC::V_SETALLONESH:
1109 case PPC::V_SETALLONES:
// CR-bit constants and accumulator zeroing.
1110 case PPC::CRSET:
1111 case PPC::CRUNSET:
1112 case PPC::XXSETACCZ:
1113 case PPC::XXSETACCZW:
1114 return true;
1115 }
1117}
1118
// Identify a stack-slot spill store: mirror of isLoadFromStackSlot above,
// but matched against the store spill opcodes. Returns the stored register
// and sets FrameIndex on a match, 0 otherwise.
1120 int &FrameIndex) const {
1121 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
// Operands as laid out by addFrameReference: (value, imm offset 0, FI).
1122 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1123 MI.getOperand(2).isFI()) {
1124 FrameIndex = MI.getOperand(2).getIndex();
1125 return MI.getOperand(0).getReg();
1126 }
1127 }
1128 return 0;
1129 }
1130
// Commute the register operands of MI. Most opcodes defer to the generic
// TargetInstrInfo implementation; RLWIMI/RLWIMI_rec need special handling
// because swapping the two inputs also requires complementing the mask
// (the MB/ME immediates), and is only legal for a zero rotate count.
// If NewMI is set, a fresh instruction is built; otherwise MI is updated
// in place. Returns nullptr when the commute is not possible.
1132 unsigned OpIdx1,
1133 unsigned OpIdx2) const {
1134 MachineFunction &MF = *MI.getParent()->getParent();
1135
1136 // Normal instructions can be commuted the obvious way.
1137 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1138 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1139 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1140 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1141 // changing the relative order of the mask operands might change what happens
1142 // to the high-bits of the mask (and, thus, the result).
1143
1144 // Cannot commute if it has a non-zero rotate count.
1145 if (MI.getOperand(3).getImm() != 0)
1146 return nullptr;
1147
1148 // If we have a zero rotate count, we have:
1149 // M = mask(MB,ME)
1150 // Op0 = (Op1 & ~M) | (Op2 & M)
1151 // Change this to:
1152 // M = mask((ME+1)&31, (MB-1)&31)
1153 // Op0 = (Op2 & ~M) | (Op1 & M)
1154
1155 // Swap op1/op2
1156 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1157 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
// Snapshot register/subreg/kill state of both inputs before mutating.
1158 Register Reg0 = MI.getOperand(0).getReg();
1159 Register Reg1 = MI.getOperand(1).getReg();
1160 Register Reg2 = MI.getOperand(2).getReg();
1161 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1162 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1163 bool Reg1IsKill = MI.getOperand(1).isKill();
1164 bool Reg2IsKill = MI.getOperand(2).isKill();
1165 bool ChangeReg0 = false;
1166 // If machine instrs are no longer in two-address forms, update
1167 // destination register as well.
1168 if (Reg0 == Reg1) {
1169 // Must be two address instruction (i.e. op1 is tied to op0).
1170 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1171 "Expecting a two-address instruction!");
1172 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
// Reg2 becomes the tied def below, so it can no longer be a kill here.
1173 Reg2IsKill = false;
1174 ChangeReg0 = true;
1175 }
1176
1177 // Masks.
1178 unsigned MB = MI.getOperand(4).getImm();
1179 unsigned ME = MI.getOperand(5).getImm();
1180
1181 // We can't commute a trivial mask (there is no way to represent an all-zero
1182 // mask).
1183 if (MB == 0 && ME == 31)
1184 return nullptr;
1185
1186 if (NewMI) {
1187 // Create a new instruction.
1188 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1189 bool Reg0IsDead = MI.getOperand(0).isDead();
1190 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1191 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1192 .addReg(Reg2, getKillRegState(Reg2IsKill))
1193 .addReg(Reg1, getKillRegState(Reg1IsKill))
// Complemented mask: mask((ME+1)&31, (MB-1)&31).
1194 .addImm((ME + 1) & 31)
1195 .addImm((MB - 1) & 31);
1196 }
1197
// In-place commute: swap the input operands (registers, subregs, kill
// flags) and complement the mask immediates.
1198 if (ChangeReg0) {
1199 MI.getOperand(0).setReg(Reg2);
1200 MI.getOperand(0).setSubReg(SubReg2);
1201 }
1202 MI.getOperand(2).setReg(Reg1);
1203 MI.getOperand(1).setReg(Reg2);
1204 MI.getOperand(2).setSubReg(SubReg1);
1205 MI.getOperand(1).setSubReg(SubReg2);
1206 MI.getOperand(2).setIsKill(Reg1IsKill);
1207 MI.getOperand(1).setIsKill(Reg2IsKill);
1208
1209 // Swap the mask around.
1210 MI.getOperand(4).setImm((ME + 1) & 31);
1211 MI.getOperand(5).setImm((MB - 1) & 31);
1212 return &MI;
1213}
1214
// Report which operand indices of MI may be commuted. VSX A-type FMA
// instructions (those with an alternate opcode from getAltVSXFMAOpcode)
// commute operands 2 and 3; everything else uses the generic logic.
1216 unsigned &SrcOpIdx1,
1217 unsigned &SrcOpIdx2) const {
1218 // For VSX A-Type FMA instructions, it is the first two operands that can be
1219 // commuted, however, because the non-encoded tied input operand is listed
1220 // first, the operands to swap are actually the second and third.
1221
1222 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1223 if (AltOpc == -1)
1224 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1225
1226 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1227 // and SrcOpIdx2.
1228 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1229}
1230
// Insert a scheduling no-op before MI. The opcode is chosen per CPU
// directive so that on POWER6+ cores the nop also terminates a dispatch
// group (NOP_GT_PWR6/NOP_GT_PWR7 pseudos).
1233 // This function is used for scheduling, and the nop wanted here is the type
1234 // that terminates dispatch groups on the POWER cores.
1235 unsigned Directive = Subtarget.getCPUDirective();
1236 unsigned Opcode;
1237 switch (Directive) {
1238 default: Opcode = PPC::NOP; break;
1239 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1240 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
// P8/P9 reuse the P7 group-terminating nop until their own scheduling
// models exist (see FIXMEs).
1241 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1242 // FIXME: Update when POWER9 scheduling model is ready.
1243 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1244 }
1245
1246 DebugLoc DL;
1247 BuildMI(MBB, MI, DL, get(Opcode));
1248}
1249
1250/// Return the noop instruction to use for a noop.
// Builds a plain PPC::NOP MCInst (no operands needed).
1252 MCInst Nop;
1253 Nop.setOpcode(PPC::NOP);
1254 return Nop;
1255}
1256
1257 // Branch analysis.
1258 // Note: If the condition register is set to CTR or CTR8 then this is a
1259 // BDNZ (imm == 1) or BDZ (imm == 0) branch.
// Decompose the terminators of MBB into the (TBB, FBB, Cond) form expected
// by the generic branch-folding machinery. Returns true when the block's
// terminators cannot be analyzed. With AllowModify set, redundant branches
// may be erased. Handled forms: unconditional B, conditional BCC/BC/BCn,
// and the CTR-decrementing BDNZ/BDZ (encoded in Cond as an immediate plus
// the CTR/CTR8 register).
1262 MachineBasicBlock *&FBB,
1264 bool AllowModify) const {
1265 bool isPPC64 = Subtarget.isPPC64();
1266
1267 // If the block has no terminators, it just falls into the block after it.
1269 if (I == MBB.end())
1270 return false;
1271
1272 if (!isUnpredicatedTerminator(*I))
1273 return false;
1274
1275 if (AllowModify) {
1276 // If the BB ends with an unconditional branch to the fallthrough BB,
1277 // we eliminate the branch instruction.
1278 if (I->getOpcode() == PPC::B &&
1279 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1280 I->eraseFromParent();
1281
1282 // We update iterator after deleting the last branch.
1284 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1285 return false;
1286 }
1287 }
1288
1289 // Get the last instruction in the block.
1290 MachineInstr &LastInst = *I;
1291
1292 // If there is only one terminator instruction, process it.
1293 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1294 if (LastInst.getOpcode() == PPC::B) {
// A branch whose operand is not an MBB (e.g. a jump table target) is
// unanalyzable.
1295 if (!LastInst.getOperand(0).isMBB())
1296 return true;
1297 TBB = LastInst.getOperand(0).getMBB();
1298 return false;
1299 } else if (LastInst.getOpcode() == PPC::BCC) {
1300 if (!LastInst.getOperand(2).isMBB())
1301 return true;
1302 // Block ends with fall-through condbranch.
1303 TBB = LastInst.getOperand(2).getMBB();
// Cond = (predicate, CR register) for BCC.
1304 Cond.push_back(LastInst.getOperand(0));
1305 Cond.push_back(LastInst.getOperand(1));
1306 return false;
1307 } else if (LastInst.getOpcode() == PPC::BC) {
1308 if (!LastInst.getOperand(1).isMBB())
1309 return true;
1310 // Block ends with fall-through condbranch.
1311 TBB = LastInst.getOperand(1).getMBB();
1313 Cond.push_back(LastInst.getOperand(0));
1314 return false;
1315 } else if (LastInst.getOpcode() == PPC::BCn) {
1316 if (!LastInst.getOperand(1).isMBB())
1317 return true;
1318 // Block ends with fall-through condbranch.
1319 TBB = LastInst.getOperand(1).getMBB();
1321 Cond.push_back(LastInst.getOperand(0));
1322 return false;
1323 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1324 LastInst.getOpcode() == PPC::BDNZ) {
1325 if (!LastInst.getOperand(0).isMBB())
1326 return true;
1328 return true;
1329 TBB = LastInst.getOperand(0).getMBB();
// CTR-loop branch: encode BDNZ as imm 1 + CTR register in Cond.
1330 Cond.push_back(MachineOperand::CreateImm(1));
1331 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1332 true));
1333 return false;
1334 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1335 LastInst.getOpcode() == PPC::BDZ) {
1336 if (!LastInst.getOperand(0).isMBB())
1337 return true;
1339 return true;
1340 TBB = LastInst.getOperand(0).getMBB();
// BDZ is encoded as imm 0 + CTR register in Cond.
1341 Cond.push_back(MachineOperand::CreateImm(0));
1342 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1343 true));
1344 return false;
1345 }
1346
1347 // Otherwise, don't know what this is.
1348 return true;
1349 }
1350
1351 // Get the instruction before it if it's a terminator.
1352 MachineInstr &SecondLastInst = *I;
1353
1354 // If there are three terminators, we don't know what sort of block this is.
1355 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1356 return true;
1357
1358 // If the block ends with PPC::B and PPC:BCC, handle it.
// Two-terminator forms below mirror the one-terminator cases, with the
// trailing unconditional PPC::B supplying FBB.
1359 if (SecondLastInst.getOpcode() == PPC::BCC &&
1360 LastInst.getOpcode() == PPC::B) {
1361 if (!SecondLastInst.getOperand(2).isMBB() ||
1362 !LastInst.getOperand(0).isMBB())
1363 return true;
1364 TBB = SecondLastInst.getOperand(2).getMBB();
1365 Cond.push_back(SecondLastInst.getOperand(0));
1366 Cond.push_back(SecondLastInst.getOperand(1));
1367 FBB = LastInst.getOperand(0).getMBB();
1368 return false;
1369 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1370 LastInst.getOpcode() == PPC::B) {
1371 if (!SecondLastInst.getOperand(1).isMBB() ||
1372 !LastInst.getOperand(0).isMBB())
1373 return true;
1374 TBB = SecondLastInst.getOperand(1).getMBB();
1376 Cond.push_back(SecondLastInst.getOperand(0));
1377 FBB = LastInst.getOperand(0).getMBB();
1378 return false;
1379 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1380 LastInst.getOpcode() == PPC::B) {
1381 if (!SecondLastInst.getOperand(1).isMBB() ||
1382 !LastInst.getOperand(0).isMBB())
1383 return true;
1384 TBB = SecondLastInst.getOperand(1).getMBB();
1386 Cond.push_back(SecondLastInst.getOperand(0));
1387 FBB = LastInst.getOperand(0).getMBB();
1388 return false;
1389 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1390 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1391 LastInst.getOpcode() == PPC::B) {
1392 if (!SecondLastInst.getOperand(0).isMBB() ||
1393 !LastInst.getOperand(0).isMBB())
1394 return true;
1396 return true;
1397 TBB = SecondLastInst.getOperand(0).getMBB();
1398 Cond.push_back(MachineOperand::CreateImm(1));
1399 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1400 true));
1401 FBB = LastInst.getOperand(0).getMBB();
1402 return false;
1403 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1404 SecondLastInst.getOpcode() == PPC::BDZ) &&
1405 LastInst.getOpcode() == PPC::B) {
1406 if (!SecondLastInst.getOperand(0).isMBB() ||
1407 !LastInst.getOperand(0).isMBB())
1408 return true;
1410 return true;
1411 TBB = SecondLastInst.getOperand(0).getMBB();
1412 Cond.push_back(MachineOperand::CreateImm(0));
1413 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1414 true));
1415 FBB = LastInst.getOperand(0).getMBB();
1416 return false;
1417 }
1418
1419 // If the block ends with two PPC:Bs, handle it. The second one is not
1420 // executed, so remove it.
1421 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1422 if (!SecondLastInst.getOperand(0).isMBB())
1423 return true;
1424 TBB = SecondLastInst.getOperand(0).getMBB();
1425 I = LastInst;
1426 if (AllowModify)
1427 I->eraseFromParent();
1428 return false;
1429 }
1430
1431 // Otherwise, can't handle this.
1432 return true;
1433}
1434
// Erase up to two branch terminators from the end of MBB (an unconditional
// branch and/or a conditional/CTR branch preceding it). Returns the number
// of instructions removed (0, 1, or 2). BytesRemoved is unsupported.
1436 int *BytesRemoved) const {
1437 assert(!BytesRemoved && "code size not handled");
1438
1440 if (I == MBB.end())
1441 return 0;
1442
// Last instruction is not a branch we know how to remove.
1443 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1444 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1445 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1446 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1447 return 0;
1448
1449 // Remove the branch.
1450 I->eraseFromParent();
1451
1452 I = MBB.end();
1453
1454 if (I == MBB.begin()) return 1;
1455 --I;
// Only a conditional/CTR branch can legally precede the branch we just
// removed; anything else means we are done.
1456 if (I->getOpcode() != PPC::BCC &&
1457 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1458 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1459 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1460 return 1;
1461
1462 // Remove the branch.
1463 I->eraseFromParent();
1464 return 2;
1465}
1466
// Insert branch terminators at the end of MBB, the inverse of analyzeBranch:
// TBB with empty Cond -> unconditional B; Cond of (imm, CTR/CTR8) -> a
// BDNZ/BDZ CTR-decrement branch; PRED_BIT_SET/UNSET -> BC/BCn on a CR bit;
// otherwise a BCC with the encoded predicate. A non-null FBB additionally
// gets a trailing unconditional B. Returns the number of instructions
// inserted (1 or 2). BytesAdded is unsupported.
1469 MachineBasicBlock *FBB,
1471 const DebugLoc &DL,
1472 int *BytesAdded) const {
1473 // Shouldn't be a fall through.
1474 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1475 assert((Cond.size() == 2 || Cond.size() == 0) &&
1476 "PPC branch conditions have two components!");
1477 assert(!BytesAdded && "code size not handled");
1478
1479 bool isPPC64 = Subtarget.isPPC64();
1480
1481 // One-way branch.
1482 if (!FBB) {
1483 if (Cond.empty()) // Unconditional branch
1484 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1485 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
// Cond[0] imm: 1 -> BDNZ, 0 -> BDZ (see analyzeBranch encoding).
1486 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1487 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1488 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1489 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1490 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1491 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1492 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1493 else // Conditional branch
1494 BuildMI(&MBB, DL, get(PPC::BCC))
1495 .addImm(Cond[0].getImm())
1496 .add(Cond[1])
1497 .addMBB(TBB);
1498 return 1;
1499 }
1500
1501 // Two-way Conditional Branch.
// Same opcode selection as above, followed by an unconditional B to FBB.
1502 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1503 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1504 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1505 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1506 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1507 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1508 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1509 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1510 else
1511 BuildMI(&MBB, DL, get(PPC::BCC))
1512 .addImm(Cond[0].getImm())
1513 .add(Cond[1])
1514 .addMBB(TBB);
1515 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1516 return 2;
1517}
1518
1519// Select analysis.
// Decide whether the branch condition Cond selecting between TrueReg and
// FalseReg can be implemented with an isel instruction, and report its cost.
// Requires the ISEL feature, a two-component condition that is not a
// CTR-decrement, a virtual condition register, and GPR operands.
1522 Register DstReg, Register TrueReg,
1523 Register FalseReg, int &CondCycles,
1524 int &TrueCycles, int &FalseCycles) const {
1525 if (!Subtarget.hasISEL())
1526 return false;
1527
1528 if (Cond.size() != 2)
1529 return false;
1530
1531 // If this is really a bdnz-like condition, then it cannot be turned into a
1532 // select.
1533 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1534 return false;
1535
1536 // If the conditional branch uses a physical register, then it cannot be
1537 // turned into a select.
1538 if (Cond[1].getReg().isPhysical())
1539 return false;
1540
1541 // Check register classes.
// Both operands must share a common subclass for the select result.
1543 const TargetRegisterClass *RC =
1544 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1545 if (!RC)
1546 return false;
1547
1548 // isel is for regular integer GPRs only.
1549 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1550 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1552 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1553 return false;
1554
1555 // FIXME: These numbers are for the A2, how well they work for other cores is
1556 // an open question. On the A2, the isel instruction has a 2-cycle latency
1557 // but single-cycle throughput. These numbers are used in combination with
1558 // the MispredictPenalty setting from the active SchedMachineModel.
1559 CondCycles = 1;
1560 TrueCycles = 1;
1561 FalseCycles = 1;
1562
1563 return true;
1564}
1565
// Emit an ISEL/ISEL8 implementing "DestReg = Cond ? TrueReg : FalseReg".
// The predicate is mapped to a CR sub-register bit (lt/gt/eq/un); inverted
// predicates (NE/GE/LE/NU, BIT_UNSET) are handled by swapping the true and
// false inputs rather than by testing a different bit.
1568 const DebugLoc &dl, Register DestReg,
1570 Register FalseReg) const {
1571 assert(Cond.size() == 2 &&
1572 "PPC branch conditions have two components!");
1573
1574 // Get the register classes.
1576 const TargetRegisterClass *RC =
1577 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1578 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1579
1580 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1581 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1582 assert((Is64Bit ||
1583 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1584 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1585 "isel is for regular integer GPRs only");
1586
1587 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1588 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1589
// Map predicate -> CR bit sub-index; SwapOps encodes predicate inversion.
1590 unsigned SubIdx = 0;
1591 bool SwapOps = false;
1592 switch (SelectPred) {
1593 case PPC::PRED_EQ:
1594 case PPC::PRED_EQ_MINUS:
1595 case PPC::PRED_EQ_PLUS:
1596 SubIdx = PPC::sub_eq; SwapOps = false; break;
1597 case PPC::PRED_NE:
1598 case PPC::PRED_NE_MINUS:
1599 case PPC::PRED_NE_PLUS:
1600 SubIdx = PPC::sub_eq; SwapOps = true; break;
1601 case PPC::PRED_LT:
1602 case PPC::PRED_LT_MINUS:
1603 case PPC::PRED_LT_PLUS:
1604 SubIdx = PPC::sub_lt; SwapOps = false; break;
1605 case PPC::PRED_GE:
1606 case PPC::PRED_GE_MINUS:
1607 case PPC::PRED_GE_PLUS:
1608 SubIdx = PPC::sub_lt; SwapOps = true; break;
1609 case PPC::PRED_GT:
1610 case PPC::PRED_GT_MINUS:
1611 case PPC::PRED_GT_PLUS:
1612 SubIdx = PPC::sub_gt; SwapOps = false; break;
1613 case PPC::PRED_LE:
1614 case PPC::PRED_LE_MINUS:
1615 case PPC::PRED_LE_PLUS:
1616 SubIdx = PPC::sub_gt; SwapOps = true; break;
1617 case PPC::PRED_UN:
1618 case PPC::PRED_UN_MINUS:
1619 case PPC::PRED_UN_PLUS:
1620 SubIdx = PPC::sub_un; SwapOps = false; break;
1621 case PPC::PRED_NU:
1622 case PPC::PRED_NU_MINUS:
1623 case PPC::PRED_NU_PLUS:
1624 SubIdx = PPC::sub_un; SwapOps = true; break;
1625 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1626 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1627 }
1628
1629 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1630 SecondReg = SwapOps ? TrueReg : FalseReg;
1631
1632 // The first input register of isel cannot be r0. If it is a member
1633 // of a register class that can be r0, then copy it first (the
1634 // register allocator should eliminate the copy).
1635 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1636 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
1637 const TargetRegisterClass *FirstRC =
1638 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1639 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1640 Register OldFirstReg = FirstReg;
1641 FirstReg = MRI.createVirtualRegister(FirstRC);
1642 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1643 .addReg(OldFirstReg);
1644 }
1645
// isel DestReg, FirstReg, SecondReg, CRbit — selects FirstReg if the bit
// (Cond[1] restricted to SubIdx) is set.
1646 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1647 .addReg(FirstReg).addReg(SecondReg)
1648 .addReg(Cond[1].getReg(), 0, SubIdx);
1649}
1650
1651static unsigned getCRBitValue(unsigned CRBit) {
1652 unsigned Ret = 4;
1653 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1654 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1655 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1656 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1657 Ret = 3;
1658 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1659 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1660 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1661 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1662 Ret = 2;
1663 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1664 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1665 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1666 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1667 Ret = 1;
1668 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1669 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1670 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1671 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1672 Ret = 0;
1673
1674 assert(Ret != 4 && "Invalid CR bit register");
1675 return Ret;
1676}
1677
// Emit a physical register-to-register copy, choosing an opcode per register
// class. Handles VSX sub/super-register promotion, cross-class moves
// (CR bit/field -> GPR, GPR <-> VSFR direct moves, SPE <-> GPR), multi-
// instruction copies for register pairs (VSRp, G8p) and accumulators
// (ACC/UACC), and plain same-class copies via OR/FMR/XXLOR/etc.
1680 const DebugLoc &DL, MCRegister DestReg,
1681 MCRegister SrcReg, bool KillSrc) const {
1682 // We can end up with self copies and similar things as a result of VSX copy
1683 // legalization. Promote them here.
1685 if (PPC::F8RCRegClass.contains(DestReg) &&
1686 PPC::VSRCRegClass.contains(SrcReg)) {
// Promote the F8 dest to its containing VSX register so a VSX copy opcode
// can be used; optionally crash on nop copies for debugging
// (-crash-on-ppc-vsx-self-copy).
1687 MCRegister SuperReg =
1688 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1689
1690 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1691 llvm_unreachable("nop VSX copy");
1692
1693 DestReg = SuperReg;
1694 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1695 PPC::VSRCRegClass.contains(DestReg)) {
1696 MCRegister SuperReg =
1697 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1698
1699 if (VSXSelfCopyCrash && DestReg == SuperReg)
1700 llvm_unreachable("nop VSX copy");
1701
1702 SrcReg = SuperReg;
1703 }
1704
1705 // Different class register copy
1706 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1707 PPC::GPRCRegClass.contains(DestReg)) {
// CR bit -> GPR: move the whole CR field, then rotate+mask the single bit
// into the LSB.
1708 MCRegister CRReg = getCRFromCRBit(SrcReg);
1709 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1710 getKillRegState(KillSrc);
1711 // Rotate the CR bit in the CR fields to be the least significant bit and
1712 // then mask with 0x1 (MB = ME = 31).
1713 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1714 .addReg(DestReg, RegState::Kill)
1715 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1716 .addImm(31)
1717 .addImm(31);
1718 return;
1719 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1720 (PPC::G8RCRegClass.contains(DestReg) ||
1721 PPC::GPRCRegClass.contains(DestReg))) {
// CR field -> GPR: mfocrf, then (unless it is CR7, already in the low
// nibble) shift the field into the low 4 bits.
1722 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1723 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1724 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1725 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1726 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1727 getKillRegState(KillSrc);
1728 if (CRNum == 7)
1729 return;
1730 // Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1731 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1732 .addReg(DestReg, RegState::Kill)
1733 .addImm(CRNum * 4 + 4)
1734 .addImm(28)
1735 .addImm(31);
1736 return;
1737 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1738 PPC::VSFRCRegClass.contains(DestReg)) {
1739 assert(Subtarget.hasDirectMove() &&
1740 "Subtarget doesn't support directmove, don't know how to copy.");
1741 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1742 NumGPRtoVSRSpill++;
1743 getKillRegState(KillSrc);
1744 return;
1745 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1746 PPC::G8RCRegClass.contains(DestReg)) {
1747 assert(Subtarget.hasDirectMove() &&
1748 "Subtarget doesn't support directmove, don't know how to copy.");
1749 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1750 getKillRegState(KillSrc);
1751 return;
1752 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1753 PPC::GPRCRegClass.contains(DestReg)) {
1754 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1755 getKillRegState(KillSrc);
1756 return;
1757 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1758 PPC::SPERCRegClass.contains(DestReg)) {
1759 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1760 getKillRegState(KillSrc);
1761 return;
1762 }
1763
// Same-class copies: pick the class-appropriate two/three-operand opcode
// and fall through to the single BuildMI at the bottom.
1764 unsigned Opc;
1765 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1766 Opc = PPC::OR;
1767 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1768 Opc = PPC::OR8;
1769 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1770 Opc = PPC::FMR;
1771 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1772 Opc = PPC::MCRF;
1773 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1774 Opc = PPC::VOR;
1775 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1776 // There are two different ways this can be done:
1777 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1778 // issue in VSU pipeline 0.
1779 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1780 // can go to either pipeline.
1781 // We'll always use xxlor here, because in practically all cases where
1782 // copies are generated, they are close enough to some use that the
1783 // lower-latency form is preferable.
1784 Opc = PPC::XXLOR;
1785 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1786 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1787 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1788 else if (Subtarget.pairedVectorMemops() &&
1789 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
// VSX register pair: translate the pair register into its two underlying
// VSL/V registers and copy each half with XXLOR.
1790 if (SrcReg > PPC::VSRp15)
1791 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1792 else
1793 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1794 if (DestReg > PPC::VSRp15)
1795 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1796 else
1797 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1798 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1799 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1800 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1801 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1802 return;
1803 }
1804 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1805 Opc = PPC::CROR;
1806 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1807 Opc = PPC::EVOR;
1808 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1809 PPC::UACCRCRegClass.contains(DestReg)) &&
1810 (PPC::ACCRCRegClass.contains(SrcReg) ||
1811 PPC::UACCRCRegClass.contains(SrcReg))) {
1812 // If primed, de-prime the source register, copy the individual registers
1813 // and prime the destination if needed. The vector subregisters are
1814 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1815 // source is primed, we need to re-prime it after the copy as well.
1816 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1817 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1818 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1819 MCRegister VSLSrcReg =
1820 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1821 MCRegister VSLDestReg =
1822 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1823 if (SrcPrimed)
1824 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1825 for (unsigned Idx = 0; Idx < 4; Idx++)
1826 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1827 .addReg(VSLSrcReg + Idx)
1828 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc))
1829 if (DestPrimed)
1830 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1831 if (SrcPrimed && !KillSrc)
1832 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1833 return;
1834 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1835 PPC::G8pRCRegClass.contains(SrcReg)) {
1836 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
// GPR pair: copy each of the two underlying X registers with OR8.
1837 unsigned DestRegIdx = DestReg - PPC::G8p0;
1838 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1839 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1840 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1841 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1842 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1843 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1844 .addReg(SrcRegSub0)
1845 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1846 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1847 .addReg(SrcRegSub1)
1848 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1849 return;
1850 } else
1851 llvm_unreachable("Impossible reg-to-reg copy");
1852
// Emit the selected opcode; 3-operand forms (OR-style) repeat the source.
1853 const MCInstrDesc &MCID = get(Opc);
1854 if (MCID.getNumOperands() == 3)
1855 BuildMI(MBB, I, DL, MCID, DestReg)
1856 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1857 else
1858 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1859}
1860
// Map a register class to its index into the spill-opcode arrays used by
// getStoreOpcodeForSpill / getLoadOpcodeForSpill. Classes that require the
// MMA/paired-vector feature assert that it is enabled.
1861unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
1862 int OpcodeIndex = 0;
1863
1864 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1865 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1867 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1868 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1870 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1872 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1874 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1876 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1878 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1880 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1882 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1884 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1886 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1888 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1890 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1891 assert(Subtarget.pairedVectorMemops() &&
1892 "Register unexpected when paired memops are disabled.");
1894 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1895 assert(Subtarget.pairedVectorMemops() &&
1896 "Register unexpected when paired memops are disabled.");
1898 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1899 assert(Subtarget.pairedVectorMemops() &&
1900 "Register unexpected when paired memops are disabled.");
1902 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1903 assert(Subtarget.pairedVectorMemops() &&
1904 "Register unexpected when paired memops are disabled.");
1906 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1908 } else {
1909 llvm_unreachable("Unknown regclass!");
1910 }
1911 return OpcodeIndex;
1912}
1913
1914unsigned
// Return the store opcode used to spill a register of class RC, looked up
// via getSpillIndex in the store spill-opcode table.
1916 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1917 return OpcodesForSpill[getSpillIndex(RC)];
1918}
1919
1920unsigned
// Return the load opcode used to reload a register of class RC, looked up
// via getSpillIndex in the load spill-opcode table.
1922 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1923 return OpcodesForSpill[getSpillIndex(RC)];
1924}
1925
// Build (but do not insert) the store instruction that spills SrcReg to
// FrameIdx, appending it to NewMIs, and update PPCFunctionInfo spill
// bookkeeping (hasSpills, spillsCR, hasNonRISpills).
1926void PPCInstrInfo::StoreRegToStackSlot(
1927 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1928 const TargetRegisterClass *RC,
1929 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1930 unsigned Opcode = getStoreOpcodeForSpill(RC);
1931 DebugLoc DL;
1932
1933 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1934 FuncInfo->setHasSpills();
1935
1937 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
1938 FrameIdx));
1939
// CR/CR-bit spills need frame-lowering support; record that fact.
1940 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
1941 PPC::CRBITRCRegClass.hasSubClassEq(RC))
1942 FuncInfo->setSpillsCR();
1943
// X-form memory ops use a register index, not a displacement.
1944 if (isXFormMemOp(Opcode))
1945 FuncInfo->setHasNonRISpills();
1946}
1947
// Insert a spill of SrcReg to FrameIdx before MI, without adjusting the
// register class (see storeRegToStackSlot below for the class-updating
// wrapper). Attaches a store MachineMemOperand to the final instruction.
1950 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1951 const TargetRegisterInfo *TRI) const {
1952 MachineFunction &MF = *MBB.getParent();
1954
1955 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1956
1957 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
1958 MBB.insert(MI, NewMIs[i]);
1959
1960 const MachineFrameInfo &MFI = MF.getFrameInfo();
1964 MFI.getObjectAlign(FrameIdx));
1965 NewMIs.back()->addMemOperand(MF, MMO);
1966}
1967
1970 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1971 const TargetRegisterInfo *TRI, Register VReg) const {
1972 // We need to avoid a situation in which the value from a VRRC register is
1973 // spilled using an Altivec instruction and reloaded into a VSRC register
1974 // using a VSX instruction. The issue with this is that the VSX
1975 // load/store instructions swap the doublewords in the vector and the Altivec
1976 // ones don't. The register classes on the spill/reload may be different if
1977 // the register is defined using an Altivec instruction and is then used by a
1978 // VSX instruction.
1979 RC = updatedRC(RC);
1980 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
1981}
1982
1983void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
1984 unsigned DestReg, int FrameIdx,
1985 const TargetRegisterClass *RC,
1987 const {
1988 unsigned Opcode = getLoadOpcodeForSpill(RC);
1989 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
1990 FrameIdx));
1991}
1992
1995 int FrameIdx, const TargetRegisterClass *RC,
1996 const TargetRegisterInfo *TRI) const {
1997 MachineFunction &MF = *MBB.getParent();
1999 DebugLoc DL;
2000 if (MI != MBB.end()) DL = MI->getDebugLoc();
2001
2002 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2003
2004 for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
2005 MBB.insert(MI, NewMIs[i]);
2006
2007 const MachineFrameInfo &MFI = MF.getFrameInfo();
2011 MFI.getObjectAlign(FrameIdx));
2012 NewMIs.back()->addMemOperand(MF, MMO);
2013}
2014
2017 Register DestReg, int FrameIdx,
2018 const TargetRegisterClass *RC,
2019 const TargetRegisterInfo *TRI,
2020 Register VReg) const {
2021 // We need to avoid a situation in which the value from a VRRC register is
2022 // spilled using an Altivec instruction and reloaded into a VSRC register
2023 // using a VSX instruction. The issue with this is that the VSX
2024 // load/store instructions swap the doublewords in the vector and the Altivec
2025 // ones don't. The register classes on the spill/reload may be different if
2026 // the register is defined using an Altivec instruction and is then used by a
2027 // VSX instruction.
2028 RC = updatedRC(RC);
2029
2030 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2031}
2032
2035 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
2036 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2037 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2038 else
2039 // Leave the CR# the same, but invert the condition.
2040 Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
2041 return false;
2042}
2043
2044// For some instructions, it is legal to fold ZERO into the RA register field.
2045// This function performs that fold by replacing the operand with PPC::ZERO,
2046// it does not consider whether the load immediate zero is no longer in use.
2048 Register Reg) const {
2049 // A zero immediate should always be loaded with a single li.
2050 unsigned DefOpc = DefMI.getOpcode();
2051 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2052 return false;
2053 if (!DefMI.getOperand(1).isImm())
2054 return false;
2055 if (DefMI.getOperand(1).getImm() != 0)
2056 return false;
2057
2058 // Note that we cannot here invert the arguments of an isel in order to fold
2059 // a ZERO into what is presented as the second argument. All we have here
2060 // is the condition bit, and that might come from a CR-logical bit operation.
2061
2062 const MCInstrDesc &UseMCID = UseMI.getDesc();
2063
2064 // Only fold into real machine instructions.
2065 if (UseMCID.isPseudo())
2066 return false;
2067
2068 // We need to find which of the User's operands is to be folded, that will be
2069 // the operand that matches the given register ID.
2070 unsigned UseIdx;
2071 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2072 if (UseMI.getOperand(UseIdx).isReg() &&
2073 UseMI.getOperand(UseIdx).getReg() == Reg)
2074 break;
2075
2076 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2077 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2078
2079 const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2080
2081 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2082 // register (which might also be specified as a pointer class kind).
2083 if (UseInfo->isLookupPtrRegClass()) {
2084 if (UseInfo->RegClass /* Kind */ != 1)
2085 return false;
2086 } else {
2087 if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2088 UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2089 return false;
2090 }
2091
2092 // Make sure this is not tied to an output register (or otherwise
2093 // constrained). This is true for ST?UX registers, for example, which
2094 // are tied to their output registers.
2095 if (UseInfo->Constraints != 0)
2096 return false;
2097
2098 MCRegister ZeroReg;
2099 if (UseInfo->isLookupPtrRegClass()) {
2100 bool isPPC64 = Subtarget.isPPC64();
2101 ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2102 } else {
2103 ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2104 PPC::ZERO8 : PPC::ZERO;
2105 }
2106
2107 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2108 LLVM_DEBUG(UseMI.dump());
2109 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2110 LLVM_DEBUG(dbgs() << "Into: ");
2111 LLVM_DEBUG(UseMI.dump());
2112 return true;
2113}
2114
2115// Folds zero into instructions which have a load immediate zero as an operand
2116// but also recognize zero as immediate zero. If the definition of the load
2117// has no more users it is deleted.
2119 Register Reg, MachineRegisterInfo *MRI) const {
2120 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2121 if (MRI->use_nodbg_empty(Reg))
2122 DefMI.eraseFromParent();
2123 return Changed;
2124}
2125
2127 for (MachineInstr &MI : MBB)
2128 if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
2129 return true;
2130 return false;
2131}
2132
2133// We should make sure that, if we're going to predicate both sides of a
2134// condition (a diamond), that both sides don't define the counter register. We
2135// can predicate counter-decrement-based branches, but while that predicates
2136// the branching, it does not predicate the counter decrement. If we tried to
2137// merge the triangle into one predicated block, we'd decrement the counter
2138// twice.
2140 unsigned NumT, unsigned ExtraT,
2141 MachineBasicBlock &FMBB,
2142 unsigned NumF, unsigned ExtraF,
2143 BranchProbability Probability) const {
2144 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2145}
2146
2147
2149 // The predicated branches are identified by their type, not really by the
2150 // explicit presence of a predicate. Furthermore, some of them can be
2151 // predicated more than once. Because if conversion won't try to predicate
2152 // any instruction which already claims to be predicated (by returning true
2153 // here), always return false. In doing so, we let isPredicable() be the
2154 // final word on whether not the instruction can be (further) predicated.
2155
2156 return false;
2157}
2158
2160 const MachineBasicBlock *MBB,
2161 const MachineFunction &MF) const {
2162 switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2166 // across them, since some FP operations may change content of FPSCR.
2167 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2168 case PPC::MFFS:
2169 case PPC::MTFSF:
2170 case PPC::FENCE:
2171 return true;
2172 }
2174}
2175
2177 ArrayRef<MachineOperand> Pred) const {
2178 unsigned OpC = MI.getOpcode();
2179 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2180 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2181 bool isPPC64 = Subtarget.isPPC64();
2182 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2183 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2184 // Need add Def and Use for CTR implicit operand.
2185 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2186 .addReg(Pred[1].getReg(), RegState::Implicit)
2188 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2189 MI.setDesc(get(PPC::BCLR));
2190 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2191 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2192 MI.setDesc(get(PPC::BCLRn));
2193 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2194 } else {
2195 MI.setDesc(get(PPC::BCCLR));
2196 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2197 .addImm(Pred[0].getImm())
2198 .add(Pred[1]);
2199 }
2200
2201 return true;
2202 } else if (OpC == PPC::B) {
2203 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2204 bool isPPC64 = Subtarget.isPPC64();
2205 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2206 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2207 // Need add Def and Use for CTR implicit operand.
2208 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2209 .addReg(Pred[1].getReg(), RegState::Implicit)
2211 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2212 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2213 MI.removeOperand(0);
2214
2215 MI.setDesc(get(PPC::BC));
2216 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2217 .add(Pred[1])
2218 .addMBB(MBB);
2219 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2220 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2221 MI.removeOperand(0);
2222
2223 MI.setDesc(get(PPC::BCn));
2224 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2225 .add(Pred[1])
2226 .addMBB(MBB);
2227 } else {
2228 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2229 MI.removeOperand(0);
2230
2231 MI.setDesc(get(PPC::BCC));
2232 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2233 .addImm(Pred[0].getImm())
2234 .add(Pred[1])
2235 .addMBB(MBB);
2236 }
2237
2238 return true;
2239 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2240 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2241 OpC == PPC::BCTRL8_RM) {
2242 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2243 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2244
2245 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2246 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2247 bool isPPC64 = Subtarget.isPPC64();
2248
2249 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2250 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2251 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2252 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2253 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2254 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2255 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2256 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2257 } else {
2258 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2259 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2260 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2261 .addImm(Pred[0].getImm())
2262 .add(Pred[1]);
2263 }
2264
2265 // Need add Def and Use for LR implicit operand.
2266 if (setLR)
2267 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2268 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2269 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2270 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2271 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2273
2274 return true;
2275 }
2276
2277 return false;
2278}
2279
2281 ArrayRef<MachineOperand> Pred2) const {
2282 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2283 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2284
2285 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2286 return false;
2287 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2288 return false;
2289
2290 // P1 can only subsume P2 if they test the same condition register.
2291 if (Pred1[1].getReg() != Pred2[1].getReg())
2292 return false;
2293
2294 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2295 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2296
2297 if (P1 == P2)
2298 return true;
2299
2300 // Does P1 subsume P2, e.g. GE subsumes GT.
2301 if (P1 == PPC::PRED_LE &&
2302 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2303 return true;
2304 if (P1 == PPC::PRED_GE &&
2305 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2306 return true;
2307
2308 return false;
2309}
2310
2312 std::vector<MachineOperand> &Pred,
2313 bool SkipDead) const {
2314 // Note: At the present time, the contents of Pred from this function is
2315 // unused by IfConversion. This implementation follows ARM by pushing the
2316 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2317 // predicate, instructions defining CTR or CTR8 are also included as
2318 // predicate-defining instructions.
2319
2320 const TargetRegisterClass *RCs[] =
2321 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2322 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2323
2324 bool Found = false;
2325 for (const MachineOperand &MO : MI.operands()) {
2326 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2327 const TargetRegisterClass *RC = RCs[c];
2328 if (MO.isReg()) {
2329 if (MO.isDef() && RC->contains(MO.getReg())) {
2330 Pred.push_back(MO);
2331 Found = true;
2332 }
2333 } else if (MO.isRegMask()) {
2334 for (MCPhysReg R : *RC)
2335 if (MO.clobbersPhysReg(R)) {
2336 Pred.push_back(MO);
2337 Found = true;
2338 }
2339 }
2340 }
2341 }
2342
2343 return Found;
2344}
2345
2347 Register &SrcReg2, int64_t &Mask,
2348 int64_t &Value) const {
2349 unsigned Opc = MI.getOpcode();
2350
2351 switch (Opc) {
2352 default: return false;
2353 case PPC::CMPWI:
2354 case PPC::CMPLWI:
2355 case PPC::CMPDI:
2356 case PPC::CMPLDI:
2357 SrcReg = MI.getOperand(1).getReg();
2358 SrcReg2 = 0;
2359 Value = MI.getOperand(2).getImm();
2360 Mask = 0xFFFF;
2361 return true;
2362 case PPC::CMPW:
2363 case PPC::CMPLW:
2364 case PPC::CMPD:
2365 case PPC::CMPLD:
2366 case PPC::FCMPUS:
2367 case PPC::FCMPUD:
2368 SrcReg = MI.getOperand(1).getReg();
2369 SrcReg2 = MI.getOperand(2).getReg();
2370 Value = 0;
2371 Mask = 0;
2372 return true;
2373 }
2374}
2375
2377 Register SrcReg2, int64_t Mask,
2378 int64_t Value,
2379 const MachineRegisterInfo *MRI) const {
2380 if (DisableCmpOpt)
2381 return false;
2382
2383 int OpC = CmpInstr.getOpcode();
2384 Register CRReg = CmpInstr.getOperand(0).getReg();
2385
2386 // FP record forms set CR1 based on the exception status bits, not a
2387 // comparison with zero.
2388 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2389 return false;
2390
2392 // The record forms set the condition register based on a signed comparison
2393 // with zero (so says the ISA manual). This is not as straightforward as it
2394 // seems, however, because this is always a 64-bit comparison on PPC64, even
2395 // for instructions that are 32-bit in nature (like slw for example).
2396 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2397 // for equality checks (as those don't depend on the sign). On PPC64,
2398 // we are restricted to equality for unsigned 64-bit comparisons and for
2399 // signed 32-bit comparisons the applicability is more restricted.
2400 bool isPPC64 = Subtarget.isPPC64();
2401 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2402 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2403 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2404
2405 // Look through copies unless that gets us to a physical register.
2406 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2407 if (ActualSrc.isVirtual())
2408 SrcReg = ActualSrc;
2409
2410 // Get the unique definition of SrcReg.
2411 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2412 if (!MI) return false;
2413
2414 bool equalityOnly = false;
2415 bool noSub = false;
2416 if (isPPC64) {
2417 if (is32BitSignedCompare) {
2418 // We can perform this optimization only if SrcReg is sign-extending.
2419 if (isSignExtended(SrcReg, MRI))
2420 noSub = true;
2421 else
2422 return false;
2423 } else if (is32BitUnsignedCompare) {
2424 // We can perform this optimization, equality only, if SrcReg is
2425 // zero-extending.
2426 if (isZeroExtended(SrcReg, MRI)) {
2427 noSub = true;
2428 equalityOnly = true;
2429 } else
2430 return false;
2431 } else
2432 equalityOnly = is64BitUnsignedCompare;
2433 } else
2434 equalityOnly = is32BitUnsignedCompare;
2435
2436 if (equalityOnly) {
2437 // We need to check the uses of the condition register in order to reject
2438 // non-equality comparisons.
2440 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2441 I != IE; ++I) {
2442 MachineInstr *UseMI = &*I;
2443 if (UseMI->getOpcode() == PPC::BCC) {
2445 unsigned PredCond = PPC::getPredicateCondition(Pred);
2446 // We ignore hint bits when checking for non-equality comparisons.
2447 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2448 return false;
2449 } else if (UseMI->getOpcode() == PPC::ISEL ||
2450 UseMI->getOpcode() == PPC::ISEL8) {
2451 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2452 if (SubIdx != PPC::sub_eq)
2453 return false;
2454 } else
2455 return false;
2456 }
2457 }
2458
2459 MachineBasicBlock::iterator I = CmpInstr;
2460
2461 // Scan forward to find the first use of the compare.
2462 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2463 ++I) {
2464 bool FoundUse = false;
2466 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2467 J != JE; ++J)
2468 if (&*J == &*I) {
2469 FoundUse = true;
2470 break;
2471 }
2472
2473 if (FoundUse)
2474 break;
2475 }
2476
2479
2480 // There are two possible candidates which can be changed to set CR[01].
2481 // One is MI, the other is a SUB instruction.
2482 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2483 MachineInstr *Sub = nullptr;
2484 if (SrcReg2 != 0)
2485 // MI is not a candidate for CMPrr.
2486 MI = nullptr;
2487 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2488 // same BB as the comparison. This is to allow the check below to avoid calls
2489 // (and other explicit clobbers); instead we should really check for these
2490 // more explicitly (in at least a few predecessors).
2491 else if (MI->getParent() != CmpInstr.getParent())
2492 return false;
2493 else if (Value != 0) {
2494 // The record-form instructions set CR bit based on signed comparison
2495 // against 0. We try to convert a compare against 1 or -1 into a compare
2496 // against 0 to exploit record-form instructions. For example, we change
2497 // the condition "greater than -1" into "greater than or equal to 0"
2498 // and "less than 1" into "less than or equal to 0".
2499
2500 // Since we optimize comparison based on a specific branch condition,
2501 // we don't optimize if condition code is used by more than once.
2502 if (equalityOnly || !MRI->hasOneUse(CRReg))
2503 return false;
2504
2505 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2506 if (UseMI->getOpcode() != PPC::BCC)
2507 return false;
2508
2510 unsigned PredCond = PPC::getPredicateCondition(Pred);
2511 unsigned PredHint = PPC::getPredicateHint(Pred);
2512 int16_t Immed = (int16_t)Value;
2513
2514 // When modifying the condition in the predicate, we propagate hint bits
2515 // from the original predicate to the new one.
2516 if (Immed == -1 && PredCond == PPC::PRED_GT)
2517 // We convert "greater than -1" into "greater than or equal to 0",
2518 // since we are assuming signed comparison by !equalityOnly
2519 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2520 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2521 // We convert "less than or equal to -1" into "less than 0".
2522 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2523 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2524 // We convert "less than 1" into "less than or equal to 0".
2525 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2526 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2527 // We convert "greater than or equal to 1" into "greater than 0".
2528 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2529 else
2530 return false;
2531
2532 // Convert the comparison and its user to a compare against zero with the
2533 // appropriate predicate on the branch. Zero comparison might provide
2534 // optimization opportunities post-RA (see optimization in
2535 // PPCPreEmitPeephole.cpp).
2536 UseMI->getOperand(0).setImm(Pred);
2537 CmpInstr.getOperand(2).setImm(0);
2538 }
2539
2540 // Search for Sub.
2541 --I;
2542
2543 // Get ready to iterate backward from CmpInstr.
2544 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2545
2546 for (; I != E && !noSub; --I) {
2547 const MachineInstr &Instr = *I;
2548 unsigned IOpC = Instr.getOpcode();
2549
2550 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2551 Instr.readsRegister(PPC::CR0, TRI)))
2552 // This instruction modifies or uses the record condition register after
2553 // the one we want to change. While we could do this transformation, it
2554 // would likely not be profitable. This transformation removes one
2555 // instruction, and so even forcing RA to generate one move probably
2556 // makes it unprofitable.
2557 return false;
2558
2559 // Check whether CmpInstr can be made redundant by the current instruction.
2560 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2561 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2562 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2563 ((Instr.getOperand(1).getReg() == SrcReg &&
2564 Instr.getOperand(2).getReg() == SrcReg2) ||
2565 (Instr.getOperand(1).getReg() == SrcReg2 &&
2566 Instr.getOperand(2).getReg() == SrcReg))) {
2567 Sub = &*I;
2568 break;
2569 }
2570
2571 if (I == B)
2572 // The 'and' is below the comparison instruction.
2573 return false;
2574 }
2575
2576 // Return false if no candidates exist.
2577 if (!MI && !Sub)
2578 return false;
2579
2580 // The single candidate is called MI.
2581 if (!MI) MI = Sub;
2582
2583 int NewOpC = -1;
2584 int MIOpC = MI->getOpcode();
2585 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2586 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2587 NewOpC = MIOpC;
2588 else {
2589 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2590 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2591 NewOpC = MIOpC;
2592 }
2593
2594 // FIXME: On the non-embedded POWER architectures, only some of the record
2595 // forms are fast, and we should use only the fast ones.
2596
2597 // The defining instruction has a record form (or is already a record
2598 // form). It is possible, however, that we'll need to reverse the condition
2599 // code of the users.
2600 if (NewOpC == -1)
2601 return false;
2602
2603 // This transformation should not be performed if `nsw` is missing and is not
2604 // `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2605 // CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2606 // CRReg can reflect if compared values are equal, this optz is still valid.
2607 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2608 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2609 return false;
2610
2611 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2612 // needs to be updated to be based on SUB. Push the condition code
2613 // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2614 // condition code of these operands will be modified.
2615 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2616 // comparison against 0, which may modify predicate.
2617 bool ShouldSwap = false;
2618 if (Sub && Value == 0) {
2619 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2620 Sub->getOperand(2).getReg() == SrcReg;
2621
2622 // The operands to subf are the opposite of sub, so only in the fixed-point
2623 // case, invert the order.
2624 ShouldSwap = !ShouldSwap;
2625 }
2626
2627 if (ShouldSwap)
2629 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2630 I != IE; ++I) {
2631 MachineInstr *UseMI = &*I;
2632 if (UseMI->getOpcode() == PPC::BCC) {
2634 unsigned PredCond = PPC::getPredicateCondition(Pred);
2635 assert((!equalityOnly ||
2636 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2637 "Invalid predicate for equality-only optimization");
2638 (void)PredCond; // To suppress warning in release build.
2639 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2641 } else if (UseMI->getOpcode() == PPC::ISEL ||
2642 UseMI->getOpcode() == PPC::ISEL8) {
2643 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2644 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2645 "Invalid CR bit for equality-only optimization");
2646
2647 if (NewSubReg == PPC::sub_lt)
2648 NewSubReg = PPC::sub_gt;
2649 else if (NewSubReg == PPC::sub_gt)
2650 NewSubReg = PPC::sub_lt;
2651
2652 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2653 NewSubReg));
2654 } else // We need to abort on a user we don't understand.
2655 return false;
2656 }
2657 assert(!(Value != 0 && ShouldSwap) &&
2658 "Non-zero immediate support and ShouldSwap"
2659 "may conflict in updating predicate");
2660
2661 // Create a new virtual register to hold the value of the CR set by the
2662 // record-form instruction. If the instruction was not previously in
2663 // record form, then set the kill flag on the CR.
2664 CmpInstr.eraseFromParent();
2665
2667 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2668 get(TargetOpcode::COPY), CRReg)
2669 .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
2670
2671 // Even if CR0 register were dead before, it is alive now since the
2672 // instruction we just built uses it.
2673 MI->clearRegisterDeads(PPC::CR0);
2674
2675 if (MIOpC != NewOpC) {
2676 // We need to be careful here: we're replacing one instruction with
2677 // another, and we need to make sure that we get all of the right
2678 // implicit uses and defs. On the other hand, the caller may be holding
2679 // an iterator to this instruction, and so we can't delete it (this is
2680 // specifically the case if this is the instruction directly after the
2681 // compare).
2682
2683 // Rotates are expensive instructions. If we're emitting a record-form
2684 // rotate that can just be an andi/andis, we should just emit that.
2685 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2686 Register GPRRes = MI->getOperand(0).getReg();
2687 int64_t SH = MI->getOperand(2).getImm();
2688 int64_t MB = MI->getOperand(3).getImm();
2689 int64_t ME = MI->getOperand(4).getImm();
2690 // We can only do this if both the start and end of the mask are in the
2691 // same halfword.
2692 bool MBInLoHWord = MB >= 16;
2693 bool MEInLoHWord = ME >= 16;
2694 uint64_t Mask = ~0LLU;
2695
2696 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2697 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2698 // The mask value needs to shift right 16 if we're emitting andis.
2699 Mask >>= MBInLoHWord ? 0 : 16;
2700 NewOpC = MIOpC == PPC::RLWINM
2701 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2702 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2703 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2704 (ME - MB + 1 == SH) && (MB >= 16)) {
2705 // If we are rotating by the exact number of bits as are in the mask
2706 // and the mask is in the least significant bits of the register,
2707 // that's just an andis. (as long as the GPR result has no uses).
2708 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2709 Mask >>= 16;
2710 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2711 }
2712 // If we've set the mask, we can transform.
2713 if (Mask != ~0LLU) {
2714 MI->removeOperand(4);
2715 MI->removeOperand(3);
2716 MI->getOperand(2).setImm(Mask);
2717 NumRcRotatesConvertedToRcAnd++;
2718 }
2719 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
2720 int64_t MB = MI->getOperand(3).getImm();
2721 if (MB >= 48) {
2722 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2723 NewOpC = PPC::ANDI8_rec;
2724 MI->removeOperand(3);
2725 MI->getOperand(2).setImm(Mask);
2726 NumRcRotatesConvertedToRcAnd++;
2727 }
2728 }
2729
2730 const MCInstrDesc &NewDesc = get(NewOpC);
2731 MI->setDesc(NewDesc);
2732
2733 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2734 if (!MI->definesRegister(ImpDef)) {
2735 MI->addOperand(*MI->getParent()->getParent(),
2736 MachineOperand::CreateReg(ImpDef, true, true));
2737 }
2738 }
2739 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2740 if (!MI->readsRegister(ImpUse)) {
2741 MI->addOperand(*MI->getParent()->getParent(),
2742 MachineOperand::CreateReg(ImpUse, false, true));
2743 }
2744 }
2745 }
2746 assert(MI->definesRegister(PPC::CR0) &&
2747 "Record-form instruction does not define cr0?");
2748
2749 // Modify the condition code of operands in OperandsToUpdate.
2750 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2751 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2752 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2753 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2754
2755 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2756 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2757
2758 return true;
2759}
2760
2763 if (MRI->isSSA())
2764 return false;
2765
2766 Register SrcReg, SrcReg2;
2767 int64_t CmpMask, CmpValue;
2768 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2769 return false;
2770
2771 // Try to optimize the comparison against 0.
2772 if (CmpValue || !CmpMask || SrcReg2)
2773 return false;
2774
2775 // The record forms set the condition register based on a signed comparison
2776 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2777 // equality checks in post-RA, we are more restricted on a unsigned
2778 // comparison.
2779 unsigned Opc = CmpMI.getOpcode();
2780 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2781 return false;
2782
2783 // The record forms are always based on a 64-bit comparison on PPC64
2784 // (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2785 // comparison. Since we can't do the equality checks in post-RA, we bail out
2786 // the case.
2787 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2788 return false;
2789
2790 // CmpMI can't be deleted if it has implicit def.
2791 if (CmpMI.hasImplicitDef())
2792 return false;
2793
2794 bool SrcRegHasOtherUse = false;
2795 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2796 if (!SrcMI || !SrcMI->definesRegister(SrcReg))
2797 return false;
2798
2799 MachineOperand RegMO = CmpMI.getOperand(0);
2800 Register CRReg = RegMO.getReg();
2801 if (CRReg != PPC::CR0)
2802 return false;
2803
2804 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2805 bool SeenUseOfCRReg = false;
2806 bool IsCRRegKilled = false;
2807 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2808 SeenUseOfCRReg) ||
2809 SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
2810 return false;
2811
2812 int SrcMIOpc = SrcMI->getOpcode();
2813 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2814 if (NewOpC == -1)
2815 return false;
2816
2817 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2818 LLVM_DEBUG(SrcMI->dump());
2819
2820 const MCInstrDesc &NewDesc = get(NewOpC);
2821 SrcMI->setDesc(NewDesc);
2822 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2824 SrcMI->clearRegisterDeads(CRReg);
2825
2826 assert(SrcMI->definesRegister(PPC::CR0) &&
2827 "Record-form instruction does not define cr0?");
2828
2829 LLVM_DEBUG(dbgs() << "with: ");
2830 LLVM_DEBUG(SrcMI->dump());
2831 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2832 LLVM_DEBUG(CmpMI.dump());
2833 return true;
2834}
2835
2838 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2839 const TargetRegisterInfo *TRI) const {
2840 const MachineOperand *BaseOp;
2841 OffsetIsScalable = false;
2842 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2843 return false;
2844 BaseOps.push_back(BaseOp);
2845 return true;
2846}
2847
2848static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2849 const TargetRegisterInfo *TRI) {
2850 // If this is a volatile load/store, don't mess with it.
2851 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2852 return false;
2853
2854 if (LdSt.getOperand(2).isFI())
2855 return true;
2856
2857 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2858 // Can't cluster if the instruction modifies the base register
2859 // or it is update form. e.g. ld r2,3(r2)
2860 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2861 return false;
2862
2863 return true;
2864}
2865
2866// Only cluster instruction pair that have the same opcode, and they are
2867// clusterable according to PowerPC specification.
2868static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2869 const PPCSubtarget &Subtarget) {
2870 switch (FirstOpc) {
2871 default:
2872 return false;
2873 case PPC::STD:
2874 case PPC::STFD:
2875 case PPC::STXSD:
2876 case PPC::DFSTOREf64:
2877 return FirstOpc == SecondOpc;
2878 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2879 // 32bit and 64bit instruction selection. They are clusterable pair though
2880 // they are different opcode.
2881 case PPC::STW:
2882 case PPC::STW8:
2883 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2884 }
2885}
2886
2888 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2889 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2890 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2891 unsigned NumBytes) const {
2892
2893 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2894 const MachineOperand &BaseOp1 = *BaseOps1.front();
2895 const MachineOperand &BaseOp2 = *BaseOps2.front();
2896 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2897 "Only base registers and frame indices are supported.");
2898
2899 // ClusterSize means the number of memory operations that will have been
2900 // clustered if this hook returns true.
2901 // Don't cluster memory op if there are already two ops clustered at least.
2902 if (ClusterSize > 2)
2903 return false;
2904
2905 // Cluster the load/store only when they have the same base
2906 // register or FI.
2907 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2908 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2909 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2910 return false;
2911
2912 // Check if the load/store are clusterable according to the PowerPC
2913 // specification.
2914 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2915 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2916 unsigned FirstOpc = FirstLdSt.getOpcode();
2917 unsigned SecondOpc = SecondLdSt.getOpcode();
2919 // Cluster the load/store only when they have the same opcode, and they are
2920 // clusterable opcode according to PowerPC specification.
2921 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2922 return false;
2923
2924 // Can't cluster load/store that have ordered or volatile memory reference.
2925 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2926 !isLdStSafeToCluster(SecondLdSt, TRI))
2927 return false;
2928
2929 int64_t Offset1 = 0, Offset2 = 0;
2930 LocationSize Width1 = 0, Width2 = 0;
2931 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2932 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2933 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2934 Width1 != Width2)
2935 return false;
2936
2937 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2938 "getMemOperandWithOffsetWidth return incorrect base op");
2939 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2940 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2941 return Offset1 + (int64_t)Width1.getValue() == Offset2;
2942}
2943
2944/// GetInstSize - Return the number of bytes of code the specified
2945/// instruction may be. This returns the maximum number of bytes.
2946///
2948 unsigned Opcode = MI.getOpcode();
2949
2950 if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
2951 const MachineFunction *MF = MI.getParent()->getParent();
2952 const char *AsmStr = MI.getOperand(0).getSymbolName();
2953 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
2954 } else if (Opcode == TargetOpcode::STACKMAP) {
2955 StackMapOpers Opers(&MI);
2956 return Opers.getNumPatchBytes();
2957 } else if (Opcode == TargetOpcode::PATCHPOINT) {
2958 PatchPointOpers Opers(&MI);
2959 return Opers.getNumPatchBytes();
2960 } else {
2961 return get(Opcode).getSize();
2962 }
2963}
2964
2965std::pair<unsigned, unsigned>
2967 // PPC always uses a direct mask.
2968 return std::make_pair(TF, 0u);
2969}
2970
2973 using namespace PPCII;
2974 static const std::pair<unsigned, const char *> TargetFlags[] = {
2975 {MO_PLT, "ppc-plt"},
2976 {MO_PIC_FLAG, "ppc-pic"},
2977 {MO_PCREL_FLAG, "ppc-pcrel"},
2978 {MO_GOT_FLAG, "ppc-got"},
2979 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
2980 {MO_TLSGD_FLAG, "ppc-tlsgd"},
2981 {MO_TPREL_FLAG, "ppc-tprel"},
2982 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
2983 {MO_TLSLD_FLAG, "ppc-tlsld"},
2984 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
2985 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
2986 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
2987 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
2988 {MO_LO, "ppc-lo"},
2989 {MO_HA, "ppc-ha"},
2990 {MO_TPREL_LO, "ppc-tprel-lo"},
2991 {MO_TPREL_HA, "ppc-tprel-ha"},
2992 {MO_DTPREL_LO, "ppc-dtprel-lo"},
2993 {MO_TLSLD_LO, "ppc-tlsld-lo"},
2994 {MO_TOC_LO, "ppc-toc-lo"},
2995 {MO_TLS, "ppc-tls"},
2996 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
2997 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
2998 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
2999 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3000 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3001 };
3002 return ArrayRef(TargetFlags);
3003}
3004
3005// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3006// The VSX versions have the advantage of a full 64-register target whereas
3007// the FP ones have the advantage of lower latency and higher throughput. So
3008// what we are after is using the faster instructions in low register pressure
3009// situations and using the larger register file in high register pressure
3010// situations.
3012 unsigned UpperOpcode, LowerOpcode;
3013 switch (MI.getOpcode()) {
3014 case PPC::DFLOADf32:
3015 UpperOpcode = PPC::LXSSP;
3016 LowerOpcode = PPC::LFS;
3017 break;
3018 case PPC::DFLOADf64:
3019 UpperOpcode = PPC::LXSD;
3020 LowerOpcode = PPC::LFD;
3021 break;
3022 case PPC::DFSTOREf32:
3023 UpperOpcode = PPC::STXSSP;
3024 LowerOpcode = PPC::STFS;
3025 break;
3026 case PPC::DFSTOREf64:
3027 UpperOpcode = PPC::STXSD;
3028 LowerOpcode = PPC::STFD;
3029 break;
3030 case PPC::XFLOADf32:
3031 UpperOpcode = PPC::LXSSPX;
3032 LowerOpcode = PPC::LFSX;
3033 break;
3034 case PPC::XFLOADf64:
3035 UpperOpcode = PPC::LXSDX;
3036 LowerOpcode = PPC::LFDX;
3037 break;
3038 case PPC::XFSTOREf32:
3039 UpperOpcode = PPC::STXSSPX;
3040 LowerOpcode = PPC::STFSX;
3041 break;
3042 case PPC::XFSTOREf64:
3043 UpperOpcode = PPC::STXSDX;
3044 LowerOpcode = PPC::STFDX;
3045 break;
3046 case PPC::LIWAX:
3047 UpperOpcode = PPC::LXSIWAX;
3048 LowerOpcode = PPC::LFIWAX;
3049 break;
3050 case PPC::LIWZX:
3051 UpperOpcode = PPC::LXSIWZX;
3052 LowerOpcode = PPC::LFIWZX;
3053 break;
3054 case PPC::STIWX:
3055 UpperOpcode = PPC::STXSIWX;
3056 LowerOpcode = PPC::STFIWX;
3057 break;
3058 default:
3059 llvm_unreachable("Unknown Operation!");
3060 }
3061
3062 Register TargetReg = MI.getOperand(0).getReg();
3063 unsigned Opcode;
3064 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3065 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3066 Opcode = LowerOpcode;
3067 else
3068 Opcode = UpperOpcode;
3069 MI.setDesc(get(Opcode));
3070 return true;
3071}
3072
3073static bool isAnImmediateOperand(const MachineOperand &MO) {
3074 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3075}
3076
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    // ACC/UACC register n overlays the four consecutive VSL registers
    // starting at 4*n; copying is only needed when the groups differ.
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        // XXLOR with both source operands equal acts as a register copy.
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC instruction
    // with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    // Rewrite the pseudo as an unencoded NOP with no operands.
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    assert(Subtarget.isTargetLinux() &&
           "Only Linux target is expected to contain LOAD_STACK_GUARD");
    // The stack-guard canary lives at a fixed (negative) offset from the
    // thread pointer: X13 on 64-bit, R2 on 32-bit.
    const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::PPCLdFixedAddr: {
    assert(Subtarget.getTargetTriple().isOSGlibc() &&
           "Only targets with Glibc expected to contain PPCLdFixedAddr");
    int64_t Offset = 0;
    // Thread-pointer register, as above.
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(PPC::LWZ));
    uint64_t FAType = MI.getOperand(1).getImm();
// Re-expand PPCTargetParser.def to get the fixed-address offset constants
// (PPC_HWCAP_OFFSET_*, etc.) as macros in this scope.
#undef PPC_LNX_FEATURE
#undef PPC_LNX_CPU
#define PPC_LNX_DEFINE_OFFSETS
#include "llvm/TargetParser/PPCTargetParser.def"
    bool IsLE = Subtarget.isLittleEndian();
    bool Is64 = Subtarget.isPPC64();
    // Pick the TCB offset matching the requested word and the target's
    // endianness/word size.
    if (FAType == PPC_FAWORD_HWCAP) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_HWCAP2) {
      if (IsLE)
        Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
    } else if (FAType == PPC_FAWORD_CPUID) {
      if (IsLE)
        Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
      else
        Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
    }
    assert(Offset && "Do not know the offset for this fixed addr load");
    MI.removeOperand(1);
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
// Clean the .def macros back out of this translation unit.
#define PPC_TGT_PARSER_UNDEF_MACROS
#include "llvm/TargetParser/PPCTargetParser.def"
#undef PPC_TGT_PARSER_UNDEF_MACROS
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::SPILLTOVSR_LD: {
    // Reload of a GPR-or-VSR spill slot: pick the opcode matching the class
    // the register allocator actually assigned.
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg)) {
      MI.setDesc(get(PPC::DFLOADf64));
      // Recurse so the D-form pseudo itself is expanded.
      return expandPostRAPseudo(MI);
    } else
      MI.setDesc(get(PPC::LD));
    return true;
  }
  case PPC::SPILLTOVSR_ST: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::DFSTOREf64));
      // Recurse so the D-form pseudo itself is expanded.
      return expandPostRAPseudo(MI);
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STD));
    }
    return true;
  }
  case PPC::SPILLTOVSR_LDX: {
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg))
      MI.setDesc(get(PPC::LXSDX));
    else
      MI.setDesc(get(PPC::LDX));
    return true;
  }
  case PPC::SPILLTOVSR_STX: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::STXSDX));
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STDX));
    }
    return true;
  }

  // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE:
  case PPC::CFENCE8: {
    // Expand the fence to cmp; bc+8; isync — a compare on the value, a
    // control dependency on its CR result, then a context-synchronizing
    // isync (the pseudo itself becomes the ISYNC).
    auto Val = MI.getOperand(0).getReg();
    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
    // NOTE(review): CTRL_DEP is usually built with a predicate immediate
    // (PPC::PRED_NE_MINUS) before the CR operand — the extracted text may
    // have lost that line; confirm against upstream.
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  // Not a pseudo this hook knows how to expand.
  return false;
}
3247
3248// Essentially a compile-time implementation of a compare->isel sequence.
3249// It takes two constants to compare, along with the true/false registers
3250// and the comparison type (as a subreg to a CR field) and returns one
3251// of the true/false registers, depending on the comparison results.
3252static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3253 unsigned TrueReg, unsigned FalseReg,
3254 unsigned CRSubReg) {
3255 // Signed comparisons. The immediates are assumed to be sign-extended.
3256 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3257 switch (CRSubReg) {
3258 default: llvm_unreachable("Unknown integer comparison type.");
3259 case PPC::sub_lt:
3260 return Imm1 < Imm2 ? TrueReg : FalseReg;
3261 case PPC::sub_gt:
3262 return Imm1 > Imm2 ? TrueReg : FalseReg;
3263 case PPC::sub_eq:
3264 return Imm1 == Imm2 ? TrueReg : FalseReg;
3265 }
3266 }
3267 // Unsigned comparisons.
3268 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3269 switch (CRSubReg) {
3270 default: llvm_unreachable("Unknown integer comparison type.");
3271 case PPC::sub_lt:
3272 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3273 case PPC::sub_gt:
3274 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3275 case PPC::sub_eq:
3276 return Imm1 == Imm2 ? TrueReg : FalseReg;
3277 }
3278 }
3279 return PPC::NoRegister;
3280}
3281
3283 unsigned OpNo,
3284 int64_t Imm) const {
3285 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3286 // Replace the REG with the Immediate.
3287 Register InUseReg = MI.getOperand(OpNo).getReg();
3288 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3289
3290 // We need to make sure that the MI didn't have any implicit use
3291 // of this REG any more. We don't call MI.implicit_operands().empty() to
3292 // return early, since MI's MCID might be changed in calling context, as a
3293 // result its number of explicit operands may be changed, thus the begin of
3294 // implicit operand is changed.
3296 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
3297 if (UseOpIdx >= 0) {
3298 MachineOperand &MO = MI.getOperand(UseOpIdx);
3299 if (MO.isImplicit())
3300 // The operands must always be in the following order:
3301 // - explicit reg defs,
3302 // - other explicit operands (reg uses, immediates, etc.),
3303 // - implicit reg defs
3304 // - implicit reg uses
3305 // Therefore, removing the implicit operand won't change the explicit
3306 // operands layout.
3307 MI.removeOperand(UseOpIdx);
3308 }
3309}
3310
3311// Replace an instruction with one that materializes a constant (and sets
3312// CR0 if the original instruction was a record-form instruction).
3314 const LoadImmediateInfo &LII) const {
3315 // Remove existing operands.
3316 int OperandToKeep = LII.SetCR ? 1 : 0;
3317 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3318 MI.removeOperand(i);
3319
3320 // Replace the instruction.
3321 if (LII.SetCR) {
3322 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3323 // Set the immediate.
3324 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3325 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3326 return;
3327 }
3328 else
3329 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3330
3331 // Set the immediate.
3332 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3333 .addImm(LII.Imm);
3334}
3335
3337 bool &SeenIntermediateUse) const {
3338 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3339 "Should be called after register allocation.");
3341 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3342 It++;
3343 SeenIntermediateUse = false;
3344 for (; It != E; ++It) {
3345 if (It->modifiesRegister(Reg, TRI))
3346 return &*It;
3347 if (It->readsRegister(Reg, TRI))
3348 SeenIntermediateUse = true;
3349 }
3350 return nullptr;
3351}
3352
3355 const DebugLoc &DL, Register Reg,
3356 int64_t Imm) const {
3358 "Register should be in non-SSA form after RA");
3359 bool isPPC64 = Subtarget.isPPC64();
3360 // FIXME: Materialization here is not optimal.
3361 // For some special bit patterns we can use less instructions.
3362 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3363 if (isInt<16>(Imm)) {
3364 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3365 } else if (isInt<32>(Imm)) {
3366 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3367 .addImm(Imm >> 16);
3368 if (Imm & 0xFFFF)
3369 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3370 .addReg(Reg, RegState::Kill)
3371 .addImm(Imm & 0xFFFF);
3372 } else {
3373 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3374 "only supported in PPC64");
3375 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3376 if ((Imm >> 32) & 0xFFFF)
3377 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3378 .addReg(Reg, RegState::Kill)
3379 .addImm((Imm >> 32) & 0xFFFF);
3380 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3381 .addReg(Reg, RegState::Kill)
3382 .addImm(32)
3383 .addImm(31);
3384 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3385 .addReg(Reg, RegState::Kill)
3386 .addImm((Imm >> 16) & 0xFFFF);
3387 if (Imm & 0xFFFF)
3388 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3389 .addReg(Reg, RegState::Kill)
3390 .addImm(Imm & 0xFFFF);
3391 }
3392}
3393
3394MachineInstr *PPCInstrInfo::getForwardingDefMI(
3396 unsigned &OpNoForForwarding,
3397 bool &SeenIntermediateUse) const {
3398 OpNoForForwarding = ~0U;
3399 MachineInstr *DefMI = nullptr;
3400 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3402 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3403 // within the basic block to see if the register is defined using an
3404 // LI/LI8/ADDI/ADDI8.
3405 if (MRI->isSSA()) {
3406 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3407 if (!MI.getOperand(i).isReg())
3408 continue;
3409 Register Reg = MI.getOperand(i).getReg();
3410 if (!Reg.isVirtual())
3411 continue;
3412 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3413 if (TrueReg.isVirtual()) {
3414 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3415 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3416 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3417 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3418 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3419 OpNoForForwarding = i;
3420 DefMI = DefMIForTrueReg;
3421 // The ADDI and LI operand maybe exist in one instruction at same
3422 // time. we prefer to fold LI operand as LI only has one Imm operand
3423 // and is more possible to be converted. So if current DefMI is
3424 // ADDI/ADDI8, we continue to find possible LI/LI8.
3425 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3426 break;
3427 }
3428 }
3429 }
3430 } else {
3431 // Looking back through the definition for each operand could be expensive,
3432 // so exit early if this isn't an instruction that either has an immediate
3433 // form or is already an immediate form that we can handle.
3434 ImmInstrInfo III;
3435 unsigned Opc = MI.getOpcode();
3436 bool ConvertibleImmForm =
3437 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3438 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3439 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3440 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3441 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3442 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3443 Opc == PPC::RLWINM8_rec;
3444 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3445 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3446 : false;
3447 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3448 return nullptr;
3449
3450 // Don't convert or %X, %Y, %Y since that's just a register move.
3451 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3452 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3453 return nullptr;
3454 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3455 MachineOperand &MO = MI.getOperand(i);
3456 SeenIntermediateUse = false;
3457 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3458 Register Reg = MI.getOperand(i).getReg();
3459 // If we see another use of this reg between the def and the MI,
3460 // we want to flag it so the def isn't deleted.
3461 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3462 if (DefMI) {
3463 // Is this register defined by some form of add-immediate (including
3464 // load-immediate) within this basic block?
3465 switch (DefMI->getOpcode()) {
3466 default:
3467 break;
3468 case PPC::LI:
3469 case PPC::LI8:
3470 case PPC::ADDItocL8:
3471 case PPC::ADDI:
3472 case PPC::ADDI8:
3473 OpNoForForwarding = i;
3474 return DefMI;
3475 }
3476 }
3477 }
3478 }
3479 }
3480 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3481}
3482
3483unsigned PPCInstrInfo::getSpillTarget() const {
3484 // With P10, we may need to spill paired vector registers or accumulator
3485 // registers. MMA implies paired vectors, so we can just check that.
3486 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3487 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3488 2 : Subtarget.hasP9Vector() ?
3489 1 : 0;
3490}
3491
3492ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3493 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3494}
3495
3496ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3497 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3498}
3499
3500// This opt tries to convert the following imm form to an index form to save an
3501// add for stack variables.
3502// Return false if no such pattern found.
3503//
3504// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3505// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3506// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3507//
3508// can be converted to:
3509//
3510// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3511// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3512//
3513// In order to eliminate ADD instr, make sure that:
3514// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3515// new ADDI instr and ADDI can only take int16 Imm.
3516// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3517// between ADDI and ADD instr since its original def in ADDI will be changed
3518// in new ADDI instr. And also there should be no new def for it between
3519// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3520// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3521// between ADD and Imm instr since ADD instr will be eliminated.
3522// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3523// moved to Index instr.
3525 MachineFunction *MF = MI.getParent()->getParent();
3527 bool PostRA = !MRI->isSSA();
3528 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3529 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3530 // frame base(OffsetAddi) are determined.
3531 if (!PostRA)
3532 return false;
3533 unsigned ToBeDeletedReg = 0;
3534 int64_t OffsetImm = 0;
3535 unsigned XFormOpcode = 0;
3536 ImmInstrInfo III;
3537
3538 // Check if Imm instr meets requirement.
3539 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3540 III))
3541 return false;
3542
3543 bool OtherIntermediateUse = false;
3544 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3545
3546 // Exit if there is other use between ADD and Imm instr or no def found.
3547 if (OtherIntermediateUse || !ADDMI)
3548 return false;
3549
3550 // Check if ADD instr meets requirement.
3551 if (!isADDInstrEligibleForFolding(*ADDMI))
3552 return false;
3553
3554 unsigned ScaleRegIdx = 0;
3555 int64_t OffsetAddi = 0;
3556 MachineInstr *ADDIMI = nullptr;
3557
3558 // Check if there is a valid ToBeChangedReg in ADDMI.
3559 // 1: It must be killed.
3560 // 2: Its definition must be a valid ADDIMI.
3561 // 3: It must satify int16 offset requirement.
3562 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3563 ScaleRegIdx = 2;
3564 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3565 ScaleRegIdx = 1;
3566 else
3567 return false;
3568
3569 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3570 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3571 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3572 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3574 for (auto It = ++Start; It != End; It++)
3575 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3576 return true;
3577 return false;
3578 };
3579
3580 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3581 // treated as special zero when ScaleReg is R0/X0 register.
3582 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3583 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3584 return false;
3585
3586 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3587 // and Imm Instr.
3588 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3589 return false;
3590
3591 // Now start to do the transformation.
3592 LLVM_DEBUG(dbgs() << "Replace instruction: "
3593 << "\n");
3594 LLVM_DEBUG(ADDIMI->dump());
3595 LLVM_DEBUG(ADDMI->dump());
3596 LLVM_DEBUG(MI.dump());
3597 LLVM_DEBUG(dbgs() << "with: "
3598 << "\n");
3599
3600 // Update ADDI instr.
3601 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3602
3603 // Update Imm instr.
3604 MI.setDesc(get(XFormOpcode));
3605 MI.getOperand(III.ImmOpNo)
3606 .ChangeToRegister(ScaleReg, false, false,
3607 ADDMI->getOperand(ScaleRegIdx).isKill());
3608
3609 MI.getOperand(III.OpNoForForwarding)
3610 .ChangeToRegister(ToBeChangedReg, false, false, true);
3611
3612 // Eliminate ADD instr.
3613 ADDMI->eraseFromParent();
3614
3615 LLVM_DEBUG(ADDIMI->dump());
3616 LLVM_DEBUG(MI.dump());
3617
3618 return true;
3619}
3620
3622 int64_t &Imm) const {
3623 unsigned Opc = ADDIMI.getOpcode();
3624
3625 // Exit if the instruction is not ADDI.
3626 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3627 return false;
3628
3629 // The operand may not necessarily be an immediate - it could be a relocation.
3630 if (!ADDIMI.getOperand(2).isImm())
3631 return false;
3632
3633 Imm = ADDIMI.getOperand(2).getImm();
3634
3635 return true;
3636}
3637
3639 unsigned Opc = ADDMI.getOpcode();
3640
3641 // Exit if the instruction is not ADD.
3642 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3643}
3644
3646 unsigned &ToBeDeletedReg,
3647 unsigned &XFormOpcode,
3648 int64_t &OffsetImm,
3649 ImmInstrInfo &III) const {
3650 // Only handle load/store.
3651 if (!MI.mayLoadOrStore())
3652 return false;
3653
3654 unsigned Opc = MI.getOpcode();
3655
3656 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3657
3658 // Exit if instruction has no index form.
3659 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3660 return false;
3661
3662 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3663 if (!instrHasImmForm(XFormOpcode,
3664 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3665 return false;
3666
3667 if (!III.IsSummingOperands)
3668 return false;
3669
3670 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3671 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3672 // Only support imm operands, not relocation slots or others.
3673 if (!ImmOperand.isImm())
3674 return false;
3675
3676 assert(RegOperand.isReg() && "Instruction format is not right");
3677
3678 // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3679 if (!RegOperand.isKill())
3680 return false;
3681
3682 ToBeDeletedReg = RegOperand.getReg();
3683 OffsetImm = ImmOperand.getImm();
3684
3685 return true;
3686}
3687
                                         MachineInstr *&ADDIMI,
                                         int64_t &OffsetAddi,
                                         int64_t OffsetImm) const {
  assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  MachineOperand &MO = ADDMI->getOperand(Index);

  // The register consumed here must die at this ADD, otherwise it is still
  // live afterwards and rewriting its ADDI definition would be unsound.
  if (!MO.isKill())
    return false;

  bool OtherIntermediateUse = false;

  // Find the post-RA defining instruction of the killed register; also learn
  // whether anything else between the def and the ADD reads it.
  ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  // Currently handle only one "add + Imminstr" pair case, exit if other
  // intermediate use for ToBeChangedReg found.
  // TODO: handle the cases where there are other "add + Imminstr" pairs
  // with same offset in Imminstr which is like:
  //
  // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
  // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
  // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
  //
  // can be converted to:
  //
  // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  //                                  (OffsetAddi + OffsetImm)
  // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
  // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)

  if (OtherIntermediateUse || !ADDIMI)
    return false;
  // Check if ADDI instr meets requirement.
  if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
    return false;

  // The combined displacement must still fit in the signed 16-bit D field
  // of the ADDI that will be rewritten.
  if (isInt<16>(OffsetAddi + OffsetImm))
    return true;
  return false;
}
3729
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
                                             SmallSet<Register, 4> &RegsToUpdate,
                                             MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  // NOTE(review): the declaration of 'MRI' (presumably &MF->getRegInfo())
  // appears to have been dropped from this excerpt — verify upstream.
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  // Find the LI/ADDI-style def feeding one of MI's register operands.
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  // The def becomes dead only if MI was its last (killing) use and nothing in
  // between also used the register.
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // Conservatively add defs from DefMI and defs/uses from MI to the set of
  // registers that need their kill flags updated.
  for (const MachineOperand &MO : DefMI->operands())
    if (MO.isReg() && MO.isDef())
      RegsToUpdate.insert(MO.getReg());
  for (const MachineOperand &MO : MI.operands())
    if (MO.isReg())
      RegsToUpdate.insert(MO.getReg());

  // If this is an imm instruction and its register operand is produced by
  // ADDI, put the imm into the imm inst directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
      PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? PPC::isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simplified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}
3794
                                 MachineInstr **ToErase) const {
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  Register FoldingReg = MI.getOperand(1).getReg();
  // A unique def can only be queried for virtual registers (pre-RA/SSA).
  if (!FoldingReg.isVirtual())
    return false;
  MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
  // The feeding instruction must itself be a 32-bit rotate-and-mask.
  if (SrcMI->getOpcode() != PPC::RLWINM &&
      SrcMI->getOpcode() != PPC::RLWINM_rec &&
      SrcMI->getOpcode() != PPC::RLWINM8 &&
      SrcMI->getOpcode() != PPC::RLWINM8_rec)
    return false;
  assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
          MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
          SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
         "Invalid PPC::RLWINM Instruction!");
  // SH = rotate amount, MB = mask begin, ME = mask end for both the feeding
  // rotate (Src) and the consuming rotate (MI).
  uint64_t SHSrc = SrcMI->getOperand(2).getImm();
  uint64_t SHMI = MI.getOperand(2).getImm();
  uint64_t MBSrc = SrcMI->getOperand(3).getImm();
  uint64_t MBMI = MI.getOperand(3).getImm();
  uint64_t MESrc = SrcMI->getOperand(4).getImm();
  uint64_t MEMI = MI.getOperand(4).getImm();

  assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
         "Invalid PPC::RLWINM Instruction!");
  // If MBMI is bigger than MEMI, we always can not get run of ones.
  // RotatedSrcMask non-wrap:
  //                 0........31|32........63
  // RotatedSrcMask:     B---E        B---E
  // MaskMI:         -----------|--E  B------
  // Result:           -----          ---     (Bad candidate)
  //
  // RotatedSrcMask wrap:
  //                 0........31|32........63
  // RotatedSrcMask: --E   B----|--E    B----
  // MaskMI:         -----------|--E  B------
  // Result:         ---   -----|---    ----- (Bad candidate)
  //
  // One special case is RotatedSrcMask is a full set mask.
  // RotatedSrcMask full:
  //                 0........31|32........63
  // RotatedSrcMask: ------EB---|-------EB---
  // MaskMI:         -----------|--E  B------
  // Result:         -----------|---  ------- (Good candidate)

  // Mark special case.
  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);

  // For other MBMI > MEMI cases, just return.
  if ((MBMI > MEMI) && !SrcMaskFull)
    return false;

  // Handle MBMI <= MEMI cases.
  APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
  // In MI, we only need low 32 bits of SrcMI, just consider about low 32
  // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
  // while in PowerPC ISA, lowerest bit is at index 63.
  APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);

  // Rotating Src's mask by MI's shift gives the mask the combined rotate
  // would apply; ANDing with MI's mask gives the net result mask.
  APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
  APInt FinalMask = RotatedSrcMask & MaskMI;
  uint32_t NewMB, NewME;
  bool Simplified = false;

  // If final mask is 0, MI result should be 0 too.
  if (FinalMask.isZero()) {
    bool Is64Bit =
        (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Replace Instr: ");
    LLVM_DEBUG(MI.dump());

    if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
      // Replace MI with "LI 0"
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.removeOperand(2);
      MI.getOperand(1).ChangeToImmediate(0);
      MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
    } else {
      // Record-form: keep the CR0 update by using "ANDI_rec reg, 0" instead.
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.getOperand(2).setImm(0);
      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
      if (SrcMI->getOperand(1).isKill()) {
        // Transfer the kill flag from SrcMI's source to MI's new source.
        MI.getOperand(1).setIsKill(true);
        SrcMI->getOperand(1).setIsKill(false);
      } else
        // About to replace MI.getOperand(1), clear its kill flag.
        MI.getOperand(1).setIsKill(false);
    }

    LLVM_DEBUG(dbgs() << "With: ");
    LLVM_DEBUG(MI.dump());

  } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
              NewMB <= NewME) ||
             SrcMaskFull) {
    // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
    // than NewME. Otherwise we get a 64 bit value after folding, but MI
    // return a 32 bit value.
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Converting Instr: ");
    LLVM_DEBUG(MI.dump());

    // Fold the two rotates into one: rotate amounts add modulo 32.
    uint16_t NewSH = (SHSrc + SHMI) % 32;
    MI.getOperand(2).setImm(NewSH);
    // If SrcMI mask is full, no need to update MBMI and MEMI.
    if (!SrcMaskFull) {
      MI.getOperand(3).setImm(NewMB);
      MI.getOperand(4).setImm(NewME);
    }
    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
    if (SrcMI->getOperand(1).isKill()) {
      MI.getOperand(1).setIsKill(true);
      SrcMI->getOperand(1).setIsKill(false);
    } else
      // About to replace MI.getOperand(1), clear its kill flag.
      MI.getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "To: ");
    LLVM_DEBUG(MI.dump());
  }
  // NOTE(review): '&' below is a bitwise AND of two bools — equivalent to
  // '&&' except for short-circuiting (use_nodbg_empty is a pure query, so
  // this is harmless), but '&&' would be the idiomatic spelling.
  if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
      !SrcMI->hasImplicitDef()) {
    // If FoldingReg has no non-debug use and it has no implicit def (it
    // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
    // Otherwise keep it.
    *ToErase = SrcMI;
    LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
    LLVM_DEBUG(SrcMI->dump());
  }
  return Simplified;
}
3931
// Determine whether the reg+reg instruction \p Opc has an equivalent reg+imm
// form and, if so, populate \p III with everything needed to perform the
// rewrite: the imm-form opcode, which operand takes the immediate, the
// immediate's width/signedness, any alignment (ImmMustBeMultipleOf) or
// truncation (TruncateImmTo) constraints, and the special-zero-register
// semantics. \p IsVFReg says whether operand 0 is a VF register and
// \p PostRA selects pseudo-ops vs. real opcodes for the Power9 cases below.
// Returns false when no imm form exists (or the subtarget lacks it).
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      // Shift-right-algebraic takes an exact 5-bit shift amount.
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      // 64-bit shift-right-algebraic takes an exact 6-bit shift amount.
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      // DS-form: the 14-bit displacement is implicitly scaled, so it must be
      // a multiple of 4.
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    // Update-form loads/stores: operand 0 is the loaded/stored value and
    // operand 1 the updated base, so the displacement goes in operand 2 and
    // the forwarded register is operand 3.
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        // Post-RA we know the actual register: VF registers get the DQ/DS
        // form, plain FPRs fall back to the classic D-form load.
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
4352
// Utility function for swapping two arbitrary operands of an instruction.
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  // Copy both operands out before removing them; removal would invalidate
  // references into the operand list.
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  // Remove the higher-numbered operand first so the lower index stays valid.
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    // NOTE(review): the declaration of the temporary vector 'MOps' (a
    // SmallVector of MachineOperand) appears to have been dropped from this
    // excerpt — verify against upstream.
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
4391
4392// Check if the 'MI' that has the index OpNoForForwarding
4393// meets the requirement described in the ImmInstrInfo.
4394bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4395 const ImmInstrInfo &III,
4396 unsigned OpNoForForwarding
4397 ) const {
4398 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4399 // would not work pre-RA, we can only do the check post RA.
4400 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4401 if (MRI.isSSA())
4402 return false;
4403
4404 // Cannot do the transform if MI isn't summing the operands.
4405 if (!III.IsSummingOperands)
4406 return false;
4407
4408 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4409 if (!III.ZeroIsSpecialOrig)
4410 return false;
4411
4412 // We cannot do the transform if the operand we are trying to replace
4413 // isn't the same as the operand the instruction allows.
4414 if (OpNoForForwarding != III.OpNoForForwarding)
4415 return false;
4416
4417 // Check if the instruction we are trying to transform really has
4418 // the special zero register as its operand.
4419 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4420 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4421 return false;
4422
4423 // This machine instruction is convertible if it is,
4424 // 1. summing the operands.
4425 // 2. one of the operands is special zero register.
4426 // 3. the operand we are trying to replace is allowed by the MI.
4427 return true;
4428}
4429
4430// Check if the DefMI is the add inst and set the ImmMO and RegMO
4431// accordingly.
4432bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4433 const ImmInstrInfo &III,
4434 MachineOperand *&ImmMO,
4435 MachineOperand *&RegMO) const {
4436 unsigned Opc = DefMI.getOpcode();
4437 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4438 return false;
4439
4440 assert(DefMI.getNumOperands() >= 3 &&
4441 "Add inst must have at least three operands");
4442 RegMO = &DefMI.getOperand(1);
4443 ImmMO = &DefMI.getOperand(2);
4444
4445 // Before RA, ADDI first operand could be a frame index.
4446 if (!RegMO->isReg())
4447 return false;
4448
4449 // This DefMI is elgible for forwarding if it is:
4450 // 1. add inst
4451 // 2. one of the operands is Imm/CPI/Global.
4452 return isAnImmediateOperand(*ImmMO);
4453}
4454
bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x -> z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
  // NOTE(review): the declaration of the reverse iterator 'It' (starting at
  // MI) appears to have been dropped from this excerpt — verify upstream.
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    // Any intervening redefinition of Reg (other than DefMI itself) makes
    // forwarding unsound.
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    // Record intermediate reads so the caller can fix up kill flags.
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}
4495
bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
    // However, we know that, it is 16-bit width, and has the alignment of 4.
    // Check if the instruction met the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
       III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
      // NOTE(review): the alignment condition that consumes DL (checking the
      // global's alignment against III.ImmMustBeMultipleOf) appears to be
      // missing from this excerpt — confirm against upstream before relying
      // on this path.
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is Imm, we need to check if the Imm fit the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    // Enforce the imm-form's alignment requirement (e.g. DS/DQ forms).
    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    // Some imm-forms only encode the low 5/6/7 bits; truncate accordingly.
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  }
  else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in ImmInstrInfo.
  return true;
}
4547
4548bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4549 unsigned OpNoForForwarding,
4550 MachineInstr **KilledDef) const {
4551 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4552 !DefMI.getOperand(1).isImm())
4553 return false;
4554
4555 MachineFunction *MF = MI.getParent()->getParent();
4557 bool PostRA = !MRI->isSSA();
4558
4559 int64_t Immediate = DefMI.getOperand(1).getImm();
4560 // Sign-extend to 64-bits.
4561 int64_t SExtImm = SignExtend64<16>(Immediate);
4562
4563 bool ReplaceWithLI = false;
4564 bool Is64BitLI = false;
4565 int64_t NewImm = 0;
4566 bool SetCR = false;
4567 unsigned Opc = MI.getOpcode();
4568 switch (Opc) {
4569 default:
4570 return false;
4571
4572 // FIXME: Any branches conditional on such a comparison can be made
4573 // unconditional. At this time, this happens too infrequently to be worth
4574 // the implementation effort, but if that ever changes, we could convert
4575 // such a pattern here.
4576 case PPC::CMPWI:
4577 case PPC::CMPLWI:
4578 case PPC::CMPDI:
4579 case PPC::CMPLDI: {
4580 // Doing this post-RA would require dataflow analysis to reliably find uses
4581 // of the CR register set by the compare.
4582 // No need to fixup killed/dead flag since this transformation is only valid
4583 // before RA.
4584 if (PostRA)
4585 return false;
4586 // If a compare-immediate is fed by an immediate and is itself an input of
4587 // an ISEL (the most common case) into a COPY of the correct register.
4588 bool Changed = false;
4589 Register DefReg = MI.getOperand(0).getReg();
4590 int64_t Comparand = MI.getOperand(2).getImm();
4591 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4592 ? (Comparand | 0xFFFFFFFFFFFF0000)
4593 : Comparand;
4594
4595 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4596 unsigned UseOpc = CompareUseMI.getOpcode();
4597 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4598 continue;
4599 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4600 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4601 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4602 unsigned RegToCopy =
4603 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4604 if (RegToCopy == PPC::NoRegister)
4605 continue;
4606 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4607 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4608 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4609 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
4610 CompareUseMI.removeOperand(3);
4611 CompareUseMI.removeOperand(2);
4612 continue;
4613 }
4614 LLVM_DEBUG(
4615 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4616 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4617 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4618 // Convert to copy and remove unneeded operands.
4619 CompareUseMI.setDesc(get(PPC::COPY));
4620 CompareUseMI.removeOperand(3);
4621 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4622 CmpIselsConverted++;
4623 Changed = true;
4624 LLVM_DEBUG(CompareUseMI.dump());
4625 }
4626 if (Changed)
4627 return true;
4628 // This may end up incremented multiple times since this function is called
4629 // during a fixed-point transformation, but it is only meant to indicate the
4630 // presence of this opportunity.
4631 MissedConvertibleImmediateInstrs++;
4632 return false;
4633 }
4634
4635 // Immediate forms - may simply be convertable to an LI.
4636 case PPC::ADDI:
4637 case PPC::ADDI8: {
4638 // Does the sum fit in a 16-bit signed field?
4639 int64_t Addend = MI.getOperand(2).getImm();
4640 if (isInt<16>(Addend + SExtImm)) {
4641 ReplaceWithLI = true;
4642 Is64BitLI = Opc == PPC::ADDI8;
4643 NewImm = Addend + SExtImm;
4644 break;
4645 }
4646 return false;
4647 }
4648 case PPC::SUBFIC:
4649 case PPC::SUBFIC8: {
4650 // Only transform this if the CARRY implicit operand is dead.
4651 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4652 return false;
4653 int64_t Minuend = MI.getOperand(2).getImm();
4654 if (isInt<16>(Minuend - SExtImm)) {
4655 ReplaceWithLI = true;
4656 Is64BitLI = Opc == PPC::SUBFIC8;
4657 NewImm = Minuend - SExtImm;
4658 break;
4659 }
4660 return false;
4661 }
4662 case PPC::RLDICL:
4663 case PPC::RLDICL_rec:
4664 case PPC::RLDICL_32:
4665 case PPC::RLDICL_32_64: {
4666 // Use APInt's rotate function.
4667 int64_t SH = MI.getOperand(2).getImm();
4668 int64_t MB = MI.getOperand(3).getImm();
4669 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4670 SExtImm, true);
4671 InVal = InVal.rotl(SH);
4672 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4673 InVal &= Mask;
4674 // Can't replace negative values with an LI as that will sign-extend
4675 // and not clear the left bits. If we're setting the CR bit, we will use
4676 // ANDI_rec which won't sign extend, so that's safe.
4677 if (isUInt<15>(InVal.getSExtValue()) ||
4678 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4679 ReplaceWithLI = true;
4680 Is64BitLI = Opc != PPC::RLDICL_32;
4681 NewImm = InVal.getSExtValue();
4682 SetCR = Opc == PPC::RLDICL_rec;
4683 break;
4684 }
4685 return false;
4686 }
4687 case PPC::RLWINM:
4688 case PPC::RLWINM8:
4689 case PPC::RLWINM_rec:
4690 case PPC::RLWINM8_rec: {
4691 int64_t SH = MI.getOperand(2).getImm();
4692 int64_t MB = MI.getOperand(3).getImm();
4693 int64_t ME = MI.getOperand(4).getImm();
4694 APInt InVal(32, SExtImm, true);
4695 InVal = InVal.rotl(SH);
4696 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4697 InVal &= Mask;
4698 // Can't replace negative values with an LI as that will sign-extend
4699 // and not clear the left bits. If we're setting the CR bit, we will use
4700 // ANDI_rec which won't sign extend, so that's safe.
4701 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4702 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4703 isUInt<16>(InVal.getSExtValue()));
4704 if (ValueFits) {
4705 ReplaceWithLI = true;
4706 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4707 NewImm = InVal.getSExtValue();
4708 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4709 break;
4710 }
4711 return false;
4712 }
4713 case PPC::ORI:
4714 case PPC::ORI8:
4715 case PPC::XORI:
4716 case PPC::XORI8: {
4717 int64_t LogicalImm = MI.getOperand(2).getImm();
4718 int64_t Result = 0;
4719 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4720 Result = LogicalImm | SExtImm;
4721 else
4722 Result = LogicalImm ^ SExtImm;
4723 if (isInt<16>(Result)) {
4724 ReplaceWithLI = true;
4725 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4726 NewImm = Result;
4727 break;
4728 }
4729 return false;
4730 }
4731 }
4732
4733 if (ReplaceWithLI) {
4734 // We need to be careful with CR-setting instructions we're replacing.
4735 if (SetCR) {
4736 // We don't know anything about uses when we're out of SSA, so only
4737 // replace if the new immediate will be reproduced.
4738 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4739 if (PostRA && ImmChanged)
4740 return false;
4741
4742 if (!PostRA) {
4743 // If the defining load-immediate has no other uses, we can just replace
4744 // the immediate with the new immediate.
4745 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4746 DefMI.getOperand(1).setImm(NewImm);
4747
4748 // If we're not using the GPR result of the CR-setting instruction, we
4749 // just need to and with zero/non-zero depending on the new immediate.
4750 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4751 if (NewImm) {
4752 assert(Immediate && "Transformation converted zero to non-zero?");
4753 NewImm = Immediate;
4754 }
4755 } else if (ImmChanged)
4756 return false;
4757 }
4758 }
4759
4760 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4761 LLVM_DEBUG(MI.dump());
4762 LLVM_DEBUG(dbgs() << "Fed by:\n");
4763 LLVM_DEBUG(DefMI.dump());
4765 LII.Imm = NewImm;
4766 LII.Is64Bit = Is64BitLI;
4767 LII.SetCR = SetCR;
4768 // If we're setting the CR, the original load-immediate must be kept (as an
4769 // operand to ANDI_rec/ANDI8_rec).
4770 if (KilledDef && SetCR)
4771 *KilledDef = nullptr;
4772 replaceInstrWithLI(MI, LII);
4773
4774 if (PostRA)
4775 recomputeLivenessFlags(*MI.getParent());
4776
4777 LLVM_DEBUG(dbgs() << "With:\n");
4778 LLVM_DEBUG(MI.dump());
4779 return true;
4780 }
4781 return false;
4782}
4783
4784bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4785 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4786 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4787 bool PostRA = !MRI->isSSA();
4788 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4789 // for post-ra.
4790 if (PostRA)
4791 return false;
4792
4793 // Only handle load/store.
4794 if (!MI.mayLoadOrStore())
4795 return false;
4796
4797 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4798
4799 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4800 "MI must have x-form opcode");
4801
4802 // get Imm Form info.
4803 ImmInstrInfo III;
4804 bool IsVFReg = MI.getOperand(0).isReg()
4805 ? PPC::isVFRegister(MI.getOperand(0).getReg())
4806 : false;
4807
4808 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4809 return false;
4810
4811 if (!III.IsSummingOperands)
4812 return false;
4813
4814 if (OpNoForForwarding != III.OpNoForForwarding)
4815 return false;
4816
4817 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4818 if (!ImmOperandMI.isImm())
4819 return false;
4820
4821 // Check DefMI.
4822 MachineOperand *ImmMO = nullptr;
4823 MachineOperand *RegMO = nullptr;
4824 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4825 return false;
4826 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4827
4828 // Check Imm.
4829 // Set ImmBase from imm instruction as base and get new Imm inside
4830 // isImmElgibleForForwarding.
4831 int64_t ImmBase = ImmOperandMI.getImm();
4832 int64_t Imm = 0;
4833 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4834 return false;
4835
4836 // Do the transform
4837 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4838 LLVM_DEBUG(MI.dump());
4839 LLVM_DEBUG(dbgs() << "Fed by:\n");
4840 LLVM_DEBUG(DefMI.dump());
4841
4842 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4843 MI.getOperand(III.ImmOpNo).setImm(Imm);
4844
4845 LLVM_DEBUG(dbgs() << "With:\n");
4846 LLVM_DEBUG(MI.dump());
4847 return true;
4848}
4849
4850// If an X-Form instruction is fed by an add-immediate and one of its operands
4851// is the literal zero, attempt to forward the source of the add-immediate to
4852// the corresponding D-Form instruction with the displacement coming from
4853// the immediate being added.
4854bool PPCInstrInfo::transformToImmFormFedByAdd(
4855 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4856 MachineInstr &DefMI, bool KillDefMI) const {
4857 // RegMO ImmMO
4858 // | |
4859 // x = addi reg, imm <----- DefMI
4860 // y = op 0 , x <----- MI
4861 // |
4862 // OpNoForForwarding
4863 // Check if the MI meet the requirement described in the III.
4864 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4865 return false;
4866
4867 // Check if the DefMI meet the requirement
4868 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4869 MachineOperand *ImmMO = nullptr;
4870 MachineOperand *RegMO = nullptr;
4871 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4872 return false;
4873 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4874
4875 // As we get the Imm operand now, we need to check if the ImmMO meet
4876 // the requirement described in the III. If yes set the Imm.
4877 int64_t Imm = 0;
4878 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4879 return false;
4880
4881 bool IsFwdFeederRegKilled = false;
4882 bool SeenIntermediateUse = false;
4883 // Check if the RegMO can be forwarded to MI.
4884 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4885 IsFwdFeederRegKilled, SeenIntermediateUse))
4886 return false;
4887
4888 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4889 bool PostRA = !MRI.isSSA();
4890
4891 // We know that, the MI and DefMI both meet the pattern, and
4892 // the Imm also meet the requirement with the new Imm-form.
4893 // It is safe to do the transformation now.
4894 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4895 LLVM_DEBUG(MI.dump());
4896 LLVM_DEBUG(dbgs() << "Fed by:\n");
4897 LLVM_DEBUG(DefMI.dump());
4898
4899 // Update the base reg first.
4900 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
4901 false, false,
4902 RegMO->isKill());
4903
4904 // Then, update the imm.
4905 if (ImmMO->isImm()) {
4906 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
4907 // directly.
4909 }
4910 else {
4911 // Otherwise, it is Constant Pool Index(CPI) or Global,
4912 // which is relocation in fact. We need to replace the special zero
4913 // register with ImmMO.
4914 // Before that, we need to fixup the target flags for imm.
4915 // For some reason, we miss to set the flag for the ImmMO if it is CPI.
4916 if (DefMI.getOpcode() == PPC::ADDItocL8)
4918
4919 // MI didn't have the interface such as MI.setOperand(i) though
4920 // it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4921 // ImmMO, we need to remove ZERO operand and all the operands behind it,
4922 // and, add the ImmMO, then, move back all the operands behind ZERO.
4924 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
4925 MOps.push_back(MI.getOperand(i));
4926 MI.removeOperand(i);
4927 }
4928
4929 // Remove the last MO in the list, which is ZERO operand in fact.
4930 MOps.pop_back();
4931 // Add the imm operand.
4932 MI.addOperand(*ImmMO);
4933 // Now add the rest back.
4934 for (auto &MO : MOps)
4935 MI.addOperand(MO);
4936 }
4937
4938 // Update the opcode.
4939 MI.setDesc(get(III.ImmOpcode));
4940
4941 if (PostRA)
4942 recomputeLivenessFlags(*MI.getParent());
4943 LLVM_DEBUG(dbgs() << "With:\n");
4944 LLVM_DEBUG(MI.dump());
4945
4946 return true;
4947}
4948
4949bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4950 const ImmInstrInfo &III,
4951 unsigned ConstantOpNo,
4952 MachineInstr &DefMI) const {
4953 // DefMI must be LI or LI8.
4954 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4955 !DefMI.getOperand(1).isImm())
4956 return false;
4957
4958 // Get Imm operand and Sign-extend to 64-bits.
4959 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
4960
4961 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4962 bool PostRA = !MRI.isSSA();
4963 // Exit early if we can't convert this.
4964 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
4965 return false;
4966 if (Imm % III.ImmMustBeMultipleOf)
4967 return false;
4968 if (III.TruncateImmTo)
4969 Imm &= ((1 << III.TruncateImmTo) - 1);
4970 if (III.SignedImm) {
4971 APInt ActualValue(64, Imm, true);
4972 if (!ActualValue.isSignedIntN(III.ImmWidth))
4973 return false;
4974 } else {
4975 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
4976 if ((uint64_t)Imm > UnsignedMax)
4977 return false;
4978 }
4979
4980 // If we're post-RA, the instructions don't agree on whether register zero is
4981 // special, we can transform this as long as the register operand that will
4982 // end up in the location where zero is special isn't R0.
4983 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
4984 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
4985 III.ZeroIsSpecialNew + 1;
4986 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
4987 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
4988 // If R0 is in the operand where zero is special for the new instruction,
4989 // it is unsafe to transform if the constant operand isn't that operand.
4990 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
4991 ConstantOpNo != III.ZeroIsSpecialNew)
4992 return false;
4993 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
4994 ConstantOpNo != PosForOrigZero)
4995 return false;
4996 }
4997
4998 unsigned Opc = MI.getOpcode();
4999 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5000 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5001 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5002 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5003 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5004 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5005 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5006 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5007 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5008 Opc == PPC::SRD_rec;
5009
5010 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5011 LLVM_DEBUG(MI.dump());
5012 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5013 LLVM_DEBUG(DefMI.dump());
5014 MI.setDesc(get(III.ImmOpcode));
5015 if (ConstantOpNo == III.OpNoForForwarding) {
5016 // Converting shifts to immediate form is a bit tricky since they may do
5017 // one of three things:
5018 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5019 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5020 // setting CR0)
5021 // 3. If the shift amount is in [1, OpSize), it's just a shift
5022 if (SpecialShift32 || SpecialShift64) {
5024 LII.Imm = 0;
5025 LII.SetCR = SetCR;
5026 LII.Is64Bit = SpecialShift64;
5027 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
5028 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5029 replaceInstrWithLI(MI, LII);
5030 // Shifts by zero don't change the value. If we don't need to set CR0,
5031 // just convert this to a COPY. Can't do this post-RA since we've already
5032 // cleaned up the copies.
5033 else if (!SetCR && ShAmt == 0 && !PostRA) {
5034 MI.removeOperand(2);
5035 MI.setDesc(get(PPC::COPY));
5036 } else {
5037 // The 32 bit and 64 bit instructions are quite different.
5038 if (SpecialShift32) {
5039 // Left shifts use (N, 0, 31-N).
5040 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5041 // use (0, 0, 31) if N == 0.
5042 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5043 uint64_t MB = RightShift ? ShAmt : 0;
5044 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
5046 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5047 .addImm(ME);
5048 } else {
5049 // Left shifts use (N, 63-N).
5050 // Right shifts use (64-N, N) if 0 < N < 64.
5051 // use (0, 0) if N == 0.
5052 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5053 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
5055 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5056 }
5057 }
5058 } else
5059 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5060 }
5061 // Convert commutative instructions (switch the operands and convert the
5062 // desired one to an immediate.
5063 else if (III.IsCommutative) {
5064 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5065 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5066 } else
5067 llvm_unreachable("Should have exited early!");
5068
5069 // For instructions for which the constant register replaces a different
5070 // operand than where the immediate goes, we need to swap them.
5071 if (III.OpNoForForwarding != III.ImmOpNo)
5073
5074 // If the special R0/X0 register index are different for original instruction
5075 // and new instruction, we need to fix up the register class in new
5076 // instruction.
5077 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5078 if (III.ZeroIsSpecialNew) {
5079 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5080 // need to fix up register class.
5081 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5082 if (RegToModify.isVirtual()) {
5083 const TargetRegisterClass *NewRC =
5084 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5085 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5086 MRI.setRegClass(RegToModify, NewRC);
5087 }
5088 }
5089 }
5090
5091 if (PostRA)
5092 recomputeLivenessFlags(*MI.getParent());
5093
5094 LLVM_DEBUG(dbgs() << "With: ");
5095 LLVM_DEBUG(MI.dump());
5096 LLVM_DEBUG(dbgs() << "\n");
5097 return true;
5098}
5099
5100const TargetRegisterClass *
5102 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5103 return &PPC::VSRCRegClass;
5104 return RC;
5105}
5106
5108 return PPC::getRecordFormOpcode(Opcode);
5109}
5110
5111static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5112 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5113 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5114 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5115 Opcode == PPC::LHZUX8);
5116}
5117
5118// This function checks for sign extension from 32 bits to 64 bits.
5119static bool definedBySignExtendingOp(const unsigned Reg,
5120 const MachineRegisterInfo *MRI) {
5122 return false;
5123
5124 MachineInstr *MI = MRI->getVRegDef(Reg);
5125 if (!MI)
5126 return false;
5127
5128 int Opcode = MI->getOpcode();
5129 const PPCInstrInfo *TII =
5130 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5131 if (TII->isSExt32To64(Opcode))
5132 return true;
5133
5134 // The first def of LBZU/LHZU is sign extended.
5135 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5136 return true;
5137
5138 // RLDICL generates sign-extended output if it clears at least
5139 // 33 bits from the left (MSB).
5140 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5141 return true;
5142
5143 // If at least one bit from left in a lower word is masked out,
5144 // all of 0 to 32-th bits of the output are cleared.
5145 // Hence the output is already sign extended.
5146 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5147 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5148 MI->getOperand(3).getImm() > 0 &&
5149 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5150 return true;
5151
5152 // If the most significant bit of immediate in ANDIS is zero,
5153 // all of 0 to 32-th bits are cleared.
5154 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5155 uint16_t Imm = MI->getOperand(2).getImm();
5156 if ((Imm & 0x8000) == 0)
5157 return true;
5158 }
5159
5160 return false;
5161}
5162
5163// This function checks the machine instruction that defines the input register
5164// Reg. If that machine instruction always outputs a value that has only zeros
5165// in the higher 32 bits then this function will return true.
5166static bool definedByZeroExtendingOp(const unsigned Reg,
5167 const MachineRegisterInfo *MRI) {
5169 return false;
5170
5171 MachineInstr *MI = MRI->getVRegDef(Reg);
5172 if (!MI)
5173 return false;
5174
5175 int Opcode = MI->getOpcode();
5176 const PPCInstrInfo *TII =
5177 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5178 if (TII->isZExt32To64(Opcode))
5179 return true;
5180
5181 // The first def of LBZU/LHZU/LWZU are zero extended.
5182 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5183 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5184 MI->getOperand(0).getReg() == Reg)
5185 return true;
5186
5187 // The 16-bit immediate is sign-extended in li/lis.
5188 // If the most significant bit is zero, all higher bits are zero.
5189 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5190 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5191 int64_t Imm = MI->getOperand(1).getImm();
5192 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5193 return true;
5194 }
5195
5196 // We have some variations of rotate-and-mask instructions
5197 // that clear higher 32-bits.
5198 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5199 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5200 Opcode == PPC::RLDICL_32_64) &&
5201 MI->getOperand(3).getImm() >= 32)
5202 return true;
5203
5204 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5205 MI->getOperand(3).getImm() >= 32 &&
5206 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5207 return true;
5208
5209 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5210 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5211 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5212 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5213 return true;
5214
5215 return false;
5216}
5217
5218// This function returns true if the input MachineInstr is a TOC save
5219// instruction.
5221 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5222 return false;
5223 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5224 unsigned StackOffset = MI.getOperand(1).getImm();
5225 Register StackReg = MI.getOperand(2).getReg();
5226 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5227 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5228 return true;
5229
5230 return false;
5231}
5232
5233// We limit the max depth to track incoming values of PHIs or binary ops
5234// (e.g. AND) to avoid excessive cost.
5235const unsigned MAX_BINOP_DEPTH = 1;
5236// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5237// does not count all of the recursions. The parameter BinOpDepth is incremented
5238// only when isSignOrZeroExtended calls itself more than once. This is done to
5239// prevent expontential recursion. There is no parameter to track linear
5240// recursion.
5241std::pair<bool, bool>
5243 const unsigned BinOpDepth,
5244 const MachineRegisterInfo *MRI) const {
5246 return std::pair<bool, bool>(false, false);
5247
5248 MachineInstr *MI = MRI->getVRegDef(Reg);
5249 if (!MI)
5250 return std::pair<bool, bool>(false, false);
5251
5252 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5253 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5254
5255 // If we know the instruction always returns sign- and zero-extended result,
5256 // return here.
5257 if (IsSExt && IsZExt)
5258 return std::pair<bool, bool>(IsSExt, IsZExt);
5259
5260 switch (MI->getOpcode()) {
5261 case PPC::COPY: {
5262 Register SrcReg = MI->getOperand(1).getReg();
5263
5264 // In both ELFv1 and v2 ABI, method parameters and the return value
5265 // are sign- or zero-extended.
5266 const MachineFunction *MF = MI->getMF();
5267
5268 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5269 // If this is a copy from another register, we recursively check source.
5270 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5271 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5272 SrcExt.second || IsZExt);
5273 }
5274
5275 // From here on everything is SVR4ABI
5276 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5277 // We check the ZExt/SExt flags for a method parameter.
5278 if (MI->getParent()->getBasicBlock() ==
5279 &MF->getFunction().getEntryBlock()) {
5280 Register VReg = MI->getOperand(0).getReg();
5281 if (MF->getRegInfo().isLiveIn(VReg)) {
5282 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5283 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5284 return std::pair<bool, bool>(IsSExt, IsZExt);
5285 }
5286 }
5287
5288 if (SrcReg != PPC::X3) {
5289 // If this is a copy from another register, we recursively check source.
5290 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5291 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5292 SrcExt.second || IsZExt);
5293 }
5294
5295 // For a method return value, we check the ZExt/SExt flags in attribute.
5296 // We assume the following code sequence for method call.
5297 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5298 // BL8_NOP @func,...
5299 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5300 // %5 = COPY %x3; G8RC:%5
5301 const MachineBasicBlock *MBB = MI->getParent();
5302 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5305 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5306 return IsExtendPair;
5307
5308 const MachineInstr &CallMI = *(--II);
5309 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5310 return IsExtendPair;
5311
5312 const Function *CalleeFn =
5313 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
5314 if (!CalleeFn)
5315 return IsExtendPair;
5316 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5317 if (IntTy && IntTy->getBitWidth() <= 32) {
5318 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5319 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5320 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5321 return std::pair<bool, bool>(IsSExt, IsZExt);
5322 }
5323
5324 return IsExtendPair;
5325 }
5326
5327 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5328 // So, we track the operand register as we do for register copy.
5329 case PPC::ORI:
5330 case PPC::XORI:
5331 case PPC::ORI8:
5332 case PPC::XORI8: {
5333 Register SrcReg = MI->getOperand(1).getReg();
5334 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5335 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5336 SrcExt.second || IsZExt);
5337 }
5338
5339 // OR, XOR with shifted 16-bit immediate does not change the upper
5340 // 32 bits. So, we track the operand register for zero extension.
5341 // For sign extension when the MSB of the immediate is zero, we also
5342 // track the operand register since the upper 33 bits are unchanged.
5343 case PPC::ORIS:
5344 case PPC::XORIS:
5345 case PPC::ORIS8:
5346 case PPC::XORIS8: {
5347 Register SrcReg = MI->getOperand(1).getReg();
5348 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5349 uint16_t Imm = MI->getOperand(2).getImm();
5350 if (Imm & 0x8000)
5351 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5352 else
5353 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5354 SrcExt.second || IsZExt);
5355 }
5356
5357 // If all incoming values are sign-/zero-extended,
5358 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5359 case PPC::OR:
5360 case PPC::OR8:
5361 case PPC::ISEL:
5362 case PPC::PHI: {
5363 if (BinOpDepth >= MAX_BINOP_DEPTH)
5364 return std::pair<bool, bool>(false, false);
5365
5366 // The input registers for PHI are operand 1, 3, ...
5367 // The input registers for others are operand 1 and 2.
5368 unsigned OperandEnd = 3, OperandStride = 1;
5369 if (MI->getOpcode() == PPC::PHI) {
5370 OperandEnd = MI->getNumOperands();
5371 OperandStride = 2;
5372 }
5373
5374 IsSExt = true;
5375 IsZExt = true;
5376 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5377 if (!MI->getOperand(I).isReg())
5378 return std::pair<bool, bool>(false, false);
5379
5380 Register SrcReg = MI->getOperand(I).getReg();
5381 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5382 IsSExt &= SrcExt.first;
5383 IsZExt &= SrcExt.second;
5384 }
5385 return std::pair<bool, bool>(IsSExt, IsZExt);
5386 }
5387
5388 // If at least one of the incoming values of an AND is zero extended
5389 // then the output is also zero-extended. If both of the incoming values
5390 // are sign-extended then the output is also sign extended.
5391 case PPC::AND:
5392 case PPC::AND8: {
5393 if (BinOpDepth >= MAX_BINOP_DEPTH)
5394 return std::pair<bool, bool>(false, false);
5395
5396 Register SrcReg1 = MI->getOperand(1).getReg();
5397 Register SrcReg2 = MI->getOperand(2).getReg();
5398 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5399 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5400 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5401 Src1Ext.second || Src2Ext.second);
5402 }
5403
5404 default:
5405 break;
5406 }
5407 return std::pair<bool, bool>(IsSExt, IsZExt);
5408}
5409
5410bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5411 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5412}
5413
5414namespace {
5415class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5416 MachineInstr *Loop, *EndLoop, *LoopCount;
5417 MachineFunction *MF;
5418 const TargetInstrInfo *TII;
5419 int64_t TripCount;
5420
5421public:
5422 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5423 MachineInstr *LoopCount)
5424 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5425 MF(Loop->getParent()->getParent()),
5426 TII(MF->getSubtarget().getInstrInfo()) {
5427 // Inspect the Loop instruction up-front, as it may be deleted when we call
5428 // createTripCountGreaterCondition.
5429 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5430 TripCount = LoopCount->getOperand(1).getImm();
5431 else
5432 TripCount = -1;
5433 }
5434
5435 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5436 // Only ignore the terminator.
5437 return MI == EndLoop;
5438 }
5439
5440 std::optional<bool> createTripCountGreaterCondition(
5441 int TC, MachineBasicBlock &MBB,
5443 if (TripCount == -1) {
5444 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5445 // so we don't need to generate any thing here.
5446 Cond.push_back(MachineOperand::CreateImm(0));
5448 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5449 true));
5450 return {};
5451 }
5452
5453 return TripCount > TC;
5454 }
5455
5456 void setPreheader(MachineBasicBlock *NewPreheader) override {
5457 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5458 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5459 }
5460
5461 void adjustTripCount(int TripCountAdjust) override {
5462 // If the loop trip count is a compile-time value, then just change the
5463 // value.
5464 if (LoopCount->getOpcode() == PPC::LI8 ||
5465 LoopCount->getOpcode() == PPC::LI) {
5466 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5467 LoopCount->getOperand(1).setImm(TripCount);
5468 return;
5469 }
5470
5471 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5472 // so we don't need to generate any thing here.
5473 }
5474
5475 void disposed() override {
5476 Loop->eraseFromParent();
5477 // Ensure the loop setup instruction is deleted too.
5478 LoopCount->eraseFromParent();
5479 }
5480};
5481} // namespace
5482
5483std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5485 // We really "analyze" only hardware loops right now.
5487 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5488 if (Preheader == LoopBB)
5489 Preheader = *std::next(LoopBB->pred_begin());
5490 MachineFunction *MF = Preheader->getParent();
5491
5492 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5494 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5495 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5497 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5498 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5499 }
5500 }
5501 return nullptr;
5502}
5503
5505 MachineBasicBlock &PreHeader,
5506 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5507
5508 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5509
5510 // The loop set-up instruction should be in preheader
5511 for (auto &I : PreHeader.instrs())
5512 if (I.getOpcode() == LOOPi)
5513 return &I;
5514 return nullptr;
5515}
5516
5517// Return true if get the base operand, byte offset of an instruction and the
5518// memory width. Width is the size of memory that is being loaded/stored.
5520 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5521 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5522 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5523 return false;
5524
5525 // Handle only loads/stores with base register followed by immediate offset.
5526 if (!LdSt.getOperand(1).isImm() ||
5527 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5528 return false;
5529 if (!LdSt.getOperand(1).isImm() ||
5530 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5531 return false;
5532
5533 if (!LdSt.hasOneMemOperand())
5534 return false;
5535
5536 Width = (*LdSt.memoperands_begin())->getSize();
5537 Offset = LdSt.getOperand(1).getImm();
5538 BaseReg = &LdSt.getOperand(2);
5539 return true;
5540}
5541
5543 const MachineInstr &MIa, const MachineInstr &MIb) const {
5544 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5545 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5546
5549 return false;
5550
5551 // Retrieve the base register, offset from the base register and width. Width
5552 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5553 // base registers are identical, and the offset of a lower memory access +
5554 // the width doesn't overlap the offset of a higher memory access,
5555 // then the memory accesses are different.
5557 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5558 int64_t OffsetA = 0, OffsetB = 0;
5559 LocationSize WidthA = 0, WidthB = 0;
5560 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5561 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5562 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5563 int LowOffset = std::min(OffsetA, OffsetB);
5564 int HighOffset = std::max(OffsetA, OffsetB);
5565 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5566 if (LowWidth.hasValue() &&
5567 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5568 return true;
5569 }
5570 }
5571 return false;
5572}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static unsigned getSize(unsigned Kind)
void changeSign()
Definition: APFloat.h:1158
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition: APInt.cpp:1111
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
A possibly irreducible generalization of a Loop.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
bool hasValue() const
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
void setOpcode(unsigned Op)
Definition: MCInst.h:197
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
Definition: MCInstrDesc.h:565
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:269
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
uint16_t Constraints
Operand constraints (see OperandConstraint enum).
Definition: MCInstrDesc.h:100
bool isLookupPtrRegClass() const
Set if this operand is a pointer value and it requires a callback to look up its register class.
Definition: MCInstrDesc.h:104
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Definition: MCInstrDesc.h:91
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:918
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:379
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:710
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
Definition: MachineInstr.h:620
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:792
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:662
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:777
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:475
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:374
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automata that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
PPCInstrInfo(PPCSubtarget &STI)
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
Definition: PPCInstrInfo.h:276
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: PPCInstrInfo.h:274
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:623
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
Definition: PPCInstrInfo.h:506
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if get the base operand, byte offset of an instruction and the memory width.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
Definition: PPCInstrInfo.h:617
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
unsigned getMappedIdxOpcForImmOpc(unsigned ImmOpcode) const
getMappedIdxOpcForImmOpc - Return the mapped index form load/store opcode for a given imm form load/s...
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
const Triple & getTargetTriple() const
Definition: PPCSubtarget.h:208
void setGlibcHWCAPAccess(bool Val=true) const
void dump() const
Definition: Pass.cpp:136
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
Track the current register pressure at some position in the instruction stream, and remember the high...
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
CodeModel::Model getCodeModel() const
Returns the code model.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:687
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ MO_TOC_LO
Definition: PPC.h:185
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
int getAltVSXFMAOpcode(uint16_t Opcode)
int getNonRecordFormOpcode(uint16_t)
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Definition: PPCPredicates.h:77
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Definition: PPCPredicates.h:82
static bool isVFRegister(unsigned Reg)
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
@ REASSOC_XY_BCA
Definition: PPCInstrInfo.h:96
@ REASSOC_XY_BAC
Definition: PPCInstrInfo.h:97
@ REASSOC_XY_AMM_BMM
Definition: PPCInstrInfo.h:91
@ REASSOC_XMM_AMM_BMM
Definition: PPCInstrInfo.h:92
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
unsigned getKillRegState(bool B)
@ SOK_CRBitSpill
Definition: PPCInstrInfo.h:73
@ SOK_VSXVectorSpill
Definition: PPCInstrInfo.h:75
@ SOK_SpillToVSR
Definition: PPCInstrInfo.h:78
@ SOK_Int4Spill
Definition: PPCInstrInfo.h:68
@ SOK_PairedVecSpill
Definition: PPCInstrInfo.h:79
@ SOK_VectorFloat8Spill
Definition: PPCInstrInfo.h:76
@ SOK_UAccumulatorSpill
Definition: PPCInstrInfo.h:81
@ SOK_PairedG8Spill
Definition: PPCInstrInfo.h:84
@ SOK_VectorFloat4Spill
Definition: PPCInstrInfo.h:77
@ SOK_Float8Spill
Definition: PPCInstrInfo.h:70
@ SOK_Float4Spill
Definition: PPCInstrInfo.h:71
@ SOK_VRVectorSpill
Definition: PPCInstrInfo.h:74
@ SOK_WAccumulatorSpill
Definition: PPCInstrInfo.h:82
@ SOK_SPESpill
Definition: PPCInstrInfo.h:83
@ SOK_CRSpill
Definition: PPCInstrInfo.h:72
@ SOK_AccumulatorSpill
Definition: PPCInstrInfo.h:80
@ SOK_Int8Spill
Definition: PPCInstrInfo.h:69
@ SOK_LastOpcodeSpill
Definition: PPCInstrInfo.h:85
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t IsSummingOperands
Definition: PPCInstrInfo.h:55
uint64_t OpNoForForwarding
Definition: PPCInstrInfo.h:45
uint64_t ImmMustBeMultipleOf
Definition: PPCInstrInfo.h:35
uint64_t IsCommutative
Definition: PPCInstrInfo.h:43
uint64_t ZeroIsSpecialNew
Definition: PPCInstrInfo.h:41
uint64_t TruncateImmTo
Definition: PPCInstrInfo.h:53
uint64_t ZeroIsSpecialOrig
Definition: PPCInstrInfo.h:38
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.