LLVM 23.0.0git
PPCInstrInfo.cpp
Go to the documentation of this file.
1//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PowerPC implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCInstrInfo.h"
15#include "PPC.h"
17#include "PPCInstrBuilder.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
37#include "llvm/IR/Module.h"
38#include "llvm/MC/MCInst.h"
41#include "llvm/Support/Debug.h"
44
45using namespace llvm;
46
47#define DEBUG_TYPE "ppc-instr-info"
48
49#define GET_INSTRMAP_INFO
50#define GET_INSTRINFO_CTOR_DTOR
51#include "PPCGenInstrInfo.inc"
52
53STATISTIC(NumStoreSPILLVSRRCAsVec,
54 "Number of spillvsrrc spilled to stack as vec");
55STATISTIC(NumStoreSPILLVSRRCAsGpr,
56 "Number of spillvsrrc spilled to stack as gpr");
57STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
58STATISTIC(CmpIselsConverted,
59 "Number of ISELs that depend on comparison of constants converted");
60STATISTIC(MissedConvertibleImmediateInstrs,
61 "Number of compare-immediate instructions fed by constants");
62STATISTIC(NumRcRotatesConvertedToRcAnd,
63 "Number of record-form rotates converted to record-form andi");
64
65static cl::
66opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
67 cl::desc("Disable analysis for CTR loops"));
68
69static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
70cl::desc("Disable compare instruction optimization"), cl::Hidden);
71
72static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
73cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
75
76static cl::opt<bool>
77UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
78 cl::desc("Use the old (incorrect) instruction latency calculation"));
79
80static cl::opt<float>
81 FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
82 cl::desc("register pressure factor for the transformations."));
83
85 "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
86 cl::desc("enable register pressure reduce in machine combiner pass."));
87
88// Pin the vtable to this file.
89void PPCInstrInfo::anchor() {}
90
92 : PPCGenInstrInfo(STI, RI, PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
93 /* CatchRetOpcode */ -1,
94 STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
95 Subtarget(STI), RI(STI.getTargetMachine()) {}
96
97/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
98/// this target when scheduling the DAG.
101 const ScheduleDAG *DAG) const {
102 unsigned Directive =
103 static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
106 const InstrItineraryData *II =
107 static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
108 return new ScoreboardHazardRecognizer(II, DAG);
109 }
110
112}
113
114/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
115/// to use for this target when scheduling the DAG.
118 const ScheduleDAG *DAG) const {
119 unsigned Directive =
120 DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
121
122 // FIXME: Leaving this as-is until we have POWER9 scheduling info
124 return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
125
126 // Most subtargets use a PPC970 recognizer.
129 assert(DAG->TII && "No InstrInfo?");
130
131 return new PPCHazardRecognizer970(*DAG);
132 }
133
134 return new ScoreboardHazardRecognizer(II, DAG);
135}
136
138 const MachineInstr &MI,
139 unsigned *PredCost) const {
 // With no itinerary data, or when the ppc-old-latency-calc option is set,
 // defer entirely to the generated PPCGenInstrInfo implementation.
140 if (!ItinData || UseOldLatencyCalc)
141 return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
142
143 // The default implementation of getInstrLatency calls getStageLatency, but
144 // getStageLatency does not do the right thing for us. While we have
145 // itinerary, most cores are fully pipelined, and so the itineraries only
146 // express the first part of the pipeline, not every stage. Instead, we need
147 // to use the listed output operand cycle number (using operand 0 here, which
148 // is an output).
149
 // The result is the largest operand cycle found among the explicit register
 // definitions of MI; it stays at 1 when no such operand has a cycle entry.
150 unsigned Latency = 1;
151 unsigned DefClass = MI.getDesc().getSchedClass();
152 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
153 const MachineOperand &MO = MI.getOperand(i);
 // Only explicit register definitions contribute.
154 if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
155 continue;
156
 // Operands without a cycle entry for this scheduling class are skipped.
157 std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);
158 if (!Cycle)
159 continue;
160
161 Latency = std::max(Latency, *Cycle);
162 }
163
164 return Latency;
165}
166
167std::optional<unsigned> PPCInstrInfo::getOperandLatency(
168 const InstrItineraryData *ItinData, const MachineInstr &DefMI,
169 unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
170 std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
171 ItinData, DefMI, DefIdx, UseMI, UseIdx);
172
173 if (!DefMI.getParent())
174 return Latency;
175
176 const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
177 Register Reg = DefMO.getReg();
178
179 bool IsRegCR;
180 if (Reg.isVirtual()) {
181 const MachineRegisterInfo *MRI =
182 &DefMI.getParent()->getParent()->getRegInfo();
183 IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
184 MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
185 } else {
186 IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
187 PPC::CRBITRCRegClass.contains(Reg);
188 }
189
190 if (UseMI.isBranch() && IsRegCR) {
191 if (!Latency)
192 Latency = getInstrLatency(ItinData, DefMI);
193
194 // On some cores, there is an additional delay between writing to a condition
195 // register, and using it from a branch.
196 unsigned Directive = Subtarget.getCPUDirective();
197 switch (Directive) {
198 default: break;
199 case PPC::DIR_7400:
200 case PPC::DIR_750:
201 case PPC::DIR_970:
202 case PPC::DIR_E5500:
203 case PPC::DIR_PWR4:
204 case PPC::DIR_PWR5:
205 case PPC::DIR_PWR5X:
206 case PPC::DIR_PWR6:
207 case PPC::DIR_PWR6X:
208 case PPC::DIR_PWR7:
209 case PPC::DIR_PWR8:
210 // FIXME: Is this needed for POWER9?
211 Latency = *Latency + 2;
212 break;
213 }
214 }
215
216 return Latency;
217}
218
220 uint32_t Flags) const {
221 MI.setFlags(Flags);
225}
226
227// This function does not list all associative and commutative operations, but
228// only those worth feeding through the machine combiner in an attempt to
229// reduce the critical path. Mostly, this means floating-point operations,
230// because they have high latencies (>= 5) compared to other operations such as
231// and/or, which are also associative and commutative but have low latencies.
233 bool Invert) const {
234 if (Invert)
235 return false;
236 switch (Inst.getOpcode()) {
237 // Floating point:
238 // FP Add:
239 case PPC::FADD:
240 case PPC::FADDS:
241 // FP Multiply:
242 case PPC::FMUL:
243 case PPC::FMULS:
244 // Altivec Add:
245 case PPC::VADDFP:
246 // VSX Add:
247 case PPC::XSADDDP:
248 case PPC::XVADDDP:
249 case PPC::XVADDSP:
250 case PPC::XSADDSP:
251 // VSX Multiply:
252 case PPC::XSMULDP:
253 case PPC::XVMULDP:
254 case PPC::XVMULSP:
255 case PPC::XSMULSP:
258 // Fixed point:
259 // Multiply:
260 case PPC::MULHD:
261 case PPC::MULLD:
262 case PPC::MULHW:
263 case PPC::MULLW:
264 return true;
265 default:
266 return false;
267 }
268}
269
270#define InfoArrayIdxFMAInst 0
271#define InfoArrayIdxFAddInst 1
272#define InfoArrayIdxFMULInst 2
273#define InfoArrayIdxAddOpIdx 3
274#define InfoArrayIdxMULOpIdx 4
275#define InfoArrayIdxFSubInst 5
276// Array keeps info for FMA instructions:
277// Index 0(InfoArrayIdxFMAInst): FMA instruction;
278// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
279// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
280// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
281// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
282// the second MUL operand index is this index plus 1;
283// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
284static const uint16_t FMAOpIdxInfo[][6] = {
285 // FIXME: Add more FMA instructions like XSNMADDADP and so on.
286 {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
287 {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
288 {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
289 {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
290 {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
291 {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};
292
293// Check if an opcode is a FMA instruction. If it is, return the index in array
294// FMAOpIdxInfo. Otherwise, return -1.
295int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
296 for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
297 if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
298 return I;
299 return -1;
300}
301
302// On PowerPC target, we have two kinds of patterns related to FMA:
303// 1: Improve ILP.
304// Try to reassociate FMA chains like below:
305//
306// Pattern 1:
307// A = FADD X, Y (Leaf)
308// B = FMA A, M21, M22 (Prev)
309// C = FMA B, M31, M32 (Root)
310// -->
311// A = FMA X, M21, M22
312// B = FMA Y, M31, M32
313// C = FADD A, B
314//
315// Pattern 2:
316// A = FMA X, M11, M12 (Leaf)
317// B = FMA A, M21, M22 (Prev)
318// C = FMA B, M31, M32 (Root)
319// -->
320// A = FMUL M11, M12
321// B = FMA X, M21, M22
322// D = FMA A, M31, M32
323// C = FADD B, D
324//
325// breaking the dependency between A and B, allowing FMA to be executed in
326// parallel (or back-to-back in a pipeline) instead of depending on each other.
327//
328// 2: Reduce register pressure.
329// Try to reassociate FMA with FSUB and a constant like below:
330// C is a floating point const.
331//
332// Pattern 1:
333// A = FSUB X, Y (Leaf)
334// D = FMA B, C, A (Root)
335// -->
336// A = FMA B, Y, -C
337// D = FMA A, X, C
338//
339// Pattern 2:
340// A = FSUB X, Y (Leaf)
341// D = FMA B, A, C (Root)
342// -->
343// A = FMA B, Y, -C
344// D = FMA A, X, C
345//
346// Before the transformation, A must be assigned a different hardware register
347// from D. After the transformation, A and D must be assigned the same
348// hardware register because of the TIE attribute of FMA instructions.
349//
352 bool DoRegPressureReduce) const {
354 const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
356
357 auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
358 for (const auto &MO : Instr.explicit_operands())
359 if (!(MO.isReg() && MO.getReg().isVirtual()))
360 return false;
361 return true;
362 };
363
364 auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
365 unsigned OpType) {
366 if (Instr.getOpcode() !=
367 FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
368 return false;
369
370 // Instruction can be reassociated.
371 // fast math flags may prohibit reassociation.
372 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
373 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
374 return false;
375
376 // Instruction operands are virtual registers for reassociation.
377 if (!IsAllOpsVirtualReg(Instr))
378 return false;
379
380 // For register pressure reassociation, the FSub must have only one use as
381 // we want to delete the sub to save its def.
382 if (OpType == InfoArrayIdxFSubInst &&
383 !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
384 return false;
385
386 return true;
387 };
388
389 auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
390 int16_t &MulOpIdx, bool IsLeaf) {
391 int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
392 if (Idx < 0)
393 return false;
394
395 // Instruction can be reassociated.
396 // fast math flags may prohibit reassociation.
397 if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
398 Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
399 return false;
400
401 // Instruction operands are virtual registers for reassociation.
402 if (!IsAllOpsVirtualReg(Instr))
403 return false;
404
405 MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
406 if (IsLeaf)
407 return true;
408
409 AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
410
411 const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
412 MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
413 // If 'add' operand's def is not in current block, don't do ILP related opt.
414 if (!MIAdd || MIAdd->getParent() != MBB)
415 return false;
416
417 // If this is not Leaf FMA Instr, its 'add' operand should only have one use
418 // as this fma will be changed later.
419 return MRI->hasOneNonDBGUse(OpAdd.getReg());
420 };
421
422 int16_t AddOpIdx = -1;
423 int16_t MulOpIdx = -1;
424
425 bool IsUsedOnceL = false;
426 bool IsUsedOnceR = false;
427 MachineInstr *MULInstrL = nullptr;
428 MachineInstr *MULInstrR = nullptr;
429
430 auto IsRPReductionCandidate = [&]() {
431 // Currently, we only support float and double.
432 // FIXME: add support for other types.
433 unsigned Opcode = Root.getOpcode();
434 if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
435 return false;
436
437 // Root must be a valid FMA like instruction.
438 // Treat it as leaf as we don't care its add operand.
439 if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
440 assert((MulOpIdx >= 0) && "mul operand index not right!");
441 Register MULRegL = TRI->lookThruSingleUseCopyChain(
442 Root.getOperand(MulOpIdx).getReg(), MRI);
443 Register MULRegR = TRI->lookThruSingleUseCopyChain(
444 Root.getOperand(MulOpIdx + 1).getReg(), MRI);
445 if (!MULRegL && !MULRegR)
446 return false;
447
448 if (MULRegL && !MULRegR) {
449 MULRegR =
450 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
451 IsUsedOnceL = true;
452 } else if (!MULRegL && MULRegR) {
453 MULRegL =
454 TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
455 IsUsedOnceR = true;
456 } else {
457 IsUsedOnceL = true;
458 IsUsedOnceR = true;
459 }
460
461 if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
462 return false;
463
464 MULInstrL = MRI->getVRegDef(MULRegL);
465 MULInstrR = MRI->getVRegDef(MULRegR);
466 return true;
467 }
468 return false;
469 };
470
471 // Register pressure fma reassociation patterns.
472 if (DoRegPressureReduce && IsRPReductionCandidate()) {
473 assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
474 // Register pressure pattern 1
475 if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
476 IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
477 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
479 return true;
480 }
481
482 // Register pressure pattern 2
483 if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
484 IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
485 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
487 return true;
488 }
489 }
490
491 // ILP fma reassociation patterns.
492 // Root must be a valid FMA like instruction.
493 AddOpIdx = -1;
494 if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
495 return false;
496
497 assert((AddOpIdx >= 0) && "add operand index not right!");
498
499 Register RegB = Root.getOperand(AddOpIdx).getReg();
500 MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);
501
502 // Prev must be a valid FMA like instruction.
503 AddOpIdx = -1;
504 if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
505 return false;
506
507 assert((AddOpIdx >= 0) && "add operand index not right!");
508
509 Register RegA = Prev->getOperand(AddOpIdx).getReg();
510 MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
511 AddOpIdx = -1;
512 if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
514 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
515 return true;
516 }
517 if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
519 LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
520 return true;
521 }
522 return false;
523}
524
526 MachineInstr &Root, unsigned &Pattern,
527 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
528 assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
529
530 MachineFunction *MF = Root.getMF();
531 MachineRegisterInfo *MRI = &MF->getRegInfo();
534
535 int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
536 if (Idx < 0)
537 return;
538
539 uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
540
541 // For now we only need to fix up placeholder for register pressure reduce
542 // patterns.
543 Register ConstReg = 0;
544 switch (Pattern) {
546 ConstReg =
547 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
548 break;
550 ConstReg =
551 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
552 break;
553 default:
554 // Not register pressure reduce patterns.
555 return;
556 }
557
558 MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
559 // Get const value from const pool.
560 const Constant *C = getConstantFromConstantPool(ConstDefInstr);
561 assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
562
563 // Get negative fp const.
564 APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
565 F1.changeSign();
566 Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
567 Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());
568
569 // Put negative fp const into constant pool.
570 unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);
571
572 MachineOperand *Placeholder = nullptr;
573 // Record the placeholder PPC::ZERO8 we add in reassociateFMA.
574 for (auto *Inst : InsInstrs) {
575 for (MachineOperand &Operand : Inst->explicit_operands()) {
576 assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
577 if (Operand.getReg() == PPC::ZERO8) {
578 Placeholder = &Operand;
579 break;
580 }
581 }
582 }
583
584 assert(Placeholder && "Placeholder does not exist!");
585
586 // Generate instructions to load the const fp from constant pool.
587 // We only support PPC64 and medium code model.
588 Register LoadNewConst =
589 generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);
590
591 // Fill the placeholder with the new load from constant pool.
592 Placeholder->setReg(LoadNewConst);
593}
594
596 const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
597
599 return false;
600
601 // Currently, we only enable register pressure reducing in machine combiner
602 // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
603 // support.
604 //
605 // So we need following instructions to access a TOC entry:
606 //
607 // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
608 // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
609 // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
610 //
611 // FIXME: add more supported targets, like Small and Large code model, PPC32,
612 // AIX.
613 if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
614 Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
615 return false;
616
618 const MachineFunction *MF = MBB->getParent();
619 const MachineRegisterInfo *MRI = &MF->getRegInfo();
620
621 auto GetMBBPressure =
622 [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
623 RegionPressure Pressure;
624 RegPressureTracker RPTracker(Pressure);
625
626 // Initialize the register pressure tracker.
627 RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
628 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
629
630 for (const auto &MI : reverse(*MBB)) {
631 if (MI.isDebugValue() || MI.isDebugLabel())
632 continue;
633 RegisterOperands RegOpers;
634 RegOpers.collect(MI, *TRI, *MRI, false, false);
635 RPTracker.recedeSkipDebugValues();
636 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
637 RPTracker.recede(RegOpers);
638 }
639
640 // Close the RPTracker to finalize live ins.
641 RPTracker.closeRegion();
642
643 return RPTracker.getPressure().MaxSetPressure;
644 };
645
646 // For now we only care about float and double type fma.
647 unsigned VSSRCLimit =
648 RegClassInfo->getRegPressureSetLimit(PPC::RegisterPressureSets::VSSRC);
649
650 // Only reduce register pressure when pressure is high.
651 return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
652 (float)VSSRCLimit * FMARPFactor;
653}
654
656 // I must have exactly one memory operand; anything else is rejected.
657 if (!I->hasOneMemOperand())
658 return false;
659
 // That single operand must be a load whose pseudo source value is of the
 // ConstantPool kind. The null check guards operands that have no pseudo
 // source value.
660 MachineMemOperand *Op = I->memoperands()[0];
661 return Op->isLoad() && Op->getPseudoValue() &&
662 Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
663}
664
// Build the two-instruction sequence that loads a float or double constant and
// return the virtual register that receives the loaded value.
//
// Idx       - constant pool index (NOTE(review): the operands that consume Idx
//             are not visible in this listing; confirm against the full file).
// MI        - supplies the MachineFunction, the debug location and, through
//             its operand 0, the register class of the result register.
// Ty        - type of the constant; must be float or double (asserted below).
// InsInstrs - the new ADDIStocHA8 and load instructions are inserted at the
//             front of this list.
665Register PPCInstrInfo::generateLoadForNewConst(
666 unsigned Idx, MachineInstr *MI, Type *Ty,
667 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
668 // Now we only support PPC64, Medium code model and P9 with vector.
669 // We have immutable pattern to access const pool. See function
670 // shouldReduceRegisterPressure.
671 assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
673 "Target not supported!\n");
674
675 MachineFunction *MF = MI->getMF();
676 MachineRegisterInfo *MRI = &MF->getRegInfo();
677
678 // Generate ADDIStocHA8
 // Its result goes into a fresh G8RC_and_G8RC_NOX0 virtual register, and it
 // takes X2 as a register operand.
679 Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
680 MachineInstrBuilder TOCOffset =
681 BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
682 .addReg(PPC::X2)
684
685 assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
686 "Only float and double are supported!");
687
688 unsigned LoadOpcode;
689 // Should be float type or double type.
 // float selects DFLOADf32; otherwise (double, per the assert) DFLOADf64.
690 if (Ty->isFloatTy())
691 LoadOpcode = PPC::DFLOADf32;
692 else
693 LoadOpcode = PPC::DFLOADf64;
694
 // The loaded value gets a new virtual register in the same register class
 // as operand 0 of MI.
695 const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
696 Register VReg2 = MRI->createVirtualRegister(RC);
699 Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));
700
701 // Generate Load from constant pool.
 // The load consumes VReg1 and marks it killed.
703 BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
705 .addReg(VReg1, getKillRegState(true))
706 .addMemOperand(MMO);
707
 // Operand 1 of the load is tagged with the MO_TOC_LO target flag.
708 Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);
709
710 // Insert the toc load instructions into InsInstrs.
 // Both go to the front: Load first, then TOCOffset in front of it, so the
 // final order is TOCOffset, Load, followed by the previous contents.
711 InsInstrs.insert(InsInstrs.begin(), Load);
712 InsInstrs.insert(InsInstrs.begin(), TOCOffset);
713 return VReg2;
714}
715
716// Returns the constant value from the constant pool if \p I is a load from the
717// constant pool; returns nullptr if no constant-pool operand is found.
718const Constant *
720 MachineFunction *MF = I->getMF();
721 MachineRegisterInfo *MRI = &MF->getRegInfo();
723 assert(I->mayLoad() && "Should be a load instruction.\n");
724 for (auto MO : I->uses()) {
725 if (!MO.isReg())
726 continue;
727 Register Reg = MO.getReg();
728 if (Reg == 0 || !Reg.isVirtual())
729 continue;
730 // Find the toc address.
731 MachineInstr *DefMI = MRI->getVRegDef(Reg);
732 for (auto MO2 : DefMI->uses())
733 if (MO2.isCPI())
734 return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
735 }
736 return nullptr;
737}
738
751
754 bool DoRegPressureReduce) const {
755 // Using the machine combiner in this way is potentially expensive, so
756 // restrict to when aggressive optimizations are desired.
757 if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
758 return false;
759
760 if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
761 return true;
762
764 DoRegPressureReduce);
765}
766
768 MachineInstr &Root, unsigned Pattern,
771 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
772 switch (Pattern) {
777 reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
778 break;
779 default:
780 // Reassociate default patterns.
782 DelInstrs, InstrIdxForVirtReg);
783 break;
784 }
785}
786
787void PPCInstrInfo::reassociateFMA(
788 MachineInstr &Root, unsigned Pattern,
791 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
792 MachineFunction *MF = Root.getMF();
793 MachineRegisterInfo &MRI = MF->getRegInfo();
795 MachineOperand &OpC = Root.getOperand(0);
796 Register RegC = OpC.getReg();
797 const TargetRegisterClass *RC = MRI.getRegClass(RegC);
798 MRI.constrainRegClass(RegC, RC);
799
800 unsigned FmaOp = Root.getOpcode();
801 int16_t Idx = getFMAOpIdxInfo(FmaOp);
802 assert(Idx >= 0 && "Root must be a FMA instruction");
803
804 bool IsILPReassociate =
807
809 uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
810
811 MachineInstr *Prev = nullptr;
812 MachineInstr *Leaf = nullptr;
813 switch (Pattern) {
814 default:
815 llvm_unreachable("not recognized pattern!");
818 Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
819 Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
820 break;
822 Register MULReg =
823 TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
824 Leaf = MRI.getVRegDef(MULReg);
825 break;
826 }
828 Register MULReg = TRI->lookThruCopyLike(
829 Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
830 Leaf = MRI.getVRegDef(MULReg);
831 break;
832 }
833 }
834
835 uint32_t IntersectedFlags = 0;
836 if (IsILPReassociate)
837 IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
838 else
839 IntersectedFlags = Root.getFlags() & Leaf->getFlags();
840
841 auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
842 bool &KillFlag) {
843 Reg = Operand.getReg();
844 MRI.constrainRegClass(Reg, RC);
845 KillFlag = Operand.isKill();
846 };
847
848 auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
849 Register &MulOp2, Register &AddOp,
850 bool &MulOp1KillFlag, bool &MulOp2KillFlag,
851 bool &AddOpKillFlag) {
852 GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
853 GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
854 GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
855 };
856
857 Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
858 RegA21, RegB;
859 bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
860 KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
861 KillA11 = false, KillA21 = false, KillB = false;
862
863 GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
864
865 if (IsILPReassociate)
866 GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
867
869 GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
870 GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
871 } else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {
872 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
873 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
874 } else {
875 // Get FSUB instruction info.
876 GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
877 GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
878 }
879
880 // Create new virtual registers for the new results instead of
881 // recycling legacy ones because the MachineCombiner's computation of the
882 // critical path requires a new register definition rather than an existing
883 // one.
884 // For register pressure reassociation, we only need create one virtual
885 // register for the new fma.
886 Register NewVRA = MRI.createVirtualRegister(RC);
887 InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
888
889 Register NewVRB = 0;
890 if (IsILPReassociate) {
891 NewVRB = MRI.createVirtualRegister(RC);
892 InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
893 }
894
895 Register NewVRD = 0;
897 NewVRD = MRI.createVirtualRegister(RC);
898 InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
899 }
900
901 auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
902 Register RegMul1, bool KillRegMul1,
903 Register RegMul2, bool KillRegMul2) {
904 MI->getOperand(AddOpIdx).setReg(RegAdd);
905 MI->getOperand(AddOpIdx).setIsKill(KillAdd);
906 MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
907 MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
908 MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
909 MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
910 };
911
912 MachineInstrBuilder NewARegPressure, NewCRegPressure;
913 switch (Pattern) {
914 default:
915 llvm_unreachable("not recognized pattern!");
917 // Create new instructions for insertion.
918 MachineInstrBuilder MINewB =
919 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
920 .addReg(RegX, getKillRegState(KillX))
921 .addReg(RegM21, getKillRegState(KillM21))
922 .addReg(RegM22, getKillRegState(KillM22));
923 MachineInstrBuilder MINewA =
924 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
925 .addReg(RegY, getKillRegState(KillY))
926 .addReg(RegM31, getKillRegState(KillM31))
927 .addReg(RegM32, getKillRegState(KillM32));
928 // If AddOpIdx is not 1, adjust the order.
929 if (AddOpIdx != 1) {
930 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
931 AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
932 }
933
934 MachineInstrBuilder MINewC =
935 BuildMI(*MF, Root.getDebugLoc(),
937 .addReg(NewVRB, getKillRegState(true))
938 .addReg(NewVRA, getKillRegState(true));
939
940 // Update flags for newly created instructions.
941 setSpecialOperandAttr(*MINewA, IntersectedFlags);
942 setSpecialOperandAttr(*MINewB, IntersectedFlags);
943 setSpecialOperandAttr(*MINewC, IntersectedFlags);
944
945 // Record new instructions for insertion.
946 InsInstrs.push_back(MINewA);
947 InsInstrs.push_back(MINewB);
948 InsInstrs.push_back(MINewC);
949 break;
950 }
952 assert(NewVRD && "new FMA register not created!");
953 // Create new instructions for insertion.
954 MachineInstrBuilder MINewA =
955 BuildMI(*MF, Leaf->getDebugLoc(),
957 .addReg(RegM11, getKillRegState(KillM11))
958 .addReg(RegM12, getKillRegState(KillM12));
959 MachineInstrBuilder MINewB =
960 BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
961 .addReg(RegX, getKillRegState(KillX))
962 .addReg(RegM21, getKillRegState(KillM21))
963 .addReg(RegM22, getKillRegState(KillM22));
964 MachineInstrBuilder MINewD =
965 BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
966 .addReg(NewVRA, getKillRegState(true))
967 .addReg(RegM31, getKillRegState(KillM31))
968 .addReg(RegM32, getKillRegState(KillM32));
969 // If AddOpIdx is not 1, adjust the order.
970 if (AddOpIdx != 1) {
971 AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
972 AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
973 KillM32);
974 }
975
976 MachineInstrBuilder MINewC =
977 BuildMI(*MF, Root.getDebugLoc(),
979 .addReg(NewVRB, getKillRegState(true))
980 .addReg(NewVRD, getKillRegState(true));
981
982 // Update flags for newly created instructions.
983 setSpecialOperandAttr(*MINewA, IntersectedFlags);
984 setSpecialOperandAttr(*MINewB, IntersectedFlags);
985 setSpecialOperandAttr(*MINewD, IntersectedFlags);
986 setSpecialOperandAttr(*MINewC, IntersectedFlags);
987
988 // Record new instructions for insertion.
989 InsInstrs.push_back(MINewA);
990 InsInstrs.push_back(MINewB);
991 InsInstrs.push_back(MINewD);
992 InsInstrs.push_back(MINewC);
993 break;
994 }
997 Register VarReg;
998 bool KillVarReg = false;
1000 VarReg = RegM31;
1001 KillVarReg = KillM31;
1002 } else {
1003 VarReg = RegM32;
1004 KillVarReg = KillM32;
1005 }
1006 // We don't want to get negative const from memory pool too early, as the
1007 // created entry will not be deleted even if it has no users. Since all
1008 // operand of Leaf and Root are virtual register, we use zero register
1009 // here as a placeholder. When the InsInstrs is selected in
1010 // MachineCombiner, we call finalizeInsInstrs to replace the zero register
1011 // with a virtual register which is a load from constant pool.
1012 NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
1013 .addReg(RegB, getKillRegState(RegB))
1014 .addReg(RegY, getKillRegState(KillY))
1015 .addReg(PPC::ZERO8);
1016 NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
1017 .addReg(NewVRA, getKillRegState(true))
1018 .addReg(RegX, getKillRegState(KillX))
1019 .addReg(VarReg, getKillRegState(KillVarReg));
1020 // For now, we only support xsmaddadp/xsmaddasp; their add operands are
1021 // both at index 1, so there is no need to adjust.
1022 // FIXME: when support for more fma instructions (like fma/fmas) is added,
1023 // adjust the operand index here.
1024 break;
1025 }
1026 }
1027
1028 if (!IsILPReassociate) {
1029 setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
1030 setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);
1031
1032 InsInstrs.push_back(NewARegPressure);
1033 InsInstrs.push_back(NewCRegPressure);
1034 }
1035
1036 assert(!InsInstrs.empty() &&
1037 "Insertion instructions set should not be empty!");
1038
1039 // Record old instructions for deletion.
1040 DelInstrs.push_back(Leaf);
1041 if (IsILPReassociate)
1042 DelInstrs.push_back(Prev);
1043 DelInstrs.push_back(&Root);
1044}
1045
1046// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
// On a match the three output references are filled in and true is returned;
// for any other opcode the outputs are left untouched and false is returned.
1048 Register &SrcReg, Register &DstReg,
1049 unsigned &SubIdx) const {
1050 switch (MI.getOpcode()) {
1051 default: return false;
// Only the EXTSW opcode variants are recognized.
1052 case PPC::EXTSW:
1053 case PPC::EXTSW_32:
1054 case PPC::EXTSW_32_64:
// Operand 1 is reported as the source and operand 0 as the destination; the
// sub-register index reported is always PPC::sub_32.
1055 SrcReg = MI.getOperand(1).getReg();
1056 DstReg = MI.getOperand(0).getReg();
1057 SubIdx = PPC::sub_32;
1058 return true;
1059 }
1060}
1061
1063 int &FrameIndex) const {
// Recognizes a direct load from a frame index. If MI's opcode is one of the
// opcodes returned by getLoadOpcodesForSpillArray() and its address is
// "immediate 0 + frame index", FrameIndex receives that index and the register
// in operand 0 is returned. In every other case 0 is returned and FrameIndex
// is not written.
1064 if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
1065 // Check for the operands added by addFrameReference (the immediate is the
1066 // offset which defaults to 0).
1067 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1068 MI.getOperand(2).isFI()) {
1069 FrameIndex = MI.getOperand(2).getIndex();
1070 return MI.getOperand(0).getReg();
1071 }
1072 }
// Not a recognized frame-index load.
1073 return 0;
1074}
1075
1076// For opcodes with the ReMaterializable flag set, this function is called to
1077// verify the instruction is really rematable.
1079 const MachineInstr &MI) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 // Let base implementation decide.
1083 break;
1084 case PPC::LI:
1085 case PPC::LI8:
1086 case PPC::PLI:
1087 case PPC::PLI8:
1088 case PPC::LIS:
1089 case PPC::LIS8:
1090 case PPC::ADDIStocHA:
1091 case PPC::ADDIStocHA8:
1092 case PPC::ADDItocL:
1093 case PPC::ADDItocL8:
1094 case PPC::LOAD_STACK_GUARD:
1095 case PPC::PPCLdFixedAddr:
1096 case PPC::XXLXORz:
1097 case PPC::XXLXORspz:
1098 case PPC::XXLXORdpz:
1099 case PPC::XXLEQVOnes:
1100 case PPC::XXSPLTI32DX:
1101 case PPC::XXSPLTIW:
1102 case PPC::XXSPLTIDP:
1103 case PPC::V_SET0B:
1104 case PPC::V_SET0H:
1105 case PPC::V_SET0:
1106 case PPC::V_SETALLONESB:
1107 case PPC::V_SETALLONESH:
1108 case PPC::V_SETALLONES:
1109 case PPC::CRSET:
1110 case PPC::CRUNSET:
1111 case PPC::XXSETACCZ:
1112 case PPC::DMXXSETACCZ:
1113 return true;
1114 }
1116}
1117
1119 int &FrameIndex) const {
// Recognizes a direct store to a frame index. If MI's opcode is one of the
// opcodes returned by getStoreOpcodesForSpillArray() and its address is
// "immediate 0 + frame index", FrameIndex receives that index and the register
// in operand 0 is returned. In every other case 0 is returned and FrameIndex
// is not written.
1120 if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
// Operand 1 must be a zero immediate offset and operand 2 the frame index.
1121 if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
1122 MI.getOperand(2).isFI()) {
1123 FrameIndex = MI.getOperand(2).getIndex();
1124 return MI.getOperand(0).getReg();
1125 }
1126 }
// Not a recognized frame-index store.
1127 return 0;
1128}
1129
1131 unsigned OpIdx1,
1132 unsigned OpIdx2) const {
// Commutes two operands of MI. Every opcode other than RLWIMI / RLWIMI_rec is
// forwarded to TargetInstrInfo::commuteInstructionImpl. For those two opcodes
// register operands 1 and 2 are exchanged and the mask operands (4 and 5) are
// rewritten as described in the comments below.
// Returns nullptr when the rotate count (operand 3) is non-zero or when the
// mask is (MB=0, ME=31). When NewMI is true a newly built instruction is
// returned and MI is left unchanged; otherwise MI is modified in place and
// &MI is returned.
1133 MachineFunction &MF = *MI.getParent()->getParent();
1134
1135 // Normal instructions can be commuted the obvious way.
1136 if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1137 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1138 // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1139 // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1140 // changing the relative order of the mask operands might change what happens
1141 // to the high-bits of the mask (and, thus, the result).
1142
1143 // Cannot commute if it has a non-zero rotate count.
1144 if (MI.getOperand(3).getImm() != 0)
1145 return nullptr;
1146
1147 // If we have a zero rotate count, we have:
1148 // M = mask(MB,ME)
1149 // Op0 = (Op1 & ~M) | (Op2 & M)
1150 // Change this to:
1151 // M = mask((ME+1)&31, (MB-1)&31)
1152 // Op0 = (Op2 & ~M) | (Op1 & M)
1153
1154 // Swap op1/op2
1155 assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
1156 "Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
// Snapshot registers, sub-register indices and kill flags before anything is
// rewritten, since the in-place path below overwrites the operands.
1157 Register Reg0 = MI.getOperand(0).getReg();
1158 Register Reg1 = MI.getOperand(1).getReg();
1159 Register Reg2 = MI.getOperand(2).getReg();
1160 unsigned SubReg1 = MI.getOperand(1).getSubReg();
1161 unsigned SubReg2 = MI.getOperand(2).getSubReg();
1162 bool Reg1IsKill = MI.getOperand(1).isKill();
1163 bool Reg2IsKill = MI.getOperand(2).isKill();
1164 bool ChangeReg0 = false;
1165 // If machine instrs are no longer in two-address forms, update
1166 // destination register as well.
1167 if (Reg0 == Reg1) {
1168 // Must be two address instruction (i.e. op1 is tied to op0).
1169 assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&
1170 "Expecting a two-address instruction!");
1171 assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
// After the swap Reg2 sits in operand 1 and also becomes the destination
// (see ChangeReg0 below), so its kill flag is dropped — presumably because a
// register that is redefined by the same instruction must not be marked
// killed; confirm.
1172 Reg2IsKill = false;
1173 ChangeReg0 = true;
1174 }
1175
1176 // Masks.
1177 unsigned MB = MI.getOperand(4).getImm();
1178 unsigned ME = MI.getOperand(5).getImm();
1179
1180 // We can't commute a trivial mask (there is no way to represent an all-zero
1181 // mask).
1182 if (MB == 0 && ME == 31)
1183 return nullptr;
1184
1185 if (NewMI) {
1186 // Create a new instruction.
// Note: this Reg0 shadows the outer Reg0 declared above.
// NOTE(review): the new instruction is built without SubReg1/SubReg2 and
// without a sub-register on the def, unlike the in-place path — confirm that
// sub-registers cannot occur when NewMI is requested.
1187 Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
1188 bool Reg0IsDead = MI.getOperand(0).isDead();
1189 return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
1190 .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
1191 .addReg(Reg2, getKillRegState(Reg2IsKill))
1192 .addReg(Reg1, getKillRegState(Reg1IsKill))
1193 .addImm((ME + 1) & 31)
1194 .addImm((MB - 1) & 31);
1195 }
1196
// In-place path: retarget the destination first (tied case only), then
// exchange register, sub-register and kill state of operands 1 and 2.
1197 if (ChangeReg0) {
1198 MI.getOperand(0).setReg(Reg2);
1199 MI.getOperand(0).setSubReg(SubReg2);
1200 }
1201 MI.getOperand(2).setReg(Reg1);
1202 MI.getOperand(1).setReg(Reg2);
1203 MI.getOperand(2).setSubReg(SubReg1);
1204 MI.getOperand(1).setSubReg(SubReg2);
1205 MI.getOperand(2).setIsKill(Reg1IsKill);
1206 MI.getOperand(1).setIsKill(Reg2IsKill);
1207
1208 // Swap the mask around.
// MB and ME are unsigned, so (MB - 1) & 31 wraps 0 to 31.
1209 MI.getOperand(4).setImm((ME + 1) & 31);
1210 MI.getOperand(5).setImm((MB - 1) & 31);
1211 return &MI;
1212}
1213
1215 unsigned &SrcOpIdx1,
1216 unsigned &SrcOpIdx2) const {
// Reports which operand pair of MI may be commuted. Opcodes for which
// PPC::getAltVSXFMAOpcode() returns -1 are handled by the generic
// TargetInstrInfo implementation; for the others the fixed pair (2, 3) is
// reconciled with the caller-supplied indices via fixCommutedOpIndices().
1217 // For VSX A-Type FMA instructions, it is the first two operands that can be
1218 // commuted, however, because the non-encoded tied input operand is listed
1219 // first, the operands to swap are actually the second and third.
1220
// -1 presumably means "no alternate VSX FMA form exists", i.e. MI is not one
// of the VSX FMA opcodes this override cares about — confirm against the
// generated instruction-mapping table.
1221 int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
1222 if (AltOpc == -1)
1223 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1224
1225 // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1226 // and SrcOpIdx2.
1227 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
1228}
1229
// Builds a single nop instruction at position MI in MBB. The opcode depends
// on the subtarget's CPU directive: PWR6 gets NOP_GT_PWR6; PWR7, PWR8 and
// PWR9 all get NOP_GT_PWR7; everything else gets the plain PPC::NOP.
1232 // This function is used for scheduling, and the nop wanted here is the type
1233 // that terminates dispatch groups on the POWER cores.
1234 unsigned Directive = Subtarget.getCPUDirective();
1235 unsigned Opcode;
1236 switch (Directive) {
1237 default: Opcode = PPC::NOP; break;
1238 case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1239 case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1240 case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
1241 // FIXME: Update when POWER9 scheduling model is ready.
1242 case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1243 }
1244
// The nop is emitted with a default-constructed (empty) debug location and
// no operands.
1245 DebugLoc DL;
1246 BuildMI(MBB, MI, DL, get(Opcode));
1247}
1248
1249/// Return the MCInst to use for a noop: a default-constructed instruction
/// whose opcode is set to PPC::NOP and to which no operands are added.
1251 MCInst Nop;
1252 Nop.setOpcode(PPC::NOP);
1253 return Nop;
1254}
1255
1256// Branch analysis.
1257// Note: If the condition register is set to CTR or CTR8 then this is a
1258// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1261 MachineBasicBlock *&FBB,
1263 bool AllowModify) const {
1264 bool isPPC64 = Subtarget.isPPC64();
1265
1266 // If the block has no terminators, it just falls into the block after it.
1267 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1268 if (I == MBB.end())
1269 return false;
1270
1271 if (!isUnpredicatedTerminator(*I))
1272 return false;
1273
1274 if (AllowModify) {
1275 // If the BB ends with an unconditional branch to the fallthrough BB,
1276 // we eliminate the branch instruction.
1277 if (I->getOpcode() == PPC::B &&
1278 MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
1279 I->eraseFromParent();
1280
1281 // We update iterator after deleting the last branch.
1282 I = MBB.getLastNonDebugInstr();
1283 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
1284 return false;
1285 }
1286 }
1287
1288 // Get the last instruction in the block.
1289 MachineInstr &LastInst = *I;
1290
1291 // If there is only one terminator instruction, process it.
1292 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
1293 if (LastInst.getOpcode() == PPC::B) {
1294 if (!LastInst.getOperand(0).isMBB())
1295 return true;
1296 TBB = LastInst.getOperand(0).getMBB();
1297 return false;
1298 } else if (LastInst.getOpcode() == PPC::BCC) {
1299 if (!LastInst.getOperand(2).isMBB())
1300 return true;
1301 // Block ends with fall-through condbranch.
1302 TBB = LastInst.getOperand(2).getMBB();
1303 Cond.push_back(LastInst.getOperand(0));
1304 Cond.push_back(LastInst.getOperand(1));
1305 return false;
1306 } else if (LastInst.getOpcode() == PPC::BC) {
1307 if (!LastInst.getOperand(1).isMBB())
1308 return true;
1309 // Block ends with fall-through condbranch.
1310 TBB = LastInst.getOperand(1).getMBB();
1312 Cond.push_back(LastInst.getOperand(0));
1313 return false;
1314 } else if (LastInst.getOpcode() == PPC::BCn) {
1315 if (!LastInst.getOperand(1).isMBB())
1316 return true;
1317 // Block ends with fall-through condbranch.
1318 TBB = LastInst.getOperand(1).getMBB();
1320 Cond.push_back(LastInst.getOperand(0));
1321 return false;
1322 } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
1323 LastInst.getOpcode() == PPC::BDNZ) {
1324 if (!LastInst.getOperand(0).isMBB())
1325 return true;
1327 return true;
1328 TBB = LastInst.getOperand(0).getMBB();
1329 Cond.push_back(MachineOperand::CreateImm(1));
1330 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1331 true));
1332 return false;
1333 } else if (LastInst.getOpcode() == PPC::BDZ8 ||
1334 LastInst.getOpcode() == PPC::BDZ) {
1335 if (!LastInst.getOperand(0).isMBB())
1336 return true;
1338 return true;
1339 TBB = LastInst.getOperand(0).getMBB();
1340 Cond.push_back(MachineOperand::CreateImm(0));
1341 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1342 true));
1343 return false;
1344 }
1345
1346 // Otherwise, don't know what this is.
1347 return true;
1348 }
1349
1350 // Get the instruction before it if it's a terminator.
1351 MachineInstr &SecondLastInst = *I;
1352
1353 // If there are three terminators, we don't know what sort of block this is.
1354 if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
1355 return true;
1356
1357 // If the block ends with PPC::B and PPC::BCC, handle it.
1358 if (SecondLastInst.getOpcode() == PPC::BCC &&
1359 LastInst.getOpcode() == PPC::B) {
1360 if (!SecondLastInst.getOperand(2).isMBB() ||
1361 !LastInst.getOperand(0).isMBB())
1362 return true;
1363 TBB = SecondLastInst.getOperand(2).getMBB();
1364 Cond.push_back(SecondLastInst.getOperand(0));
1365 Cond.push_back(SecondLastInst.getOperand(1));
1366 FBB = LastInst.getOperand(0).getMBB();
1367 return false;
1368 } else if (SecondLastInst.getOpcode() == PPC::BC &&
1369 LastInst.getOpcode() == PPC::B) {
1370 if (!SecondLastInst.getOperand(1).isMBB() ||
1371 !LastInst.getOperand(0).isMBB())
1372 return true;
1373 TBB = SecondLastInst.getOperand(1).getMBB();
1375 Cond.push_back(SecondLastInst.getOperand(0));
1376 FBB = LastInst.getOperand(0).getMBB();
1377 return false;
1378 } else if (SecondLastInst.getOpcode() == PPC::BCn &&
1379 LastInst.getOpcode() == PPC::B) {
1380 if (!SecondLastInst.getOperand(1).isMBB() ||
1381 !LastInst.getOperand(0).isMBB())
1382 return true;
1383 TBB = SecondLastInst.getOperand(1).getMBB();
1385 Cond.push_back(SecondLastInst.getOperand(0));
1386 FBB = LastInst.getOperand(0).getMBB();
1387 return false;
1388 } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
1389 SecondLastInst.getOpcode() == PPC::BDNZ) &&
1390 LastInst.getOpcode() == PPC::B) {
1391 if (!SecondLastInst.getOperand(0).isMBB() ||
1392 !LastInst.getOperand(0).isMBB())
1393 return true;
1395 return true;
1396 TBB = SecondLastInst.getOperand(0).getMBB();
1397 Cond.push_back(MachineOperand::CreateImm(1));
1398 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1399 true));
1400 FBB = LastInst.getOperand(0).getMBB();
1401 return false;
1402 } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
1403 SecondLastInst.getOpcode() == PPC::BDZ) &&
1404 LastInst.getOpcode() == PPC::B) {
1405 if (!SecondLastInst.getOperand(0).isMBB() ||
1406 !LastInst.getOperand(0).isMBB())
1407 return true;
1409 return true;
1410 TBB = SecondLastInst.getOperand(0).getMBB();
1411 Cond.push_back(MachineOperand::CreateImm(0));
1412 Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
1413 true));
1414 FBB = LastInst.getOperand(0).getMBB();
1415 return false;
1416 }
1417
1418 // If the block ends with two PPC::Bs, handle it. The second one is not
1419 // executed, so remove it.
1420 if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1421 if (!SecondLastInst.getOperand(0).isMBB())
1422 return true;
1423 TBB = SecondLastInst.getOperand(0).getMBB();
1424 I = LastInst;
1425 if (AllowModify)
1426 I->eraseFromParent();
1427 return false;
1428 }
1429
1430 // Otherwise, can't handle this.
1431 return true;
1432}
1433
1435 int *BytesRemoved) const {
// Erases up to two branch instructions from the end of MBB and returns how
// many were erased (0, 1 or 2). Code-size accounting is not implemented, so
// BytesRemoved must be null.
1436 assert(!BytesRemoved && "code size not handled");
1437
1438 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1439 if (I == MBB.end())
1440 return 0;
1441
// The last instruction must be one of the branch opcodes handled here:
// B, BCC, BC, BCn, BDNZ(8) or BDZ(8).
1442 if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
1443 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1444 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1445 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1446 return 0;
1447
1448 // Remove the branch.
1449 I->eraseFromParent();
1450
// Look at the instruction that is now last in the block.
// NOTE(review): this second lookup steps back from MBB.end() directly rather
// than using getLastNonDebugInstr() as the first lookup does — confirm that a
// trailing debug instruction cannot hide a conditional branch here.
1451 I = MBB.end();
1452
1453 if (I == MBB.begin()) return 1;
1454 --I;
// Only a conditional branch is removed in the second step; an unconditional
// PPC::B is deliberately absent from this list.
1455 if (I->getOpcode() != PPC::BCC &&
1456 I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
1457 I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
1458 I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
1459 return 1;
1460
1461 // Remove the branch.
1462 I->eraseFromParent();
1463 return 2;
1464}
1465
1468 MachineBasicBlock *FBB,
1470 const DebugLoc &DL,
1471 int *BytesAdded) const {
// Appends branch instructions to the end of MBB and returns how many were
// added: 1 for a one-way branch (FBB is null), 2 for a conditional branch to
// TBB followed by an unconditional branch to FBB.
// Cond is either empty (unconditional) or has two entries: Cond[0] is an
// immediate and Cond[1] a register operand. The opcode is chosen as follows:
//   - Cond[1] is CTR/CTR8: BDNZ(8) when Cond[0] is non-zero, else BDZ(8);
//   - Cond[0] is PRED_BIT_SET:   BC on Cond[1];
//   - Cond[0] is PRED_BIT_UNSET: BCn on Cond[1];
//   - otherwise: BCC with predicate Cond[0] on Cond[1].
// Code-size accounting is not implemented, so BytesAdded must be null.
1472 // Shouldn't be a fall through.
1473 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1474 assert((Cond.size() == 2 || Cond.size() == 0) &&
1475 "PPC branch conditions have two components!");
1476 assert(!BytesAdded && "code size not handled");
1477
// Selects between the 64-bit (…8) and 32-bit CTR-decrementing branch opcodes.
1478 bool isPPC64 = Subtarget.isPPC64();
1479
1480 // One-way branch.
1481 if (!FBB) {
1482 if (Cond.empty()) // Unconditional branch
1483 BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
1484 else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1485 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1486 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1487 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1488 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1489 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1490 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1491 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1492 else // Conditional branch
1493 BuildMI(&MBB, DL, get(PPC::BCC))
1494 .addImm(Cond[0].getImm())
1495 .add(Cond[1])
1496 .addMBB(TBB);
1497 return 1;
1498 }
1499
1500 // Two-way Conditional Branch.
// NOTE(review): Cond[1] is indexed here without checking Cond.empty(); the
// assertion above still admits an empty Cond together with a non-null FBB —
// presumably callers never do that; confirm.
1501 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1502 BuildMI(&MBB, DL, get(Cond[0].getImm() ?
1503 (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1504 (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
1505 else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
1506 BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
1507 else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
1508 BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
1509 else
1510 BuildMI(&MBB, DL, get(PPC::BCC))
1511 .addImm(Cond[0].getImm())
1512 .add(Cond[1])
1513 .addMBB(TBB);
// The false destination is always reached through an unconditional branch.
1514 BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
1515 return 2;
1516}
1517
1518// Select analysis.
1521 Register DstReg, Register TrueReg,
1522 Register FalseReg, int &CondCycles,
1523 int &TrueCycles, int &FalseCycles) const {
// Decides whether a select between TrueReg and FalseReg under condition Cond
// can be emitted with isel. Returns false (leaving the cycle outputs
// untouched) when the subtarget has no isel, when Cond does not have exactly
// two entries, when Cond[1] is CTR/CTR8 or any other physical register, or
// when the two values have no common integer GPR register class. On success
// all three cycle outputs are set to 1 and true is returned.
1524 if (!Subtarget.hasISEL())
1525 return false;
1526
1527 if (Cond.size() != 2)
1528 return false;
1529
1530 // If this is really a bdnz-like condition, then it cannot be turned into a
1531 // select.
1532 if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
1533 return false;
1534
1535 // If the conditional branch uses a physical register, then it cannot be
1536 // turned into a select.
1537 if (Cond[1].getReg().isPhysical())
1538 return false;
1539
1540 // Check register classes.
1541 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1542 const TargetRegisterClass *RC =
1543 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
// A null result means the two register classes share no common sub-class.
1544 if (!RC)
1545 return false;
1546
1547 // isel is for regular integer GPRs only.
1548 if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1549 !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1550 !PPC::G8RCRegClass.hasSubClassEq(RC) &&
1551 !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1552 return false;
1553
1554 // FIXME: These numbers are for the A2, how well they work for other cores is
1555 // an open question. On the A2, the isel instruction has a 2-cycle latency
1556 // but single-cycle throughput. These numbers are used in combination with
1557 // the MispredictPenalty setting from the active SchedMachineModel.
1558 CondCycles = 1;
1559 TrueCycles = 1;
1560 FalseCycles = 1;
1561
1562 return true;
1563}
1564
1567 const DebugLoc &dl, Register DestReg,
1569 Register FalseReg) const {
// Emits an isel (ISEL or ISEL8) at position MI in MBB that writes DestReg
// with TrueReg or FalseReg depending on the two-entry condition Cond:
// Cond[0] is a PPC::Predicate immediate and Cond[1] the condition register.
// The predicate is translated into a CR sub-register index plus a flag saying
// whether the two value operands have to be exchanged. If the first value
// operand lives in a register class that contains R0/X0, a COPY into a
// no-R0/no-X0 class is emitted first.
1570 assert(Cond.size() == 2 &&
1571 "PPC branch conditions have two components!");
1572
1573 // Get the register classes.
1574 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1575 const TargetRegisterClass *RC =
1576 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
1577 assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1578
// 64-bit GPR classes select ISEL8, 32-bit GPR classes select ISEL.
1579 bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
1580 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1581 assert((Is64Bit ||
1582 PPC::GPRCRegClass.hasSubClassEq(RC) ||
1583 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1584 "isel is for regular integer GPRs only");
1585
1586 unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1587 auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
1588
// Each predicate and its negation share one CR sub-register (EQ/NE -> sub_eq,
// LT/GE -> sub_lt, GT/LE -> sub_gt, UN/NU -> sub_un); the negated form is
// expressed by swapping the value operands. The _MINUS/_PLUS variants are
// treated exactly like the base predicate. The switch has no default, so an
// unlisted predicate keeps SubIdx = 0 and SwapOps = false.
1589 unsigned SubIdx = 0;
1590 bool SwapOps = false;
1591 switch (SelectPred) {
1592 case PPC::PRED_EQ:
1593 case PPC::PRED_EQ_MINUS:
1594 case PPC::PRED_EQ_PLUS:
1595 SubIdx = PPC::sub_eq; SwapOps = false; break;
1596 case PPC::PRED_NE:
1597 case PPC::PRED_NE_MINUS:
1598 case PPC::PRED_NE_PLUS:
1599 SubIdx = PPC::sub_eq; SwapOps = true; break;
1600 case PPC::PRED_LT:
1601 case PPC::PRED_LT_MINUS:
1602 case PPC::PRED_LT_PLUS:
1603 SubIdx = PPC::sub_lt; SwapOps = false; break;
1604 case PPC::PRED_GE:
1605 case PPC::PRED_GE_MINUS:
1606 case PPC::PRED_GE_PLUS:
1607 SubIdx = PPC::sub_lt; SwapOps = true; break;
1608 case PPC::PRED_GT:
1609 case PPC::PRED_GT_MINUS:
1610 case PPC::PRED_GT_PLUS:
1611 SubIdx = PPC::sub_gt; SwapOps = false; break;
1612 case PPC::PRED_LE:
1613 case PPC::PRED_LE_MINUS:
1614 case PPC::PRED_LE_PLUS:
1615 SubIdx = PPC::sub_gt; SwapOps = true; break;
1616 case PPC::PRED_UN:
1617 case PPC::PRED_UN_MINUS:
1618 case PPC::PRED_UN_PLUS:
1619 SubIdx = PPC::sub_un; SwapOps = false; break;
1620 case PPC::PRED_NU:
1621 case PPC::PRED_NU_MINUS:
1622 case PPC::PRED_NU_PLUS:
1623 SubIdx = PPC::sub_un; SwapOps = true; break;
// For the bit predicates Cond[1] is used without a sub-register index.
1624 case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
1625 case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
1626 }
1627
1628 Register FirstReg = SwapOps ? FalseReg : TrueReg,
1629 SecondReg = SwapOps ? TrueReg : FalseReg;
1630
1631 // The first input register of isel cannot be r0. If it is a member
1632 // of a register class that can be r0, then copy it first (the
1633 // register allocator should eliminate the copy).
1634 if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
1635 MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
// Pick the 64-bit or 32-bit "no zero register" class to match FirstReg.
1636 const TargetRegisterClass *FirstRC =
1637 MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
1638 &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1639 Register OldFirstReg = FirstReg;
1640 FirstReg = MRI.createVirtualRegister(FirstRC);
1641 BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
1642 .addReg(OldFirstReg);
1643 }
1644
// Operand order: first value, second value, then the condition register
// (restricted to SubIdx when it is non-zero).
1645 BuildMI(MBB, MI, dl, get(OpCode), DestReg)
1646 .addReg(FirstReg)
1647 .addReg(SecondReg)
1648 .addReg(Cond[1].getReg(), {}, SubIdx);
1649}
1650
1651static unsigned getCRBitValue(unsigned CRBit) {
1652 unsigned Ret = 4;
1653 if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
1654 CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
1655 CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
1656 CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
1657 Ret = 3;
1658 if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
1659 CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
1660 CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
1661 CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
1662 Ret = 2;
1663 if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
1664 CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
1665 CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
1666 CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
1667 Ret = 1;
1668 if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
1669 CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
1670 CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
1671 CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
1672 Ret = 0;
1673
1674 assert(Ret != 4 && "Invalid CR bit register");
1675 return Ret;
1676}
1677
1680 const DebugLoc &DL, Register DestReg,
1681 Register SrcReg, bool KillSrc,
1682 bool RenamableDest, bool RenamableSrc) const {
1683 // We can end up with self copies and similar things as a result of VSX copy
1684 // legalization. Promote them here.
1686 if (PPC::F8RCRegClass.contains(DestReg) &&
1687 PPC::VSRCRegClass.contains(SrcReg)) {
1688 MCRegister SuperReg =
1689 TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
1690
1691 if (VSXSelfCopyCrash && SrcReg == SuperReg)
1692 llvm_unreachable("nop VSX copy");
1693
1694 DestReg = SuperReg;
1695 } else if (PPC::F8RCRegClass.contains(SrcReg) &&
1696 PPC::VSRCRegClass.contains(DestReg)) {
1697 MCRegister SuperReg =
1698 TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
1699
1700 if (VSXSelfCopyCrash && DestReg == SuperReg)
1701 llvm_unreachable("nop VSX copy");
1702
1703 SrcReg = SuperReg;
1704 }
1705
1706 // Different class register copy
1707 if (PPC::CRBITRCRegClass.contains(SrcReg) &&
1708 PPC::GPRCRegClass.contains(DestReg)) {
1709 MCRegister CRReg = getCRFromCRBit(SrcReg);
1710 BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
1711 getKillRegState(KillSrc);
1712 // Rotate the CR bit in the CR fields to be the least significant bit and
1713 // then mask with 0x1 (MB = ME = 31).
1714 BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
1715 .addReg(DestReg, RegState::Kill)
1716 .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
1717 .addImm(31)
1718 .addImm(31);
1719 return;
1720 } else if (PPC::CRRCRegClass.contains(SrcReg) &&
1721 (PPC::G8RCRegClass.contains(DestReg) ||
1722 PPC::GPRCRegClass.contains(DestReg))) {
1723 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1724 unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1725 unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1726 unsigned CRNum = TRI->getEncodingValue(SrcReg);
1727 BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
1728 getKillRegState(KillSrc);
1729 if (CRNum == 7)
1730 return;
1731 // Shift the CR bits so that the CR field ends up in the lowest 4 bits of the GPR.
1732 BuildMI(MBB, I, DL, get(ShCode), DestReg)
1733 .addReg(DestReg, RegState::Kill)
1734 .addImm(CRNum * 4 + 4)
1735 .addImm(28)
1736 .addImm(31);
1737 return;
1738 } else if (PPC::G8RCRegClass.contains(SrcReg) &&
1739 PPC::VSFRCRegClass.contains(DestReg)) {
1740 assert(Subtarget.hasDirectMove() &&
1741 "Subtarget doesn't support directmove, don't know how to copy.");
1742 BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
1743 NumGPRtoVSRSpill++;
1744 getKillRegState(KillSrc);
1745 return;
1746 } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
1747 PPC::G8RCRegClass.contains(DestReg)) {
1748 assert(Subtarget.hasDirectMove() &&
1749 "Subtarget doesn't support directmove, don't know how to copy.");
1750 BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
1751 getKillRegState(KillSrc);
1752 return;
1753 } else if (PPC::SPERCRegClass.contains(SrcReg) &&
1754 PPC::GPRCRegClass.contains(DestReg)) {
1755 BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
1756 getKillRegState(KillSrc);
1757 return;
1758 } else if (PPC::GPRCRegClass.contains(SrcReg) &&
1759 PPC::SPERCRegClass.contains(DestReg)) {
1760 BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
1761 getKillRegState(KillSrc);
1762 return;
1763 } else if ((PPC::G8RCRegClass.contains(DestReg) ||
1764 PPC::GPRCRegClass.contains(DestReg)) &&
1765 SrcReg == PPC::CARRY) {
1766 bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
1767 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MFSPR8 : PPC::MFSPR), DestReg)
1768 .addImm(1)
1769 .addReg(PPC::CARRY, RegState::Implicit);
1770 return;
1771 } else if ((PPC::G8RCRegClass.contains(SrcReg) ||
1772 PPC::GPRCRegClass.contains(SrcReg)) &&
1773 DestReg == PPC::CARRY) {
1774 bool Is64Bit = PPC::G8RCRegClass.contains(SrcReg);
1775 BuildMI(MBB, I, DL, get(Is64Bit ? PPC::MTSPR8 : PPC::MTSPR))
1776 .addImm(1)
1777 .addReg(SrcReg)
1778 .addReg(PPC::CARRY, RegState::ImplicitDefine);
1779 return;
1780 }
1781
1782 unsigned Opc;
1783 if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
1784 Opc = PPC::OR;
1785 else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
1786 Opc = PPC::OR8;
1787 else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
1788 Opc = PPC::FMR;
1789 else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
1790 Opc = PPC::MCRF;
1791 else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
1792 Opc = PPC::VOR;
1793 else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
1794 // There are two different ways this can be done:
1795 // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1796 // issue in VSU pipeline 0.
1797 // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1798 // can go to either pipeline.
1799 // We'll always use xxlor here, because in practically all cases where
1800 // copies are generated, they are close enough to some use that the
1801 // lower-latency form is preferable.
1802 Opc = PPC::XXLOR;
1803 else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
1804 PPC::VSSRCRegClass.contains(DestReg, SrcReg))
1805 Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1806 else if (Subtarget.pairedVectorMemops() &&
1807 PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
1808 if (SrcReg > PPC::VSRp15)
1809 SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
1810 else
1811 SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
1812 if (DestReg > PPC::VSRp15)
1813 DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
1814 else
1815 DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
1816 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
1817 addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1818 BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
1819 addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
1820 return;
1821 }
1822 else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
1823 Opc = PPC::CROR;
1824 else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
1825 Opc = PPC::EVOR;
1826 else if ((PPC::ACCRCRegClass.contains(DestReg) ||
1827 PPC::UACCRCRegClass.contains(DestReg)) &&
1828 (PPC::ACCRCRegClass.contains(SrcReg) ||
1829 PPC::UACCRCRegClass.contains(SrcReg))) {
1830 // If primed, de-prime the source register, copy the individual registers
1831 // and prime the destination if needed. The vector subregisters are
1832 // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
1833 // source is primed, we need to re-prime it after the copy as well.
1834 PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1835 bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
1836 bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
1837 MCRegister VSLSrcReg =
1838 PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1839 MCRegister VSLDestReg =
1840 PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
1841 if (SrcPrimed)
1842 BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
1843 for (unsigned Idx = 0; Idx < 4; Idx++)
1844 BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
1845 .addReg(VSLSrcReg + Idx)
1846 .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
1847 if (DestPrimed)
1848 BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
1849 if (SrcPrimed && !KillSrc)
1850 BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
1851 return;
1852 } else if (PPC::G8pRCRegClass.contains(DestReg) &&
1853 PPC::G8pRCRegClass.contains(SrcReg)) {
1854 // TODO: Handle G8RC to G8pRC (and vice versa) copy.
1855 unsigned DestRegIdx = DestReg - PPC::G8p0;
1856 MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
1857 MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
1858 unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1859 MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
1860 MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
1861 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
1862 .addReg(SrcRegSub0)
1863 .addReg(SrcRegSub0, getKillRegState(KillSrc));
1864 BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
1865 .addReg(SrcRegSub1)
1866 .addReg(SrcRegSub1, getKillRegState(KillSrc));
1867 return;
1868 } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
1869 PPC::WACC_HIRCRegClass.contains(DestReg)) &&
1870 (PPC::WACCRCRegClass.contains(SrcReg) ||
1871 PPC::WACC_HIRCRegClass.contains(SrcReg))) {
1872
1873 Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
1874 : PPC::DMXXEXTFDMR512_HI;
1875
1876 RegScavenger RS;
1877 RS.enterBasicBlockEnd(MBB);
1878 RS.backward(std::next(I));
1879
1880 Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1881 /* RestoreAfter */ false, 0,
1882 /* AllowSpill */ false);
1883
1884 RS.setRegUsed(TmpReg1);
1885 Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
1886 /* RestoreAfter */ false, 0,
1887 /* AllowSpill */ false);
1888
1889 BuildMI(MBB, I, DL, get(Opc))
1890 .addReg(TmpReg1, RegState::Define)
1891 .addReg(TmpReg2, RegState::Define)
1892 .addReg(SrcReg, getKillRegState(KillSrc));
1893
1894 Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
1895 : PPC::DMXXINSTDMR512_HI;
1896
1897 BuildMI(MBB, I, DL, get(Opc), DestReg)
1898 .addReg(TmpReg1, RegState::Kill)
1899 .addReg(TmpReg2, RegState::Kill);
1900
1901 return;
1902 } else if (PPC::DMRRCRegClass.contains(DestReg) &&
1903 PPC::DMRRCRegClass.contains(SrcReg)) {
1904
1905 BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
1906 .addReg(SrcReg, getKillRegState(KillSrc));
1907
1908 return;
1909
1910 } else
1911 llvm_unreachable("Impossible reg-to-reg copy");
1912
1913 const MCInstrDesc &MCID = get(Opc);
1914 if (MCID.getNumOperands() == 3)
1915 BuildMI(MBB, I, DL, MCID, DestReg)
1916 .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
1917 else
1918 BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
1919}
1920
// Maps register class RC to an index into the spill-opcode tables; the result
// is used to subscript the arrays returned by getStoreOpcodesForSpillArray()
// and getLoadOpcodesForSpillArray(). Classes are matched with hasSubClassEq()
// in the order written below, and an unmatched class is a fatal error.
// NOTE(review): in this listing the branches that neither assert nor abort
// show no statement, and the embedded line numbering skips one line in each of
// them -- presumably the assignment to OpcodeIndex. Confirm against the
// repository copy before relying on the text below.
1921unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
     // Declared as int although the function returns unsigned.
1922 int OpcodeIndex = 0;
1923
1924 if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
1925 PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1927 } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
1928 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1930 } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1932 } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1934 } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1936 } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1938 } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1940 } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1942 } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1944 } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1946 } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1948 } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1950 } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
     // The ACC, UACC, WACC and VSRp classes are only expected when the
     // subtarget reports paired vector memory operations.
1951 assert(Subtarget.pairedVectorMemops() &&
1952 "Register unexpected when paired memops are disabled.");
1954 } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1955 assert(Subtarget.pairedVectorMemops() &&
1956 "Register unexpected when paired memops are disabled.");
1958 } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1959 assert(Subtarget.pairedVectorMemops() &&
1960 "Register unexpected when paired memops are disabled.");
1962 } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1963 assert(Subtarget.pairedVectorMemops() &&
1964 "Register unexpected when paired memops are disabled.");
1966 } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1968 } else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
     // Spilling the DMRROW and DMRROWp classes is not implemented yet.
1969 llvm_unreachable("TODO: Implement spill DMRROW regclass!");
1970 } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
1971 llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
1972 } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
1974 } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
1976 } else {
1977 llvm_unreachable("Unknown regclass!");
1978 }
1979 return OpcodeIndex;
1980}
1981
// Returns the store opcode used to spill a register of class RC: the entry at
// getSpillIndex(RC) in the array from getStoreOpcodesForSpillArray().
1982unsigned
1984 ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1985 return OpcodesForSpill[getSpillIndex(RC)];
1986}
1987
// Returns the load opcode used to reload a register of class RC: the entry at
// getSpillIndex(RC) in the array from getLoadOpcodesForSpillArray().
1988unsigned
1990 ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1991 return OpcodesForSpill[getSpillIndex(RC)];
1992}
1993
// Builds the store that spills SrcReg (of class RC) to frame index FrameIdx,
// using the opcode from getStoreOpcodeForSpill(RC). The instruction is created
// in MF but not inserted into a block here. Also records on the function's
// PPCFunctionInfo that it has spills, whether a CR or CR-bit class was
// spilled, and whether the chosen opcode is an X-form memory operation.
1994void PPCInstrInfo::StoreRegToStackSlot(
1995 MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1996 const TargetRegisterClass *RC,
1997 SmallVectorImpl<MachineInstr *> &NewMIs) const {
1998 unsigned Opcode = getStoreOpcodeForSpill(RC);
     // The spill code is given an empty debug location.
1999 DebugLoc DL;
2000
2001 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2002 FuncInfo->setHasSpills();
2003
     // isKill is forwarded as the kill state of the source register operand.
2005 BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
2006 FrameIdx));
2007
2008 if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
2009 PPC::CRBITRCRegClass.hasSubClassEq(RC))
2010 FuncInfo->setSpillsCR();
2011
2012 if (isXFormMemOp(Opcode))
2013 FuncInfo->setHasNonRISpills();
2014}
2015
2018 bool isKill, int FrameIdx, const TargetRegisterClass *RC) const {
     // RC is used exactly as passed in; no updatedRC() adjustment is applied.
2019 MachineFunction &MF = *MBB.getParent();
2021
     // Build the spill code first, then insert every new instruction before MI.
2022 StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
2023
2024 for (MachineInstr *NewMI : NewMIs)
2025 MBB.insert(MI, NewMI);
2026
     // MMO is given the frame object's alignment and is attached to the last
     // instruction that was emitted.
2027 const MachineFrameInfo &MFI = MF.getFrameInfo();
2031 MFI.getObjectAlign(FrameIdx));
2032 NewMIs.back()->addMemOperand(MF, MMO);
2033}
2034
2037 bool isKill, int FrameIdx, const TargetRegisterClass *RC, Register VReg,
2038 MachineInstr::MIFlag Flags) const {
2039 // We need to avoid a situation in which the value from a VRRC register is
2040 // spilled using an Altivec instruction and reloaded into a VSRC register
2041 // using a VSX instruction. The issue with this is that the VSX
2042 // load/store instructions swap the doublewords in the vector and the Altivec
2043 // ones don't. The register classes on the spill/reload may be different if
2044 // the register is defined using an Altivec instruction and is then used by a
2045 // VSX instruction.
2046 RC = updatedRC(RC);
     // Spill with the adjusted class. VReg and Flags are not used in this body.
2047 storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC);
2048}
2049
// Builds the load that reloads DestReg (of class RC) from frame index
// FrameIdx, using the opcode from getLoadOpcodeForSpill(RC), and appends it to
// NewMIs. The instruction is created in MF but not inserted into a block here.
2050void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
2051 unsigned DestReg, int FrameIdx,
2052 const TargetRegisterClass *RC,
2054 const {
2055 unsigned Opcode = getLoadOpcodeForSpill(RC);
2056 NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
2057 FrameIdx));
2058}
2059
2062 int FrameIdx, const TargetRegisterClass *RC) const {
     // RC is used exactly as passed in; no updatedRC() adjustment is applied.
2063 MachineFunction &MF = *MBB.getParent();
     // Reuse the debug location of the insertion point when there is one.
2065 DebugLoc DL;
2066 if (MI != MBB.end()) DL = MI->getDebugLoc();
2067
     // Build the reload code first, then insert every new instruction before MI.
2068 LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2069
2070 for (MachineInstr *NewMI : NewMIs)
2071 MBB.insert(MI, NewMI);
2072
     // MMO is given the frame object's alignment and is attached to the last
     // instruction that was emitted.
2073 const MachineFrameInfo &MFI = MF.getFrameInfo();
2077 MFI.getObjectAlign(FrameIdx));
2078 NewMIs.back()->addMemOperand(MF, MMO);
2079}
2080
2083 Register DestReg, int FrameIdx,
2084 const TargetRegisterClass *RC,
2085 Register VReg, unsigned SubReg,
2086 MachineInstr::MIFlag Flags) const {
2087 // We need to avoid a situation in which the value from a VRRC register is
2088 // spilled using an Altivec instruction and reloaded into a VSRC register
2089 // using a VSX instruction. The issue with this is that the VSX
2090 // load/store instructions swap the doublewords in the vector and the Altivec
2091 // ones don't. The register classes on the spill/reload may be different if
2092 // the register is defined using an Altivec instruction and is then used by a
2093 // VSX instruction.
2094 RC = updatedRC(RC);
2095
     // Reload with the adjusted class. VReg, SubReg and Flags are not used in
     // this body.
2096 loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC);
2097}
2098
2101 assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
     // Cond[0] holds the immediate that is inverted and Cond[1] the register the
     // branch tests. For a CTR/CTR8 condition the immediate is toggled: zero
     // becomes 1 and any other value becomes 0.
2102 if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
2103 Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
2104 else
2105 // Leave the CR# the same, but invert the condition.
2107 return false;
2108}
2109
2110 // For some instructions, it is legal to fold ZERO into the RA register field.
2111 // This function performs that fold by replacing the operand with PPC::ZERO,
2112 // it does not consider whether the load immediate zero is no longer in use.
     // Returns true if the operand of UseMI that reads Reg was rewritten to
     // ZERO/ZERO8; returns false (leaving UseMI untouched) if DefMI is not a
     // load-immediate of zero, UseMI is a pseudo, or the operand does not accept
     // the zero register.
2114 Register Reg) const {
2115 // A zero immediate should always be loaded with a single li.
2116 unsigned DefOpc = DefMI.getOpcode();
2117 if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2118 return false;
2119 if (!DefMI.getOperand(1).isImm())
2120 return false;
2121 if (DefMI.getOperand(1).getImm() != 0)
2122 return false;
2123
2124 // Note that we cannot here invert the arguments of an isel in order to fold
2125 // a ZERO into what is presented as the second argument. All we have here
2126 // is the condition bit, and that might come from a CR-logical bit operation.
2127
2128 const MCInstrDesc &UseMCID = UseMI.getDesc();
2129
2130 // Only fold into real machine instructions.
2131 if (UseMCID.isPseudo())
2132 return false;
2133
2134 // We need to find which of the User's operands is to be folded, that will be
2135 // the operand that matches the given register ID.
2136 unsigned UseIdx;
2137 for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2138 if (UseMI.getOperand(UseIdx).isReg() &&
2139 UseMI.getOperand(UseIdx).getReg() == Reg)
2140 break;
2141
2142 assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2143 assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2144
2145 // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2146 // register (which might also be specified as a pointer class kind).
2147
     // Test the class ID resolved by getOpRegClassID() rather than the raw
     // UseInfo.RegClass field, so that this check and the ZERO/ZERO8 selection
     // below agree on the operand's register class.
2148 const MCOperandInfo &UseInfo = UseMCID.operands()[UseIdx];
2149 int16_t RegClass = getOpRegClassID(UseInfo);
2150 if (RegClass != PPC::GPRC_NOR0RegClassID &&
2151 RegClass != PPC::G8RC_NOX0RegClassID)
2152 return false;
2153
2154 // Make sure this is not tied to an output register (or otherwise
2155 // constrained). This is true for ST?UX registers, for example, which
2156 // are tied to their output registers.
2157 if (UseInfo.Constraints != 0)
2158 return false;
2159
2160 MCRegister ZeroReg =
2161 RegClass == PPC::G8RC_NOX0RegClassID ? PPC::ZERO8 : PPC::ZERO;
2162
2163 LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2164 LLVM_DEBUG(UseMI.dump());
2165 UseMI.getOperand(UseIdx).setReg(ZeroReg);
2166 LLVM_DEBUG(dbgs() << "Into: ");
2167 LLVM_DEBUG(UseMI.dump());
2168 return true;
2169}
2170
2171 // Folds zero into instructions which have a load immediate zero as an operand
2172 // but also recognize zero as immediate zero. If the definition of the load
2173 // has no more users it is deleted.
     // Returns whether UseMI was changed by onlyFoldImmediate().
2175 Register Reg, MachineRegisterInfo *MRI) const {
2176 bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
     // DefMI is erased whenever Reg has no remaining non-debug uses; this check
     // does not depend on whether the fold above succeeded.
2177 if (MRI->use_nodbg_empty(Reg))
2178 DefMI.eraseFromParent();
2179 return Changed;
2180}
2181
2183 for (MachineInstr &MI : MBB)
     // A definition of either counter register, CTR or CTR8, is enough.
2184 if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
2185 MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))
2186 return true;
2187 return false;
2188}
2189
2190 // We should make sure that, if we're going to predicate both sides of a
2191 // condition (a diamond), that both sides don't define the counter register. We
2192 // can predicate counter-decrement-based branches, but while that predicates
2193 // the branching, it does not predicate the counter decrement. If we tried to
2194 // merge the diamond into one predicated block, we'd decrement the counter
2195 // twice.
2197 unsigned NumT, unsigned ExtraT,
2198 MachineBasicBlock &FMBB,
2199 unsigned NumF, unsigned ExtraF,
2200 BranchProbability Probability) const {
     // Only the two blocks are inspected; the instruction counts and the branch
     // probability do not influence the answer.
2201 return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
2202}
2203
2204
2206 // The predicated branches are identified by their type, not really by the
2207 // explicit presence of a predicate. Furthermore, some of them can be
2208 // predicated more than once. Because if-conversion won't try to predicate
2209 // any instruction which already claims to be predicated (by returning true
2210 // here), always return false. In doing so, we let isPredicable() be the
2211 // final word on whether or not the instruction can be (further) predicated.
2212
2213 return false;
2214}
2215
2217 const MachineBasicBlock *MBB,
2218 const MachineFunction &MF) const {
2219 switch (MI.getOpcode()) {
2220 default:
2221 break;
2222 // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2223 // across them, since some FP operations may change content of FPSCR.
2224 // TODO: Model FPSCR in PPC instruction definitions and remove the workaround.
     // FENCE is reported as a scheduling boundary as well.
2225 case PPC::MFFS:
2226 case PPC::MTFSF:
2227 case PPC::FENCE:
2228 return true;
2229 }
2231}
2232
2234 ArrayRef<MachineOperand> Pred) const {
     // Pred[0] is the predicate immediate and Pred[1] the register it tests.
     // MI is rewritten in place: its descriptor is swapped for a conditional
     // opcode and the predicate operands are appended. Returns true when MI was
     // one of the opcodes handled below, and false otherwise.
2235 unsigned OpC = MI.getOpcode();
     // BLR/BLR8: choose the conditional form from the kind of predicate.
2236 if (OpC == PPC::BLR || OpC == PPC::BLR8) {
2237 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2238 bool isPPC64 = Subtarget.isPPC64();
2239 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2240 : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2241 // Need to add Def and Use for CTR implicit operand.
2242 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2243 .addReg(Pred[1].getReg(), RegState::Implicit)
2245 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2246 MI.setDesc(get(PPC::BCLR));
2247 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2248 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2249 MI.setDesc(get(PPC::BCLRn));
2250 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2251 } else {
2252 MI.setDesc(get(PPC::BCCLR));
2253 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2254 .addImm(Pred[0].getImm())
2255 .add(Pred[1]);
2256 }
2257
2258 return true;
     // B: in the non-CTR cases the target block operand is removed and re-added
     // after the predicate operands, so that it stays last.
2259 } else if (OpC == PPC::B) {
2260 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
2261 bool isPPC64 = Subtarget.isPPC64();
2262 MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2263 : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2264 // Need to add Def and Use for CTR implicit operand.
2265 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2266 .addReg(Pred[1].getReg(), RegState::Implicit)
2268 } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2269 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2270 MI.removeOperand(0);
2271
2272 MI.setDesc(get(PPC::BC));
2273 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2274 .add(Pred[1])
2275 .addMBB(MBB);
2276 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2277 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2278 MI.removeOperand(0);
2279
2280 MI.setDesc(get(PPC::BCn));
2281 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2282 .add(Pred[1])
2283 .addMBB(MBB);
2284 } else {
2285 MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
2286 MI.removeOperand(0);
2287
2288 MI.setDesc(get(PPC::BCC));
2289 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2290 .addImm(Pred[0].getImm())
2291 .add(Pred[1])
2292 .addMBB(MBB);
2293 }
2294
2295 return true;
     // BCTR/BCTRL and their 64-bit and _RM variants: these cannot be predicated
     // on the counter register itself.
2296 } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
2297 OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
2298 OpC == PPC::BCTRL8_RM) {
2299 if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
2300 llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2301
     // setLR is true for the BCTRL variants; it selects the *CTRL* opcodes and
     // triggers the extra implicit LR operands below.
2302 bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
2303 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
2304 bool isPPC64 = Subtarget.isPPC64();
2305
2306 if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
2307 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2308 : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2309 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2310 } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
2311 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2312 : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2313 MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
2314 } else {
2315 MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2316 : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2317 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2318 .addImm(Pred[0].getImm())
2319 .add(Pred[1]);
2320 }
2321
2322 // Need to add Def and Use for LR implicit operand.
2323 if (setLR)
2324 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2325 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
2326 .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
2327 if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
2328 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
2330
2331 return true;
2332 }
2333
2334 return false;
2335}
2336
2338 ArrayRef<MachineOperand> Pred2) const {
     // Each predicate is a pair: [0] is the PPC::Predicate immediate and [1] the
     // register it tests. Returns true if Pred1 subsumes Pred2.
2339 assert(Pred1.size() == 2 && "Invalid PPC first predicate");
2340 assert(Pred2.size() == 2 && "Invalid PPC second predicate");
2341
     // Predicates on the counter register never subsume, nor are they subsumed.
2342 if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
2343 return false;
2344 if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
2345 return false;
2346
2347 // P1 can only subsume P2 if they test the same condition register.
2348 if (Pred1[1].getReg() != Pred2[1].getReg())
2349 return false;
2350
2351 PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
2352 PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
2353
     // Identical predicates trivially subsume each other.
2354 if (P1 == P2)
2355 return true;
2356
2357 // Does P1 subsume P2, e.g. GE subsumes GT.
     // Only LE (over LT and EQ) and GE (over GT and EQ) are recognized here.
2358 if (P1 == PPC::PRED_LE &&
2359 (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
2360 return true;
2361 if (P1 == PPC::PRED_GE &&
2362 (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
2363 return true;
2364
2365 return false;
2366}
2367
2369 std::vector<MachineOperand> &Pred,
2370 bool SkipDead) const {
2371 // Note: At the present time, the contents of Pred from this function are
2372 // unused by IfConversion. This implementation follows ARM by pushing the
2373 // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2374 // predicate, instructions defining CTR or CTR8 are also included as
2375 // predicate-defining instructions.
     // SkipDead is not consulted in this body.
2376
2377 const TargetRegisterClass *RCs[] =
2378 { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2379 &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2380
     // Returns true if some operand defines, or some register mask clobbers, a
     // register in one of the classes above. Once Found is set the inner loop
     // stops testing further classes, for this operand and for all later ones.
2381 bool Found = false;
2382 for (const MachineOperand &MO : MI.operands()) {
2383 for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
2384 const TargetRegisterClass *RC = RCs[c];
2385 if (MO.isReg()) {
2386 if (MO.isDef() && RC->contains(MO.getReg())) {
2387 Pred.push_back(MO);
2388 Found = true;
2389 }
2390 } else if (MO.isRegMask()) {
     // The mask operand is pushed once for every register of RC it clobbers.
2391 for (MCPhysReg R : *RC)
2392 if (MO.clobbersPhysReg(R)) {
2393 Pred.push_back(MO);
2394 Found = true;
2395 }
2396 }
2397 }
2398 }
2399
2400 return Found;
2401}
2402
2404 Register &SrcReg2, int64_t &Mask,
2405 int64_t &Value) const {
     // Decomposes a compare instruction into its sources. Returns false, leaving
     // the outputs untouched, for any opcode not listed below.
2406 unsigned Opc = MI.getOpcode();
2407
2408 switch (Opc) {
2409 default: return false;
     // Compare against an immediate: there is no second register, Value is the
     // immediate operand and Mask is 0xFFFF.
2410 case PPC::CMPWI:
2411 case PPC::CMPLWI:
2412 case PPC::CMPDI:
2413 case PPC::CMPLDI:
2414 SrcReg = MI.getOperand(1).getReg();
2415 SrcReg2 = 0;
2416 Value = MI.getOperand(2).getImm();
2417 Mask = 0xFFFF;
2418 return true;
     // Register-register compares (integer and floating point): both sources are
     // reported, and Value and Mask are zero.
2419 case PPC::CMPW:
2420 case PPC::CMPLW:
2421 case PPC::CMPD:
2422 case PPC::CMPLD:
2423 case PPC::FCMPUS:
2424 case PPC::FCMPUD:
2425 SrcReg = MI.getOperand(1).getReg();
2426 SrcReg2 = MI.getOperand(2).getReg();
2427 Value = 0;
2428 Mask = 0;
2429 return true;
2430 }
2431}
2432
2434 Register SrcReg2, int64_t Mask,
2435 int64_t Value,
2436 const MachineRegisterInfo *MRI) const {
     // Tries to remove CmpInstr by making the instruction that defines SrcReg
     // (or a matching SUBF) set CR0 in its record form instead. Returns true if
     // the compare was erased, and false if nothing was done.
2437 if (DisableCmpOpt)
2438 return false;
2439
2440 int OpC = CmpInstr.getOpcode();
2441 Register CRReg = CmpInstr.getOperand(0).getReg();
2442
2443 // FP record forms set CR1 based on the exception status bits, not a
2444 // comparison with zero.
2445 if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
2446 return false;
2447
2449 // The record forms set the condition register based on a signed comparison
2450 // with zero (so says the ISA manual). This is not as straightforward as it
2451 // seems, however, because this is always a 64-bit comparison on PPC64, even
2452 // for instructions that are 32-bit in nature (like slw for example).
2453 // So, on PPC32, for unsigned comparisons, we can use the record forms only
2454 // for equality checks (as those don't depend on the sign). On PPC64,
2455 // we are restricted to equality for unsigned 64-bit comparisons and for
2456 // signed 32-bit comparisons the applicability is more restricted.
2457 bool isPPC64 = Subtarget.isPPC64();
2458 bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
2459 bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
2460 bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
2461
2462 // Look through copies unless that gets us to a physical register.
2463 Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2464 if (ActualSrc.isVirtual())
2465 SrcReg = ActualSrc;
2466
2467 // Get the unique definition of SrcReg.
2468 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2469 if (!MI) return false;
2470
     // equalityOnly: only EQ/NE users of the CR are acceptable.
     // noSub: skip the backward search for a matching SUBF further below.
2471 bool equalityOnly = false;
2472 bool noSub = false;
2473 if (isPPC64) {
2474 if (is32BitSignedCompare) {
2475 // We can perform this optimization only if SrcReg is sign-extending.
2476 if (isSignExtended(SrcReg, MRI))
2477 noSub = true;
2478 else
2479 return false;
2480 } else if (is32BitUnsignedCompare) {
2481 // We can perform this optimization, equality only, if SrcReg is
2482 // zero-extending.
2483 if (isZeroExtended(SrcReg, MRI)) {
2484 noSub = true;
2485 equalityOnly = true;
2486 } else
2487 return false;
2488 } else
2489 equalityOnly = is64BitUnsignedCompare;
2490 } else
2491 equalityOnly = is32BitUnsignedCompare;
2492
2493 if (equalityOnly) {
2494 // We need to check the uses of the condition register in order to reject
2495 // non-equality comparisons.
2497 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2498 I != IE; ++I) {
2499 MachineInstr *UseMI = &*I;
2500 if (UseMI->getOpcode() == PPC::BCC) {
2501 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2502 unsigned PredCond = PPC::getPredicateCondition(Pred);
2503 // We ignore hint bits when checking for non-equality comparisons.
2504 if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2505 return false;
2506 } else if (UseMI->getOpcode() == PPC::ISEL ||
2507 UseMI->getOpcode() == PPC::ISEL8) {
2508 unsigned SubIdx = UseMI->getOperand(3).getSubReg();
2509 if (SubIdx != PPC::sub_eq)
2510 return false;
2511 } else
2512 return false;
2513 }
2514 }
2515
2516 MachineBasicBlock::iterator I = CmpInstr;
2517
2518 // Scan forward to find the first use of the compare.
2519 for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2520 ++I) {
2521 bool FoundUse = false;
2523 J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
2524 J != JE; ++J)
2525 if (&*J == &*I) {
2526 FoundUse = true;
2527 break;
2528 }
2529
2530 if (FoundUse)
2531 break;
2532 }
2533
2536
2537 // There are two possible candidates which can be changed to set CR[01].
2538 // One is MI, the other is a SUB instruction.
2539 // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2540 MachineInstr *Sub = nullptr;
2541 if (SrcReg2 != 0)
2542 // MI is not a candidate for CMPrr.
2543 MI = nullptr;
2544 // FIXME: Conservatively refuse to convert an instruction which isn't in the
2545 // same BB as the comparison. This is to allow the check below to avoid calls
2546 // (and other explicit clobbers); instead we should really check for these
2547 // more explicitly (in at least a few predecessors).
2548 else if (MI->getParent() != CmpInstr.getParent())
2549 return false;
2550 else if (Value != 0) {
2551 // The record-form instructions set CR bit based on signed comparison
2552 // against 0. We try to convert a compare against 1 or -1 into a compare
2553 // against 0 to exploit record-form instructions. For example, we change
2554 // the condition "greater than -1" into "greater than or equal to 0"
2555 // and "less than 1" into "less than or equal to 0".
2556
2557 // Since we optimize comparison based on a specific branch condition,
2558 // we don't optimize if the condition code is used more than once.
2559 if (equalityOnly || !MRI->hasOneUse(CRReg))
2560 return false;
2561
2562 MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
2563 if (UseMI->getOpcode() != PPC::BCC)
2564 return false;
2565
2566 PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
2567 unsigned PredCond = PPC::getPredicateCondition(Pred);
2568 unsigned PredHint = PPC::getPredicateHint(Pred);
2569 int16_t Immed = (int16_t)Value;
2570
2571 // When modifying the condition in the predicate, we propagate hint bits
2572 // from the original predicate to the new one.
2573 if (Immed == -1 && PredCond == PPC::PRED_GT)
2574 // We convert "greater than -1" into "greater than or equal to 0",
2575 // since we are assuming signed comparison by !equalityOnly
2576 Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
2577 else if (Immed == -1 && PredCond == PPC::PRED_LE)
2578 // We convert "less than or equal to -1" into "less than 0".
2579 Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
2580 else if (Immed == 1 && PredCond == PPC::PRED_LT)
2581 // We convert "less than 1" into "less than or equal to 0".
2582 Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
2583 else if (Immed == 1 && PredCond == PPC::PRED_GE)
2584 // We convert "greater than or equal to 1" into "greater than 0".
2585 Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
2586 else
2587 return false;
2588
2589 // Convert the comparison and its user to a compare against zero with the
2590 // appropriate predicate on the branch. Zero comparison might provide
2591 // optimization opportunities post-RA (see optimization in
2592 // PPCPreEmitPeephole.cpp).
     // Note that both instructions are modified here, before it is known whether
     // the rest of this function will succeed.
2593 UseMI->getOperand(0).setImm(Pred);
2594 CmpInstr.getOperand(2).setImm(0);
2595 }
2596
2597 // Search for Sub.
2598 --I;
2599
2600 // Get ready to iterate backward from CmpInstr.
2601 MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2602
2603 for (; I != E && !noSub; --I) {
2604 const MachineInstr &Instr = *I;
2605 unsigned IOpC = Instr.getOpcode();
2606
2607 if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
2608 Instr.readsRegister(PPC::CR0, TRI)))
2609 // This instruction modifies or uses the record condition register after
2610 // the one we want to change. While we could do this transformation, it
2611 // would likely not be profitable. This transformation removes one
2612 // instruction, and so even forcing RA to generate one move probably
2613 // makes it unprofitable.
2614 return false;
2615
2616 // Check whether CmpInstr can be made redundant by the current instruction.
2617 if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
2618 OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
2619 (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
2620 ((Instr.getOperand(1).getReg() == SrcReg &&
2621 Instr.getOperand(2).getReg() == SrcReg2) ||
2622 (Instr.getOperand(1).getReg() == SrcReg2 &&
2623 Instr.getOperand(2).getReg() == SrcReg))) {
2624 Sub = &*I;
2625 break;
2626 }
2627
2628 if (I == B)
2629 // The 'and' is below the comparison instruction.
2630 return false;
2631 }
2632
2633 // Return false if no candidates exist.
2634 if (!MI && !Sub)
2635 return false;
2636
2637 // The single candidate is called MI.
2638 if (!MI) MI = Sub;
2639
     // NewOpC stays -1 unless MI is already a record form or has one.
2640 int NewOpC = -1;
2641 int MIOpC = MI->getOpcode();
2642 if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
2643 MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
2644 NewOpC = MIOpC;
2645 else {
2646 NewOpC = PPC::getRecordFormOpcode(MIOpC);
2647 if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
2648 NewOpC = MIOpC;
2649 }
2650
2651 // FIXME: On the non-embedded POWER architectures, only some of the record
2652 // forms are fast, and we should use only the fast ones.
2653
2654 // The defining instruction has a record form (or is already a record
2655 // form). It is possible, however, that we'll need to reverse the condition
2656 // code of the users.
2657 if (NewOpC == -1)
2658 return false;
2659
2660 // This transformation must not be performed when `nsw` is missing and the
2661 // comparison is not `equalityOnly`: if the subtraction overflows, sub_lt and
2662 // sub_gt in CRReg do not reflect the correct order. If `equalityOnly` is true,
2663 // sub_eq in CRReg still reflects equality, so the optimization stays valid.
2664 if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
2665 Sub && !Sub->getFlag(MachineInstr::NoSWrap))
2666 return false;
2667
2668 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2669 // needs to be updated to be based on SUB. Push the condition code operands
2670 // to PredsToUpdate / SubRegsToUpdate. If it is safe to remove CmpInstr, the
2671 // condition code of these operands will be modified.
2672 // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2673 // comparison against 0, which may modify predicate.
2674 bool ShouldSwap = false;
2675 if (Sub && Value == 0) {
2676 ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2677 Sub->getOperand(2).getReg() == SrcReg;
2678
2679 // The operands to subf are the opposite of sub, so only in the fixed-point
2680 // case, invert the order.
2681 ShouldSwap = !ShouldSwap;
2682 }
2683
2684 if (ShouldSwap)
2686 I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
2687 I != IE; ++I) {
2688 MachineInstr *UseMI = &*I;
2689 if (UseMI->getOpcode() == PPC::BCC) {
2690 PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
2691 unsigned PredCond = PPC::getPredicateCondition(Pred);
2692 assert((!equalityOnly ||
2693 PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
2694 "Invalid predicate for equality-only optimization");
2695 (void)PredCond; // To suppress warning in release build.
2696 PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
2698 } else if (UseMI->getOpcode() == PPC::ISEL ||
2699 UseMI->getOpcode() == PPC::ISEL8) {
2700 unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
2701 assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
2702 "Invalid CR bit for equality-only optimization");
2703
     // Swapping the operands exchanges "less than" and "greater than"; any other
     // CR bit is left as it is.
2704 if (NewSubReg == PPC::sub_lt)
2705 NewSubReg = PPC::sub_gt;
2706 else if (NewSubReg == PPC::sub_gt)
2707 NewSubReg = PPC::sub_lt;
2708
2709 SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
2710 NewSubReg));
2711 } else // We need to abort on a user we don't understand.
2712 return false;
2713 }
2714 assert(!(Value != 0 && ShouldSwap) &&
2715 "Non-zero immediate support and ShouldSwap"
2716 "may conflict in updating predicate");
2717
2718 // Erase the compare and copy CR0, as set by the record-form instruction,
2719 // into CRReg so the existing users keep working. If the instruction was not
2720 // previously in record form, then set the kill flag on the CR.
2721 CmpInstr.eraseFromParent();
2722
2724 BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
2725 get(TargetOpcode::COPY), CRReg)
2726 .addReg(PPC::CR0, getKillRegState(MIOpC != NewOpC));
2727
2728 // Even if CR0 register were dead before, it is alive now since the
2729 // instruction we just built uses it.
2730 MI->clearRegisterDeads(PPC::CR0);
2731
2732 if (MIOpC != NewOpC) {
2733 // We need to be careful here: we're replacing one instruction with
2734 // another, and we need to make sure that we get all of the right
2735 // implicit uses and defs. On the other hand, the caller may be holding
2736 // an iterator to this instruction, and so we can't delete it (this is
2737 // specifically the case if this is the instruction directly after the
2738 // compare).
2739
2740 // Rotates are expensive instructions. If we're emitting a record-form
2741 // rotate that can just be an andi/andis, we should just emit that.
2742 if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
2743 Register GPRRes = MI->getOperand(0).getReg();
2744 int64_t SH = MI->getOperand(2).getImm();
2745 int64_t MB = MI->getOperand(3).getImm();
2746 int64_t ME = MI->getOperand(4).getImm();
2747 // We can only do this if both the start and end of the mask are in the
2748 // same halfword.
2749 bool MBInLoHWord = MB >= 16;
2750 bool MEInLoHWord = ME >= 16;
     // All-ones is the sentinel for "no andi/andis replacement was found".
2751 uint64_t Mask = ~0LLU;
2752
2753 if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
2754 Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
2755 // The mask value needs to shift right 16 if we're emitting andis.
2756 Mask >>= MBInLoHWord ? 0 : 16;
2757 NewOpC = MIOpC == PPC::RLWINM
2758 ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2759 : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2760 } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
2761 (ME - MB + 1 == SH) && (MB >= 16)) {
2762 // If we are rotating by the exact number of bits as are in the mask
2763 // and the mask is in the least significant bits of the register,
2764 // that's just an andis. (as long as the GPR result has no uses).
2765 Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
2766 Mask >>= 16;
2767 NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2768 }
2769 // If we've set the mask, we can transform.
2770 if (Mask != ~0LLU) {
     // Drop the MB/ME operands and reuse the SH slot for the immediate mask.
2771 MI->removeOperand(4);
2772 MI->removeOperand(3);
2773 MI->getOperand(2).setImm(Mask);
2774 NumRcRotatesConvertedToRcAnd++;
2775 }
2776 } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
     // A zero-shift RLDICL whose mask start is at bit 48 or later becomes an
     // ANDI8_rec with the equivalent immediate mask.
2777 int64_t MB = MI->getOperand(3).getImm();
2778 if (MB >= 48) {
2779 uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
2780 NewOpC = PPC::ANDI8_rec;
2781 MI->removeOperand(3);
2782 MI->getOperand(2).setImm(Mask);
2783 NumRcRotatesConvertedToRcAnd++;
2784 }
2785 }
2786
2787 const MCInstrDesc &NewDesc = get(NewOpC);
2788 MI->setDesc(NewDesc);
2789
     // Add any implicit defs and uses of the new descriptor that MI lacks.
2790 for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2791 if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
2792 MI->addOperand(*MI->getParent()->getParent(),
2793 MachineOperand::CreateReg(ImpDef, true, true));
2794 }
2795 }
2796 for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2797 if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
2798 MI->addOperand(*MI->getParent()->getParent(),
2799 MachineOperand::CreateReg(ImpUse, false, true));
2800 }
2801 }
2802 }
2803 assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2804 "Record-form instruction does not define cr0?");
2805
2806 // Modify the condition code of operands in PredsToUpdate / SubRegsToUpdate.
2807 // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2808 // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2809 for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
2810 PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
2811
2812 for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
2813 SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
2814
2815 return true;
2816}
2817
2819 MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
2820 if (MRI->isSSA())
2821 return false;
2822
2823 Register SrcReg, SrcReg2;
2824 int64_t CmpMask, CmpValue;
2825 if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2826 return false;
2827
2828 // Try to optimize the comparison against 0.
2829 if (CmpValue || !CmpMask || SrcReg2)
2830 return false;
2831
2832 // The record forms set the condition register based on a signed comparison
2833 // with zero (see comments in optimizeCompareInstr). Since we can't do the
2834 // equality checks in post-RA, we are more restricted on an unsigned
2835 // comparison.
2836 unsigned Opc = CmpMI.getOpcode();
2837 if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2838 return false;
2839
2840 // The record forms are always based on a 64-bit comparison on PPC64
2841 // (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2842 // comparison. Since we can't do the equality checks in post-RA, we bail out
2843 // in that case.
2844 if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2845 return false;
2846
2847 // CmpMI can't be deleted if it has implicit def.
2848 if (CmpMI.hasImplicitDef())
2849 return false;
2850
2851 bool SrcRegHasOtherUse = false;
2852 MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2853 if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))
2854 return false;
2855
2856 MachineOperand RegMO = CmpMI.getOperand(0);
2857 Register CRReg = RegMO.getReg();
2858 if (CRReg != PPC::CR0)
2859 return false;
2860
2861 // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2862 bool SeenUseOfCRReg = false;
2863 bool IsCRRegKilled = false;
2864 if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2865 SeenUseOfCRReg) ||
2866 SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)
2867 return false;
2868
2869 int SrcMIOpc = SrcMI->getOpcode();
2870 int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2871 if (NewOpC == -1)
2872 return false;
2873
2874 LLVM_DEBUG(dbgs() << "Replace Instr: ");
2875 LLVM_DEBUG(SrcMI->dump());
2876
2877 const MCInstrDesc &NewDesc = get(NewOpC);
2878 SrcMI->setDesc(NewDesc);
2879 MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2881 SrcMI->clearRegisterDeads(CRReg);
2882
2883 assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
2884 "Record-form instruction does not define cr0?");
2885
2886 LLVM_DEBUG(dbgs() << "with: ");
2887 LLVM_DEBUG(SrcMI->dump());
2888 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2889 LLVM_DEBUG(CmpMI.dump());
2890 return true;
2891}
2892
2895 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2896 const TargetRegisterInfo *TRI) const {
2897 const MachineOperand *BaseOp;
2898 OffsetIsScalable = false;
2899 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2900 return false;
2901 BaseOps.push_back(BaseOp);
2902 return true;
2903}
2904
2905static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2906 const TargetRegisterInfo *TRI) {
2907 // If this is a volatile load/store, don't mess with it.
2908 if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
2909 return false;
2910
2911 if (LdSt.getOperand(2).isFI())
2912 return true;
2913
2914 assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
2915 // Can't cluster if the instruction modifies the base register
2916 // or it is update form. e.g. ld r2,3(r2)
2917 if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
2918 return false;
2919
2920 return true;
2921}
2922
2923// Only cluster instruction pair that have the same opcode, and they are
2924// clusterable according to PowerPC specification.
2925static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2926 const PPCSubtarget &Subtarget) {
2927 switch (FirstOpc) {
2928 default:
2929 return false;
2930 case PPC::STD:
2931 case PPC::STFD:
2932 case PPC::STXSD:
2933 case PPC::DFSTOREf64:
2934 return FirstOpc == SecondOpc;
2935 // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2936 // 32bit and 64bit instruction selection. They are clusterable pair though
2937 // they are different opcode.
2938 case PPC::STW:
2939 case PPC::STW8:
2940 return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
2941 }
2942}
2943
2945 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2946 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2947 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2948 unsigned NumBytes) const {
2949
2950 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2951 const MachineOperand &BaseOp1 = *BaseOps1.front();
2952 const MachineOperand &BaseOp2 = *BaseOps2.front();
2953 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2954 "Only base registers and frame indices are supported.");
2955
2956 // ClusterSize means the number of memory operations that will have been
2957 // clustered if this hook returns true.
2958 // Don't cluster memory op if there are already two ops clustered at least.
2959 if (ClusterSize > 2)
2960 return false;
2961
2962 // Cluster the load/store only when they have the same base
2963 // register or FI.
2964 if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
2965 (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
2966 (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2967 return false;
2968
2969 // Check if the load/store are clusterable according to the PowerPC
2970 // specification.
2971 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2972 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2973 unsigned FirstOpc = FirstLdSt.getOpcode();
2974 unsigned SecondOpc = SecondLdSt.getOpcode();
2976 // Cluster the load/store only when they have the same opcode, and they are
2977 // clusterable opcode according to PowerPC specification.
2978 if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2979 return false;
2980
2981 // Can't cluster load/store that have ordered or volatile memory reference.
2982 if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
2983 !isLdStSafeToCluster(SecondLdSt, TRI))
2984 return false;
2985
2986 int64_t Offset1 = 0, Offset2 = 0;
2988 Width2 = LocationSize::precise(0);
2989 const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
2990 if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
2991 !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
2992 Width1 != Width2)
2993 return false;
2994
2995 assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2996 "getMemOperandWithOffsetWidth return incorrect base op");
2997 // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2998 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2999 return Offset1 + (int64_t)Width1.getValue() == Offset2;
3000}
3001
3002/// GetInstSize - Return the number of bytes of code the specified
3003/// instruction may be. This returns the maximum number of bytes.
3004///
3006 unsigned Opcode = MI.getOpcode();
3007
3008 switch (Opcode) {
3009 case PPC::INLINEASM:
3010 case PPC::INLINEASM_BR: {
3011 const MachineFunction *MF = MI.getParent()->getParent();
3012 const char *AsmStr = MI.getOperand(0).getSymbolName();
3013 return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
3014 }
3015 case TargetOpcode::STACKMAP: {
3016 StackMapOpers Opers(&MI);
3017 return Opers.getNumPatchBytes();
3018 }
3019 case TargetOpcode::PATCHPOINT: {
3020 PatchPointOpers Opers(&MI);
3021 return Opers.getNumPatchBytes();
3022 }
3023 case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
3024 const MachineFunction *MF = MI.getParent()->getParent();
3025 const Function &F = MF->getFunction();
3026 unsigned Num = 0;
3027 (void)F.getFnAttribute("patchable-function-entry")
3028 .getValueAsString()
3029 .getAsInteger(10, Num);
3030 if (Num || MF->getTarget().getTargetTriple().isOSAIX() ||
3032 return Num * 4;
3033 // Size of xray sled.
3034 return 7 * 4;
3035 }
3036 case TargetOpcode::PATCHABLE_RET: {
3037 // Size of xray sled.
3038 unsigned RetOpcode = MI.getOperand(0).getImm();
3039 bool IsConditional = RetOpcode == PPC::BCCLR;
3040 return (8 + IsConditional) * 4;
3041 }
3042 case TargetOpcode::BUNDLE:
3043 return getInstBundleSize(MI);
3044 default:
3045 return get(Opcode).getSize();
3046 }
3047}
3048
3051 // FIXME: The size of STACKMAP is currently over-estimated.
3052 return MI.getOpcode() == TargetOpcode::STACKMAP
3053 ? InstSizeVerifyMode::AllowOverEstimate
3054 : InstSizeVerifyMode::ExactSize;
3055}
3056
3057std::pair<unsigned, unsigned>
3059 // PPC always uses a direct mask.
3060 return std::make_pair(TF, 0u);
3061}
3062
3065 using namespace PPCII;
3066 static const std::pair<unsigned, const char *> TargetFlags[] = {
3067 {MO_PLT, "ppc-plt"},
3068 {MO_PIC_FLAG, "ppc-pic"},
3069 {MO_PCREL_FLAG, "ppc-pcrel"},
3070 {MO_GOT_FLAG, "ppc-got"},
3071 {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
3072 {MO_TLSGD_FLAG, "ppc-tlsgd"},
3073 {MO_TPREL_FLAG, "ppc-tprel"},
3074 {MO_TLSLDM_FLAG, "ppc-tlsldm"},
3075 {MO_TLSLD_FLAG, "ppc-tlsld"},
3076 {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
3077 {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
3078 {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
3079 {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
3080 {MO_LO, "ppc-lo"},
3081 {MO_HA, "ppc-ha"},
3082 {MO_TPREL_LO, "ppc-tprel-lo"},
3083 {MO_TPREL_HA, "ppc-tprel-ha"},
3084 {MO_DTPREL_LO, "ppc-dtprel-lo"},
3085 {MO_TLSLD_LO, "ppc-tlsld-lo"},
3086 {MO_TOC_LO, "ppc-toc-lo"},
3087 {MO_TLS, "ppc-tls"},
3088 {MO_PIC_HA_FLAG, "ppc-ha-pic"},
3089 {MO_PIC_LO_FLAG, "ppc-lo-pic"},
3090 {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3091 {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3092 {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3093 };
3094 return ArrayRef(TargetFlags);
3095}
3096
3097// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3098// The VSX versions have the advantage of a full 64-register target whereas
3099// the FP ones have the advantage of lower latency and higher throughput. So
3100// what we are after is using the faster instructions in low register pressure
3101// situations and using the larger register file in high register pressure
3102// situations.
3104 unsigned UpperOpcode, LowerOpcode;
3105 switch (MI.getOpcode()) {
3106 case PPC::DFLOADf32:
3107 UpperOpcode = PPC::LXSSP;
3108 LowerOpcode = PPC::LFS;
3109 break;
3110 case PPC::DFLOADf64:
3111 UpperOpcode = PPC::LXSD;
3112 LowerOpcode = PPC::LFD;
3113 break;
3114 case PPC::DFSTOREf32:
3115 UpperOpcode = PPC::STXSSP;
3116 LowerOpcode = PPC::STFS;
3117 break;
3118 case PPC::DFSTOREf64:
3119 UpperOpcode = PPC::STXSD;
3120 LowerOpcode = PPC::STFD;
3121 break;
3122 case PPC::XFLOADf32:
3123 UpperOpcode = PPC::LXSSPX;
3124 LowerOpcode = PPC::LFSX;
3125 break;
3126 case PPC::XFLOADf64:
3127 UpperOpcode = PPC::LXSDX;
3128 LowerOpcode = PPC::LFDX;
3129 break;
3130 case PPC::XFSTOREf32:
3131 UpperOpcode = PPC::STXSSPX;
3132 LowerOpcode = PPC::STFSX;
3133 break;
3134 case PPC::XFSTOREf64:
3135 UpperOpcode = PPC::STXSDX;
3136 LowerOpcode = PPC::STFDX;
3137 break;
3138 case PPC::LIWAX:
3139 UpperOpcode = PPC::LXSIWAX;
3140 LowerOpcode = PPC::LFIWAX;
3141 break;
3142 case PPC::LIWZX:
3143 UpperOpcode = PPC::LXSIWZX;
3144 LowerOpcode = PPC::LFIWZX;
3145 break;
3146 case PPC::STIWX:
3147 UpperOpcode = PPC::STXSIWX;
3148 LowerOpcode = PPC::STFIWX;
3149 break;
3150 default:
3151 llvm_unreachable("Unknown Operation!");
3152 }
3153
3154 Register TargetReg = MI.getOperand(0).getReg();
3155 unsigned Opcode;
3156 if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
3157 (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3158 Opcode = LowerOpcode;
3159 else
3160 Opcode = UpperOpcode;
3161 MI.setDesc(get(Opcode));
3162 return true;
3163}
3164
3165static bool isAnImmediateOperand(const MachineOperand &MO) {
3166 return MO.isCPI() || MO.isGlobal() || MO.isImm();
3167}
3168
3170 auto &MBB = *MI.getParent();
3171 auto DL = MI.getDebugLoc();
3172
3173 switch (MI.getOpcode()) {
3174 case PPC::BUILD_UACC: {
3175 MCRegister ACC = MI.getOperand(0).getReg();
3176 MCRegister UACC = MI.getOperand(1).getReg();
3177 if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3178 MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
3179 MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
3180 // FIXME: This can easily be improved to look up to the top of the MBB
3181 // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3182 // we can just re-target any such XXLOR's to DstVSR + offset.
3183 for (int VecNo = 0; VecNo < 4; VecNo++)
3184 BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
3185 .addReg(SrcVSR + VecNo)
3186 .addReg(SrcVSR + VecNo);
3187 }
3188 // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3189 // So after building the 4 copies, we can replace the BUILD_UACC instruction
3190 // with a NOP.
3191 [[fallthrough]];
3192 }
3193 case PPC::KILL_PAIR: {
3194 MI.setDesc(get(PPC::UNENCODED_NOP));
3195 MI.removeOperand(1);
3196 MI.removeOperand(0);
3197 return true;
3198 }
3199 case TargetOpcode::LOAD_STACK_GUARD: {
3200 auto M = MBB.getParent()->getFunction().getParent();
3201 assert(
3202 (Subtarget.isTargetLinux() || M->getStackProtectorGuard() == "tls") &&
3203 "Only Linux target or tls mode are expected to contain "
3204 "LOAD_STACK_GUARD");
3205 int64_t Offset;
3206 if (M->getStackProtectorGuard() == "tls")
3207 Offset = M->getStackProtectorGuardOffset();
3208 else
3209 Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
3210 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3211 MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3212 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3213 .addImm(Offset)
3214 .addReg(Reg);
3215 return true;
3216 }
3217 case PPC::PPCLdFixedAddr: {
3218 assert((Subtarget.getTargetTriple().isOSGlibc() ||
3219 Subtarget.getTargetTriple().isMusl()) &&
3220 "Only targets with Glibc expected to contain PPCLdFixedAddr");
3221 int64_t Offset = 0;
3222 const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3223 MI.setDesc(get(PPC::LWZ));
3224 uint64_t FAType = MI.getOperand(1).getImm();
3225#undef PPC_LNX_FEATURE
3226#undef PPC_CPU
3227#define PPC_LNX_DEFINE_OFFSETS
3228#include "llvm/TargetParser/PPCTargetParser.def"
3229 bool IsLE = Subtarget.isLittleEndian();
3230 bool Is64 = Subtarget.isPPC64();
3231 if (FAType == PPC_FAWORD_HWCAP) {
3232 if (IsLE)
3233 Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3234 else
3235 Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3236 } else if (FAType == PPC_FAWORD_HWCAP2) {
3237 if (IsLE)
3238 Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3239 else
3240 Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3241 } else if (FAType == PPC_FAWORD_CPUID) {
3242 if (IsLE)
3243 Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3244 else
3245 Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3246 }
3247 assert(Offset && "Do not know the offset for this fixed addr load");
3248 MI.removeOperand(1);
3249 Subtarget.getTargetMachine().setGlibcHWCAPAccess();
3250 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3251 .addImm(Offset)
3252 .addReg(Reg);
3253 return true;
3254#define PPC_TGT_PARSER_UNDEF_MACROS
3255#include "llvm/TargetParser/PPCTargetParser.def"
3256#undef PPC_TGT_PARSER_UNDEF_MACROS
3257 }
3258 case PPC::DFLOADf32:
3259 case PPC::DFLOADf64:
3260 case PPC::DFSTOREf32:
3261 case PPC::DFSTOREf64: {
3262 assert(Subtarget.hasP9Vector() &&
3263 "Invalid D-Form Pseudo-ops on Pre-P9 target.");
3264 assert(MI.getOperand(2).isReg() &&
3265 isAnImmediateOperand(MI.getOperand(1)) &&
3266 "D-form op must have register and immediate operands");
3267 return expandVSXMemPseudo(MI);
3268 }
3269 case PPC::XFLOADf32:
3270 case PPC::XFSTOREf32:
3271 case PPC::LIWAX:
3272 case PPC::LIWZX:
3273 case PPC::STIWX: {
3274 assert(Subtarget.hasP8Vector() &&
3275 "Invalid X-Form Pseudo-ops on Pre-P8 target.");
3276 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3277 "X-form op must have register and register operands");
3278 return expandVSXMemPseudo(MI);
3279 }
3280 case PPC::XFLOADf64:
3281 case PPC::XFSTOREf64: {
3282 assert(Subtarget.hasVSX() &&
3283 "Invalid X-Form Pseudo-ops on target that has no VSX.");
3284 assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
3285 "X-form op must have register and register operands");
3286 return expandVSXMemPseudo(MI);
3287 }
3288 case PPC::SPILLTOVSR_LD: {
3289 Register TargetReg = MI.getOperand(0).getReg();
3290 if (PPC::VSFRCRegClass.contains(TargetReg)) {
3291 MI.setDesc(get(PPC::DFLOADf64));
3292 return expandPostRAPseudo(MI);
3293 }
3294 else
3295 MI.setDesc(get(PPC::LD));
3296 return true;
3297 }
3298 case PPC::SPILLTOVSR_ST: {
3299 Register SrcReg = MI.getOperand(0).getReg();
3300 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3301 NumStoreSPILLVSRRCAsVec++;
3302 MI.setDesc(get(PPC::DFSTOREf64));
3303 return expandPostRAPseudo(MI);
3304 } else {
3305 NumStoreSPILLVSRRCAsGpr++;
3306 MI.setDesc(get(PPC::STD));
3307 }
3308 return true;
3309 }
3310 case PPC::SPILLTOVSR_LDX: {
3311 Register TargetReg = MI.getOperand(0).getReg();
3312 if (PPC::VSFRCRegClass.contains(TargetReg))
3313 MI.setDesc(get(PPC::LXSDX));
3314 else
3315 MI.setDesc(get(PPC::LDX));
3316 return true;
3317 }
3318 case PPC::SPILLTOVSR_STX: {
3319 Register SrcReg = MI.getOperand(0).getReg();
3320 if (PPC::VSFRCRegClass.contains(SrcReg)) {
3321 NumStoreSPILLVSRRCAsVec++;
3322 MI.setDesc(get(PPC::STXSDX));
3323 } else {
3324 NumStoreSPILLVSRRCAsGpr++;
3325 MI.setDesc(get(PPC::STDX));
3326 }
3327 return true;
3328 }
3329
3330 // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3331 case PPC::CFENCE:
3332 case PPC::CFENCE8: {
3333 auto Val = MI.getOperand(0).getReg();
3334 unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3335 BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
3336 BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
3338 .addReg(PPC::CR7)
3339 .addImm(1);
3340 MI.setDesc(get(PPC::ISYNC));
3341 MI.removeOperand(0);
3342 return true;
3343 }
3344 case PPC::LWAT_CSNE_PSEUDO:
3345 case PPC::LDAT_CSNE_PSEUDO:
3346 return expandAMOCSNEPseudo(MI);
3347 }
3348 return false;
3349}
3350
3351// Essentially a compile-time implementation of a compare->isel sequence.
3352// It takes two constants to compare, along with the true/false registers
3353// and the comparison type (as a subreg to a CR field) and returns one
3354// of the true/false registers, depending on the comparison results.
3355static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3356 unsigned TrueReg, unsigned FalseReg,
3357 unsigned CRSubReg) {
3358 // Signed comparisons. The immediates are assumed to be sign-extended.
3359 if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
3360 switch (CRSubReg) {
3361 default: llvm_unreachable("Unknown integer comparison type.");
3362 case PPC::sub_lt:
3363 return Imm1 < Imm2 ? TrueReg : FalseReg;
3364 case PPC::sub_gt:
3365 return Imm1 > Imm2 ? TrueReg : FalseReg;
3366 case PPC::sub_eq:
3367 return Imm1 == Imm2 ? TrueReg : FalseReg;
3368 }
3369 }
3370 // Unsigned comparisons.
3371 else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
3372 switch (CRSubReg) {
3373 default: llvm_unreachable("Unknown integer comparison type.");
3374 case PPC::sub_lt:
3375 return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3376 case PPC::sub_gt:
3377 return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3378 case PPC::sub_eq:
3379 return Imm1 == Imm2 ? TrueReg : FalseReg;
3380 }
3381 }
3382 return PPC::NoRegister;
3383}
3384
3386 unsigned OpNo,
3387 int64_t Imm) const {
3388 assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3389 // Replace the REG with the Immediate.
3390 Register InUseReg = MI.getOperand(OpNo).getReg();
3391 MI.getOperand(OpNo).ChangeToImmediate(Imm);
3392
3393 // We need to make sure that the MI didn't have any implicit use
3394 // of this REG any more. We don't call MI.implicit_operands().empty() to
3395 // return early, since MI's MCID might be changed in calling context, as a
3396 // result its number of explicit operands may be changed, thus the begin of
3397 // implicit operand is changed.
3399 int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);
3400 if (UseOpIdx >= 0) {
3401 MachineOperand &MO = MI.getOperand(UseOpIdx);
3402 if (MO.isImplicit())
3403 // The operands must always be in the following order:
3404 // - explicit reg defs,
3405 // - other explicit operands (reg uses, immediates, etc.),
3406 // - implicit reg defs
3407 // - implicit reg uses
3408 // Therefore, removing the implicit operand won't change the explicit
3409 // operands layout.
3410 MI.removeOperand(UseOpIdx);
3411 }
3412}
3413
3414// Replace an instruction with one that materializes a constant (and sets
3415// CR0 if the original instruction was a record-form instruction).
3417 const LoadImmediateInfo &LII) const {
3418 // Remove existing operands.
3419 int OperandToKeep = LII.SetCR ? 1 : 0;
3420 for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
3421 MI.removeOperand(i);
3422
3423 // Replace the instruction.
3424 if (LII.SetCR) {
3425 MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3426 // Set the immediate.
3427 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3428 .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);
3429 return;
3430 }
3431 else
3432 MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));
3433
3434 // Set the immediate.
3435 MachineInstrBuilder(*MI.getParent()->getParent(), MI)
3436 .addImm(LII.Imm);
3437}
3438
3440 bool &SeenIntermediateUse) const {
3441 assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3442 "Should be called after register allocation.");
3444 MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3445 It++;
3446 SeenIntermediateUse = false;
3447 for (; It != E; ++It) {
3448 if (It->modifiesRegister(Reg, TRI))
3449 return &*It;
3450 if (It->readsRegister(Reg, TRI))
3451 SeenIntermediateUse = true;
3452 }
3453 return nullptr;
3454}
3455
3458 const DebugLoc &DL, Register Reg,
3459 int64_t Imm) const {
3460 assert(!MBB.getParent()->getRegInfo().isSSA() &&
3461 "Register should be in non-SSA form after RA");
3462 bool isPPC64 = Subtarget.isPPC64();
3463 // FIXME: Materialization here is not optimal.
3464 // For some special bit patterns we can use less instructions.
3465 // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3466 if (isInt<16>(Imm)) {
3467 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
3468 } else if (isInt<32>(Imm)) {
3469 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
3470 .addImm(Imm >> 16);
3471 if (Imm & 0xFFFF)
3472 BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
3473 .addReg(Reg, RegState::Kill)
3474 .addImm(Imm & 0xFFFF);
3475 } else {
3476 assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3477 "only supported in PPC64");
3478 BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
3479 if ((Imm >> 32) & 0xFFFF)
3480 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3481 .addReg(Reg, RegState::Kill)
3482 .addImm((Imm >> 32) & 0xFFFF);
3483 BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
3484 .addReg(Reg, RegState::Kill)
3485 .addImm(32)
3486 .addImm(31);
3487 BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
3488 .addReg(Reg, RegState::Kill)
3489 .addImm((Imm >> 16) & 0xFFFF);
3490 if (Imm & 0xFFFF)
3491 BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
3492 .addReg(Reg, RegState::Kill)
3493 .addImm(Imm & 0xFFFF);
3494 }
3495}
3496
3497MachineInstr *PPCInstrInfo::getForwardingDefMI(
3499 unsigned &OpNoForForwarding,
3500 bool &SeenIntermediateUse) const {
3501 OpNoForForwarding = ~0U;
3502 MachineInstr *DefMI = nullptr;
3503 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3505 // If we're in SSA, get the defs through the MRI. Otherwise, only look
3506 // within the basic block to see if the register is defined using an
3507 // LI/LI8/ADDI/ADDI8.
3508 if (MRI->isSSA()) {
3509 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3510 if (!MI.getOperand(i).isReg())
3511 continue;
3512 Register Reg = MI.getOperand(i).getReg();
3513 if (!Reg.isVirtual())
3514 continue;
3515 Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
3516 if (TrueReg.isVirtual()) {
3517 MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
3518 if (DefMIForTrueReg->getOpcode() == PPC::LI ||
3519 DefMIForTrueReg->getOpcode() == PPC::LI8 ||
3520 DefMIForTrueReg->getOpcode() == PPC::ADDI ||
3521 DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3522 OpNoForForwarding = i;
3523 DefMI = DefMIForTrueReg;
3524 // An ADDI-defined and an LI-defined operand may both exist in the same
3525 // instruction. We prefer to fold the LI operand, as LI only has one Imm
3526 // operand and is more likely to be convertible. So if the current DefMI
3527 // is ADDI/ADDI8, we continue to look for a possible LI/LI8.
3528 if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
3529 break;
3530 }
3531 }
3532 }
3533 } else {
3534 // Looking back through the definition for each operand could be expensive,
3535 // so exit early if this isn't an instruction that either has an immediate
3536 // form or is already an immediate form that we can handle.
3537 ImmInstrInfo III;
3538 unsigned Opc = MI.getOpcode();
3539 bool ConvertibleImmForm =
3540 Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
3541 Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
3542 Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
3543 Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
3544 Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
3545 Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
3546 Opc == PPC::RLWINM8_rec;
3547 bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
3548 ? PPC::isVFRegister(MI.getOperand(0).getReg())
3549 : false;
3550 if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
3551 return nullptr;
3552
3553 // Don't convert or %X, %Y, %Y since that's just a register move.
3554 if ((Opc == PPC::OR || Opc == PPC::OR8) &&
3555 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
3556 return nullptr;
3557 for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
3558 MachineOperand &MO = MI.getOperand(i);
3559 SeenIntermediateUse = false;
3560 if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3561 Register Reg = MI.getOperand(i).getReg();
3562 // If we see another use of this reg between the def and the MI,
3563 // we want to flag it so the def isn't deleted.
3564 MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3565 if (DefMI) {
3566 // Is this register defined by some form of add-immediate (including
3567 // load-immediate) within this basic block?
3568 switch (DefMI->getOpcode()) {
3569 default:
3570 break;
3571 case PPC::LI:
3572 case PPC::LI8:
3573 case PPC::ADDItocL8:
3574 case PPC::ADDI:
3575 case PPC::ADDI8:
3576 OpNoForForwarding = i;
3577 return DefMI;
3578 }
3579 }
3580 }
3581 }
3582 }
3583 return OpNoForForwarding == ~0U ? nullptr : DefMI;
3584}
3585
3586unsigned PPCInstrInfo::getSpillTarget() const {
3587 // With P10, we may need to spill paired vector registers or accumulator
3588 // registers. MMA implies paired vectors, so we can just check that.
3589 bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
3590 // P11 uses the P10 target.
3591 return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
3592 2 : Subtarget.hasP9Vector() ?
3593 1 : 0;
3594}
3595
3596ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3597 return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3598}
3599
3600ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3601 return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3602}
3603
3604// This opt tries to convert the following imm form to an index form to save an
3605// add for stack variables.
3606// Return false if no such pattern found.
3607//
3608// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3609// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3610// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3611//
3612// can be converted to:
3613//
3614// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3615// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3616//
3617// In order to eliminate ADD instr, make sure that:
3618// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3619// new ADDI instr and ADDI can only take int16 Imm.
3620// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3621// between ADDI and ADD instr since its original def in ADDI will be changed
3622// in new ADDI instr. And also there should be no new def for it between
3623// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3624// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3625// between ADD and Imm instr since ADD instr will be eliminated.
3626// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3627// moved to Index instr.
3629 MachineFunction *MF = MI.getParent()->getParent();
3630 MachineRegisterInfo *MRI = &MF->getRegInfo();
3631 bool PostRA = !MRI->isSSA();
3632 // Do this opt after PEI which is after RA. The reason is stack slot expansion
3633 // in PEI may expose such opportunities since in PEI, stack slot offsets to
3634 // frame base(OffsetAddi) are determined.
3635 if (!PostRA)
3636 return false;
3637 unsigned ToBeDeletedReg = 0;
3638 int64_t OffsetImm = 0;
3639 unsigned XFormOpcode = 0;
3640 ImmInstrInfo III;
3641
3642 // Check if Imm instr meets requirement.
3643 if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
3644 III))
3645 return false;
3646
3647 bool OtherIntermediateUse = false;
3648 MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);
3649
3650 // Exit if there is other use between ADD and Imm instr or no def found.
3651 if (OtherIntermediateUse || !ADDMI)
3652 return false;
3653
3654 // Check if ADD instr meets requirement.
3655 if (!isADDInstrEligibleForFolding(*ADDMI))
3656 return false;
3657
3658 unsigned ScaleRegIdx = 0;
3659 int64_t OffsetAddi = 0;
3660 MachineInstr *ADDIMI = nullptr;
3661
3662 // Check if there is a valid ToBeChangedReg in ADDMI.
3663 // 1: It must be killed.
3664 // 2: Its definition must be a valid ADDIMI.
3665 // 3: It must satisfy int16 offset requirement.
3666 if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
3667 ScaleRegIdx = 2;
3668 else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
3669 ScaleRegIdx = 1;
3670 else
3671 return false;
3672
3673 assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3674 Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
3675 Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
3676 auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3678 for (auto It = ++Start; It != End; It++)
3679 if (It->modifiesRegister(Reg, &getRegisterInfo()))
3680 return true;
3681 return false;
3682 };
3683
3684 // We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3685 // treated as special zero when ScaleReg is R0/X0 register.
3686 if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3687 (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
3688 return false;
3689
3690 // Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3691 // and Imm Instr.
3692 if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
3693 return false;
3694
3695 // Now start to do the transformation.
3696 LLVM_DEBUG(dbgs() << "Replace instruction: "
3697 << "\n");
3698 LLVM_DEBUG(ADDIMI->dump());
3699 LLVM_DEBUG(ADDMI->dump());
3700 LLVM_DEBUG(MI.dump());
3701 LLVM_DEBUG(dbgs() << "with: "
3702 << "\n");
3703
3704 // Update ADDI instr.
3705 ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);
3706
3707 // Update Imm instr.
3708 MI.setDesc(get(XFormOpcode));
3709 MI.getOperand(III.ImmOpNo)
3710 .ChangeToRegister(ScaleReg, false, false,
3711 ADDMI->getOperand(ScaleRegIdx).isKill());
3712
3713 MI.getOperand(III.OpNoForForwarding)
3714 .ChangeToRegister(ToBeChangedReg, false, false, true);
3715
3716 // Eliminate ADD instr.
3717 ADDMI->eraseFromParent();
3718
3719 LLVM_DEBUG(ADDIMI->dump());
3720 LLVM_DEBUG(MI.dump());
3721
3722 return true;
3723}
3724
// Checks whether ADDIMI is an ADDI/ADDI8 whose operand 2 is a plain
// immediate. On success the immediate is stored in Imm and true is returned;
// otherwise false is returned and Imm is left unmodified.
// NOTE(review): the first line of this signature is not visible in this
// listing.
3726 int64_t &Imm) const {
3727 unsigned Opc = ADDIMI.getOpcode();
3728
3729 // Only the 32-bit and 64-bit add-immediate opcodes are accepted.
3730 if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3731 return false;
3732
3733 // The operand may not necessarily be an immediate - it could be a relocation.
3734 if (!ADDIMI.getOperand(2).isImm())
3735 return false;
3736
3737 Imm = ADDIMI.getOperand(2).getImm();
3738
3739 return true;
3740}
3741
// Body of the ADD eligibility check: returns true only when ADDMI's opcode is
// ADD4 or ADD8.
// NOTE(review): the signature line of this function is not visible in this
// listing.
3743 unsigned Opc = ADDMI.getOpcode();
3744
3745 // Eligible only if the instruction is a register-register ADD.
3746 return Opc == PPC::ADD4 || Opc == PPC::ADD8;
3747}
3748
// Checks whether the immediate-form memory instruction MI can take part in
// the fold, and collects the pieces the caller needs.
//
// Outputs (only meaningful when true is returned, although XFormOpcode and
// III may already have been written on a false return):
//   ToBeDeletedReg - register read by MI's forwarding operand (must be killed).
//   XFormOpcode    - indexed opcode mapped from MI's opcode.
//   OffsetImm      - immediate displacement currently encoded in MI.
//   III            - description filled in by instrHasImmForm().
// NOTE(review): the first line of this signature is not visible in this
// listing.
3750 unsigned &ToBeDeletedReg,
3751 unsigned &XFormOpcode,
3752 int64_t &OffsetImm,
3753 ImmInstrInfo &III) const {
3754 // Only handle load/store.
3755 if (!MI.mayLoadOrStore())
3756 return false;
3757
3758 unsigned Opc = MI.getOpcode();
3759
3760 XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
3761
3762 // Exit if instruction has no index form.
3763 if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3764 return false;
3765
3766 // III is populated from the indexed opcode; its ImmOpNo/OpNoForForwarding
3767 // are then used below to index MI's own operands.
3766 // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3767 if (!instrHasImmForm(XFormOpcode,
3768 PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))
3769 return false;
3770
3771 if (!III.IsSummingOperands)
3772 return false;
3773
3774 MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
3775 MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
3776 // Only support imm operands, not relocation slots or others.
3777 if (!ImmOperand.isImm())
3778 return false;
3779
3780 assert(RegOperand.isReg() && "Instruction format is not right");
3781
3782 // If the register is not killed here, there are other uses of
3782 // ToBeDeletedReg after the Imm instr, so it cannot be deleted.
3783 if (!RegOperand.isKill())
3784 return false;
3785
3786 ToBeDeletedReg = RegOperand.getReg();
3787 OffsetImm = ImmOperand.getImm();
3788
3789 return true;
3790}
3791
// Checks whether operand `Index` (1 or 2) of ADDMI can serve as the
// ToBeChangedReg: the operand must be killed by ADDMI, its post-RA definition
// must be an eligible ADDI with no intermediate use, and the combined offset
// OffsetAddi + OffsetImm must fit in a signed 16-bit immediate.
//
// ADDIMI is overwritten with the result of the definition lookup (it may be
// null) and OffsetAddi receives the ADDI's immediate when the ADDI is
// eligible.
// NOTE(review): the first line of this signature is not visible in this
// listing.
3793 MachineInstr *&ADDIMI,
3794 int64_t &OffsetAddi,
3795 int64_t OffsetImm) const {
3796 assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
3797 MachineOperand &MO = ADDMI->getOperand(Index);
3798
3799 if (!MO.isKill())
3800 return false;
3801
3802 bool OtherIntermediateUse = false;
3803
3804 ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
3805 // Currently handle only one "add + Imminstr" pair case, exit if other
3806 // intermediate use for ToBeChangedReg found.
3807 // TODO: handle the cases where there are other "add + Imminstr" pairs
3808 // with same offset in Imminstr which is like:
3809 //
3810 // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3811 // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3812 // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3813 // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3814 // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3815 //
3816 // can be converted to:
3817 //
3818 // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3819 // (OffsetAddi + OffsetImm)
3820 // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3821 // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3822
3823 if (OtherIntermediateUse || !ADDIMI)
3824 return false;
3825 // Check if ADDI instr meets requirement.
3826 if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
3827 return false;
3828
3828 // The folded displacement must still be encodable as a signed 16-bit value.
3829 if (isInt<16>(OffsetAddi + OffsetImm))
3830 return true;
3831 return false;
3832}
3833
3834// If this instruction has an immediate form and one of its operands is a
3835// result of a load-immediate or an add-immediate, convert it to
3836// the immediate form if the constant is in range.
//
// RegsToUpdate collects registers whose kill flags may need fixing up by the
// caller. KilledDef, when non-null, receives the feeding definition if it has
// no intermediate use and the forwarded operand is killed in MI.
// Returns true if any of the attempted transformations succeeded.
// NOTE(review): the first line of this signature is not visible in this
// listing.
3838 SmallSet<Register, 4> &RegsToUpdate,
3839 MachineInstr **KilledDef) const {
3840 MachineFunction *MF = MI.getParent()->getParent();
3841 MachineRegisterInfo *MRI = &MF->getRegInfo();
3842 bool PostRA = !MRI->isSSA();
3843 bool SeenIntermediateUse = true;
3844 unsigned ForwardingOperand = ~0U;
3845 MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
3846 SeenIntermediateUse);
3847 if (!DefMI)
3848 return false;
3849 assert(ForwardingOperand < MI.getNumOperands() &&
3850 "The forwarding operand needs to be valid at this point");
3851 bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
3852 bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
3853 if (KilledDef && KillFwdDefMI)
3854 *KilledDef = DefMI;
3855
3856 // Conservatively add defs from DefMI and defs/uses from MI to the set of
3857 // registers that need their kill flags updated.
3858 for (const MachineOperand &MO : DefMI->operands())
3859 if (MO.isReg() && MO.isDef())
3860 RegsToUpdate.insert(MO.getReg());
3861 for (const MachineOperand &MO : MI.operands())
3862 if (MO.isReg())
3863 RegsToUpdate.insert(MO.getReg());
3864
3865 // If this is an imm instruction and its register operand is produced by
3866 // ADDI, put the imm into the imm instruction directly.
3867 if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
3868 PPC::INSTRUCTION_LIST_END &&
3869 transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
3870 return true;
3871
3872 ImmInstrInfo III;
3872 // IsVFReg is only computed for a physical register in operand 0.
3873 bool IsVFReg = MI.getOperand(0).isReg() &&
3874 MI.getOperand(0).getReg().isPhysical() &&
3875 PPC::isVFRegister(MI.getOperand(0).getReg());
3876 bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
3877 // If this is a reg+reg instruction that has a reg+imm form,
3878 // and one of the operands is produced by an add-immediate,
3879 // try to convert it.
3880 if (HasImmForm &&
3881 transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
3882 KillFwdDefMI))
3883 return true;
3884
3885 // If this is a reg+reg instruction that has a reg+imm form,
3886 // and one of the operands is produced by LI, convert it now.
3887 if (HasImmForm &&
3888 transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
3889 return true;
3890
3891 // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
3892 // can be simplified to LI.
3893 if (!HasImmForm &&
3894 simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef, &RegsToUpdate))
3895 return true;
3896
3897 return false;
3898}
3899
// Tries to fold the RLWINM-family instruction that defines MI's operand 1
// (a virtual register) into MI itself, which is also treated as an
// RLWINM-family instruction (operands 2/3/4 are SH/MB/ME immediates).
//
// Depending on the combined mask, MI is rewritten into LI/LI8 0, into
// ANDI_rec/ANDI8_rec with a zero immediate, or into a single rotate-and-mask
// reading the feeder's source register. When the folded register has no
// remaining non-debug use and the feeder has no implicit def, the feeder is
// reported through *ToErase. Returns true if MI was changed.
// NOTE(review): the first line of this signature is not visible in this
// listing.
3901 MachineInstr **ToErase) const {
3902 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3903 Register FoldingReg = MI.getOperand(1).getReg();
3903 // getVRegDef() below is only used for virtual registers.
3904 if (!FoldingReg.isVirtual())
3905 return false;
3906 MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
3907 if (SrcMI->getOpcode() != PPC::RLWINM &&
3908 SrcMI->getOpcode() != PPC::RLWINM_rec &&
3909 SrcMI->getOpcode() != PPC::RLWINM8 &&
3910 SrcMI->getOpcode() != PPC::RLWINM8_rec)
3911 return false;
3912 assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
3913 MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
3914 SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
3915 "Invalid PPC::RLWINM Instruction!");
3916 uint64_t SHSrc = SrcMI->getOperand(2).getImm();
3917 uint64_t SHMI = MI.getOperand(2).getImm();
3918 uint64_t MBSrc = SrcMI->getOperand(3).getImm();
3919 uint64_t MBMI = MI.getOperand(3).getImm();
3920 uint64_t MESrc = SrcMI->getOperand(4).getImm();
3921 uint64_t MEMI = MI.getOperand(4).getImm();
3922
3923 assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
3924 "Invalid PPC::RLWINM Instruction!");
3925 // If MBMI is bigger than MEMI, we can never get a run of ones.
3926 // RotatedSrcMask non-wrap:
3927 // 0........31|32........63
3928 // RotatedSrcMask: B---E B---E
3929 // MaskMI: -----------|--E B------
3930 // Result: ----- --- (Bad candidate)
3931 //
3932 // RotatedSrcMask wrap:
3933 // 0........31|32........63
3934 // RotatedSrcMask: --E B----|--E B----
3935 // MaskMI: -----------|--E B------
3936 // Result: --- -----|--- ----- (Bad candidate)
3937 //
3938 // One special case is RotatedSrcMask is a full set mask.
3939 // RotatedSrcMask full:
3940 // 0........31|32........63
3941 // RotatedSrcMask: ------EB---|-------EB---
3942 // MaskMI: -----------|--E B------
3943 // Result: -----------|--- ------- (Good candidate)
3944
3945 // Mark special case.
3946 bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
3947
3948 // For other MBMI > MEMI cases, just return.
3949 if ((MBMI > MEMI) && !SrcMaskFull)
3950 return false;
3951
3952 // Handle MBMI <= MEMI cases.
3953 APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
3954 // MI only needs the low 32 bits of SrcMI, so only the low 32 bits of the
3955 // SrcMI mask are considered. Note that in APInt the lowest bit is at index
3956 // 0, while in the PowerPC ISA the lowest bit is at index 63.
3957 APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
3958
3959 APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
3960 APInt FinalMask = RotatedSrcMask & MaskMI;
3961 uint32_t NewMB, NewME;
3962 bool Simplified = false;
3963
3964 // If final mask is 0, MI result should be 0 too.
3965 if (FinalMask.isZero()) {
3966 bool Is64Bit =
3967 (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
3968 Simplified = true;
3969 LLVM_DEBUG(dbgs() << "Replace Instr: ");
3970 LLVM_DEBUG(MI.dump());
3971
3972 if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
3973 // Replace MI with "LI 0"
3973 // Operands are removed from the highest index down so that the
3973 // remaining indices stay valid.
3974 MI.removeOperand(4);
3975 MI.removeOperand(3);
3976 MI.removeOperand(2);
3977 MI.getOperand(1).ChangeToImmediate(0);
3978 MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
3979 } else {
3980 // Replace MI with "ANDI_rec reg, 0"
3981 MI.removeOperand(4);
3982 MI.removeOperand(3);
3983 MI.getOperand(2).setImm(0);
3984 MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3985 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
3985 // Move the kill flag of the feeder's source register onto MI, which is
3985 // now the later reader of that register.
3986 if (SrcMI->getOperand(1).isKill()) {
3987 MI.getOperand(1).setIsKill(true);
3988 SrcMI->getOperand(1).setIsKill(false);
3989 } else
3990 // About to replace MI.getOperand(1), clear its kill flag.
3991 MI.getOperand(1).setIsKill(false);
3992 }
3993
3994 LLVM_DEBUG(dbgs() << "With: ");
3995 LLVM_DEBUG(MI.dump());
3996
3997 } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
3998 NewMB <= NewME) ||
3999 SrcMaskFull) {
4000 // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
4001 // than NewME. Otherwise we get a 64 bit value after folding, but MI
4002 // return a 32 bit value.
4003 Simplified = true;
4004 LLVM_DEBUG(dbgs() << "Converting Instr: ");
4005 LLVM_DEBUG(MI.dump());
4006
4006 // The two rotations compose additively modulo the 32-bit word size.
4007 uint16_t NewSH = (SHSrc + SHMI) % 32;
4008 MI.getOperand(2).setImm(NewSH);
4009 // If SrcMI mask is full, no need to update MBMI and MEMI.
4010 if (!SrcMaskFull) {
4011 MI.getOperand(3).setImm(NewMB);
4012 MI.getOperand(4).setImm(NewME);
4013 }
4014 MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
4015 if (SrcMI->getOperand(1).isKill()) {
4016 MI.getOperand(1).setIsKill(true);
4017 SrcMI->getOperand(1).setIsKill(false);
4018 } else
4019 // About to replace MI.getOperand(1), clear its kill flag.
4020 MI.getOperand(1).setIsKill(false);
4021
4022 LLVM_DEBUG(dbgs() << "To: ");
4023 LLVM_DEBUG(MI.dump());
4024 }
4024 // NOTE(review): `Simplified & ...` below is a bitwise AND where a logical
4024 // `&&` looks intended. With bool operands the result should be the same,
4024 // but use_nodbg_empty() is then evaluated even when Simplified is false.
4024 // NOTE(review): ToErase is dereferenced without a null check; callers
4024 // presumably always pass a valid pointer - confirm.
4025 if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
4026 !SrcMI->hasImplicitDef()) {
4027 // If FoldingReg has no non-debug use and it has no implicit def (it
4028 // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
4029 // Otherwise keep it.
4030 *ToErase = SrcMI;
4031 LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
4032 LLVM_DEBUG(SrcMI->dump());
4033 }
4034 return Simplified;
4035}
4036
// Describes how the register-register opcode Opc maps onto a
// register-immediate opcode.
//
// On entry III is reset to defaults (ImmOpNo = 2, OpNoForForwarding = 2,
// ImmWidth = 16, ImmMustBeMultipleOf = 1, TruncateImmTo = 0,
// IsSummingOperands = false); the matching case then fills in the rest,
// including III.ImmOpcode.
//
// Opc     - opcode to look up.
// IsVFReg - only consulted post-RA for LXSSPX/LXSDX/STXSSPX/STXSDX, where it
//           selects between the LXSSP/LXSD/STXSSP/STXSD and the
//           LFS/LFD/STFS/STFD immediate opcodes.
// III     - output description; contents are not meaningful when false is
//           returned.
// PostRA  - when false, the four opcodes above fall through to the
//           DFLOAD*/DFSTORE* immediate opcodes instead.
//
// Returns false for opcodes not listed in the switch and for the LXVX..
// XFSTOREf64 group when the subtarget lacks P9 vector support; true otherwise.
4037bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
4038 ImmInstrInfo &III, bool PostRA) const {
4039 // The vast majority of the instructions would need their operand 2 replaced
4040 // with an immediate when switching to the reg+imm form. A marked exception
4041 // are the update form loads/stores for which a constant operand 2 would need
4042 // to turn into a displacement and move operand 1 to the operand 2 position.
4043 III.ImmOpNo = 2;
4044 III.OpNoForForwarding = 2;
4045 III.ImmWidth = 16;
4046 III.ImmMustBeMultipleOf = 1;
4047 III.TruncateImmTo = 0;
4048 III.IsSummingOperands = false;
4049 switch (Opc) {
4050 default: return false;
4051 case PPC::ADD4:
4052 case PPC::ADD8:
4053 III.SignedImm = true;
4054 III.ZeroIsSpecialOrig = 0;
4055 III.ZeroIsSpecialNew = 1;
4056 III.IsCommutative = true;
4057 III.IsSummingOperands = true;
4058 III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
4059 break;
4060 case PPC::ADDC:
4061 case PPC::ADDC8:
4062 III.SignedImm = true;
4063 III.ZeroIsSpecialOrig = 0;
4064 III.ZeroIsSpecialNew = 0;
4065 III.IsCommutative = true;
4066 III.IsSummingOperands = true;
4067 III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
4068 break;
4069 case PPC::ADDC_rec:
4070 III.SignedImm = true;
4071 III.ZeroIsSpecialOrig = 0;
4072 III.ZeroIsSpecialNew = 0;
4073 III.IsCommutative = true;
4074 III.IsSummingOperands = true;
4075 III.ImmOpcode = PPC::ADDIC_rec;
4076 break;
4077 case PPC::SUBFC:
4078 case PPC::SUBFC8:
4079 III.SignedImm = true;
4080 III.ZeroIsSpecialOrig = 0;
4081 III.ZeroIsSpecialNew = 0;
4082 III.IsCommutative = false;
4083 III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
4084 break;
4085 case PPC::CMPW:
4086 case PPC::CMPD:
4087 III.SignedImm = true;
4088 III.ZeroIsSpecialOrig = 0;
4089 III.ZeroIsSpecialNew = 0;
4090 III.IsCommutative = false;
4091 III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
4092 break;
4093 case PPC::CMPLW:
4094 case PPC::CMPLD:
4095 III.SignedImm = false;
4096 III.ZeroIsSpecialOrig = 0;
4097 III.ZeroIsSpecialNew = 0;
4098 III.IsCommutative = false;
4099 III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
4100 break;
4101 case PPC::AND_rec:
4102 case PPC::AND8_rec:
4103 case PPC::OR:
4104 case PPC::OR8:
4105 case PPC::XOR:
4106 case PPC::XOR8:
4107 III.SignedImm = false;
4108 III.ZeroIsSpecialOrig = 0;
4109 III.ZeroIsSpecialNew = 0;
4110 III.IsCommutative = true;
4111 switch(Opc) {
4112 default: llvm_unreachable("Unknown opcode");
4113 case PPC::AND_rec:
4114 III.ImmOpcode = PPC::ANDI_rec;
4115 break;
4116 case PPC::AND8_rec:
4117 III.ImmOpcode = PPC::ANDI8_rec;
4118 break;
4119 case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4120 case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4121 case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4122 case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4123 }
4124 break;
4124 // 32-bit rotates and shifts by a register amount.
4125 case PPC::RLWNM:
4126 case PPC::RLWNM8:
4127 case PPC::RLWNM_rec:
4128 case PPC::RLWNM8_rec:
4129 case PPC::SLW:
4130 case PPC::SLW8:
4131 case PPC::SLW_rec:
4132 case PPC::SLW8_rec:
4133 case PPC::SRW:
4134 case PPC::SRW8:
4135 case PPC::SRW_rec:
4136 case PPC::SRW8_rec:
4137 case PPC::SRAW:
4138 case PPC::SRAW_rec:
4139 III.SignedImm = false;
4140 III.ZeroIsSpecialOrig = 0;
4141 III.ZeroIsSpecialNew = 0;
4142 III.IsCommutative = false;
4143 // This isn't actually true, but the instructions ignore any of the
4144 // upper bits, so any immediate loaded with an LI is acceptable.
4145 // This does not apply to shift right algebraic because a value
4146 // out of range will produce a -1/0.
4147 III.ImmWidth = 16;
4148 if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
4149 Opc == PPC::RLWNM8_rec)
4150 III.TruncateImmTo = 5;
4151 else
4152 III.TruncateImmTo = 6;
4153 switch(Opc) {
4154 default: llvm_unreachable("Unknown opcode");
4155 case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4156 case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4157 case PPC::RLWNM_rec:
4158 III.ImmOpcode = PPC::RLWINM_rec;
4159 break;
4160 case PPC::RLWNM8_rec:
4161 III.ImmOpcode = PPC::RLWINM8_rec;
4162 break;
4163 case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4164 case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4165 case PPC::SLW_rec:
4166 III.ImmOpcode = PPC::RLWINM_rec;
4167 break;
4168 case PPC::SLW8_rec:
4169 III.ImmOpcode = PPC::RLWINM8_rec;
4170 break;
4171 case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4172 case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4173 case PPC::SRW_rec:
4174 III.ImmOpcode = PPC::RLWINM_rec;
4175 break;
4176 case PPC::SRW8_rec:
4177 III.ImmOpcode = PPC::RLWINM8_rec;
4178 break;
4178 // The algebraic shifts override the width/truncation chosen above.
4179 case PPC::SRAW:
4180 III.ImmWidth = 5;
4181 III.TruncateImmTo = 0;
4182 III.ImmOpcode = PPC::SRAWI;
4183 break;
4184 case PPC::SRAW_rec:
4185 III.ImmWidth = 5;
4186 III.TruncateImmTo = 0;
4187 III.ImmOpcode = PPC::SRAWI_rec;
4188 break;
4189 }
4190 break;
4190 // 64-bit rotates and shifts by a register amount.
4191 case PPC::RLDCL:
4192 case PPC::RLDCL_rec:
4193 case PPC::RLDCR:
4194 case PPC::RLDCR_rec:
4195 case PPC::SLD:
4196 case PPC::SLD_rec:
4197 case PPC::SRD:
4198 case PPC::SRD_rec:
4199 case PPC::SRAD:
4200 case PPC::SRAD_rec:
4201 III.SignedImm = false;
4202 III.ZeroIsSpecialOrig = 0;
4203 III.ZeroIsSpecialNew = 0;
4204 III.IsCommutative = false;
4205 // This isn't actually true, but the instructions ignore any of the
4206 // upper bits, so any immediate loaded with an LI is acceptable.
4207 // This does not apply to shift right algebraic because a value
4208 // out of range will produce a -1/0.
4209 III.ImmWidth = 16;
4210 if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
4211 Opc == PPC::RLDCR_rec)
4212 III.TruncateImmTo = 6;
4213 else
4214 III.TruncateImmTo = 7;
4215 switch(Opc) {
4216 default: llvm_unreachable("Unknown opcode");
4217 case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4218 case PPC::RLDCL_rec:
4219 III.ImmOpcode = PPC::RLDICL_rec;
4220 break;
4221 case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4222 case PPC::RLDCR_rec:
4223 III.ImmOpcode = PPC::RLDICR_rec;
4224 break;
4225 case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4226 case PPC::SLD_rec:
4227 III.ImmOpcode = PPC::RLDICR_rec;
4228 break;
4229 case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4230 case PPC::SRD_rec:
4231 III.ImmOpcode = PPC::RLDICL_rec;
4232 break;
4233 case PPC::SRAD:
4234 III.ImmWidth = 6;
4235 III.TruncateImmTo = 0;
4236 III.ImmOpcode = PPC::SRADI;
4237 break;
4238 case PPC::SRAD_rec:
4239 III.ImmWidth = 6;
4240 III.TruncateImmTo = 0;
4241 III.ImmOpcode = PPC::SRADI_rec;
4242 break;
4243 }
4244 break;
4245 // Loads and stores:
4246 case PPC::LBZX:
4247 case PPC::LBZX8:
4248 case PPC::LHZX:
4249 case PPC::LHZX8:
4250 case PPC::LHAX:
4251 case PPC::LHAX8:
4252 case PPC::LWZX:
4253 case PPC::LWZX8:
4254 case PPC::LWAX:
4255 case PPC::LDX:
4256 case PPC::LFSX:
4257 case PPC::LFDX:
4258 case PPC::STBX:
4259 case PPC::STBX8:
4260 case PPC::STHX:
4261 case PPC::STHX8:
4262 case PPC::STWX:
4263 case PPC::STWX8:
4264 case PPC::STDX:
4265 case PPC::STFSX:
4266 case PPC::STFDX:
4267 III.SignedImm = true;
4268 III.ZeroIsSpecialOrig = 1;
4269 III.ZeroIsSpecialNew = 2;
4270 III.IsCommutative = true;
4271 III.IsSummingOperands = true;
4272 III.ImmOpNo = 1;
4273 III.OpNoForForwarding = 2;
4274 switch(Opc) {
4275 default: llvm_unreachable("Unknown opcode");
4276 case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4277 case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4278 case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4279 case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4280 case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4281 case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4282 case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4283 case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4284 case PPC::LWAX:
4285 III.ImmOpcode = PPC::LWA;
4286 III.ImmMustBeMultipleOf = 4;
4287 break;
4288 case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
4289 case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4290 case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4291 case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4292 case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4293 case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4294 case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4295 case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4296 case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4297 case PPC::STDX:
4298 III.ImmOpcode = PPC::STD;
4299 III.ImmMustBeMultipleOf = 4;
4300 break;
4301 case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4302 case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4303 }
4304 break;
4304 // Update-form loads and stores: the operand positions are shifted by one
4304 // relative to the non-update forms above (ImmOpNo = 2,
4304 // OpNoForForwarding = 3).
4305 case PPC::LBZUX:
4306 case PPC::LBZUX8:
4307 case PPC::LHZUX:
4308 case PPC::LHZUX8:
4309 case PPC::LHAUX:
4310 case PPC::LHAUX8:
4311 case PPC::LWZUX:
4312 case PPC::LWZUX8:
4313 case PPC::LDUX:
4314 case PPC::LFSUX:
4315 case PPC::LFDUX:
4316 case PPC::STBUX:
4317 case PPC::STBUX8:
4318 case PPC::STHUX:
4319 case PPC::STHUX8:
4320 case PPC::STWUX:
4321 case PPC::STWUX8:
4322 case PPC::STDUX:
4323 case PPC::STFSUX:
4324 case PPC::STFDUX:
4325 III.SignedImm = true;
4326 III.ZeroIsSpecialOrig = 2;
4327 III.ZeroIsSpecialNew = 3;
4328 III.IsCommutative = false;
4329 III.IsSummingOperands = true;
4330 III.ImmOpNo = 2;
4331 III.OpNoForForwarding = 3;
4332 switch(Opc) {
4333 default: llvm_unreachable("Unknown opcode");
4334 case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4335 case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4336 case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4337 case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4338 case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4339 case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4340 case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4341 case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4342 case PPC::LDUX:
4343 III.ImmOpcode = PPC::LDU;
4344 III.ImmMustBeMultipleOf = 4;
4345 break;
4346 case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4347 case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4348 case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4349 case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4350 case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4351 case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4352 case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4353 case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4354 case PPC::STDUX:
4355 III.ImmOpcode = PPC::STDU;
4356 III.ImmMustBeMultipleOf = 4;
4357 break;
4358 case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4359 case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4360 }
4361 break;
4362 // Power9 and up only. For some of these, the X-Form version has access to all
4363 // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4364 // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4365 // into or stored from is one of the VR registers.
4366 case PPC::LXVX:
4367 case PPC::LXSSPX:
4368 case PPC::LXSDX:
4369 case PPC::STXVX:
4370 case PPC::STXSSPX:
4371 case PPC::STXSDX:
4372 case PPC::XFLOADf32:
4373 case PPC::XFLOADf64:
4374 case PPC::XFSTOREf32:
4375 case PPC::XFSTOREf64:
4376 if (!Subtarget.hasP9Vector())
4377 return false;
4378 III.SignedImm = true;
4379 III.ZeroIsSpecialOrig = 1;
4380 III.ZeroIsSpecialNew = 2;
4381 III.IsCommutative = true;
4382 III.IsSummingOperands = true;
4383 III.ImmOpNo = 1;
4384 III.OpNoForForwarding = 2;
4385 III.ImmMustBeMultipleOf = 4;
4385 // The case order below matters: each post-RA-capable opcode sits
4385 // directly above the pseudo it falls through to when PostRA is false.
4386 switch(Opc) {
4387 default: llvm_unreachable("Unknown opcode");
4388 case PPC::LXVX:
4389 III.ImmOpcode = PPC::LXV;
4390 III.ImmMustBeMultipleOf = 16;
4391 break;
4392 case PPC::LXSSPX:
4393 if (PostRA) {
4394 if (IsVFReg)
4395 III.ImmOpcode = PPC::LXSSP;
4396 else {
4397 III.ImmOpcode = PPC::LFS;
4398 III.ImmMustBeMultipleOf = 1;
4399 }
4400 break;
4401 }
4402 [[fallthrough]];
4403 case PPC::XFLOADf32:
4404 III.ImmOpcode = PPC::DFLOADf32;
4405 break;
4406 case PPC::LXSDX:
4407 if (PostRA) {
4408 if (IsVFReg)
4409 III.ImmOpcode = PPC::LXSD;
4410 else {
4411 III.ImmOpcode = PPC::LFD;
4412 III.ImmMustBeMultipleOf = 1;
4413 }
4414 break;
4415 }
4416 [[fallthrough]];
4417 case PPC::XFLOADf64:
4418 III.ImmOpcode = PPC::DFLOADf64;
4419 break;
4420 case PPC::STXVX:
4421 III.ImmOpcode = PPC::STXV;
4422 III.ImmMustBeMultipleOf = 16;
4423 break;
4424 case PPC::STXSSPX:
4425 if (PostRA) {
4426 if (IsVFReg)
4427 III.ImmOpcode = PPC::STXSSP;
4428 else {
4429 III.ImmOpcode = PPC::STFS;
4430 III.ImmMustBeMultipleOf = 1;
4431 }
4432 break;
4433 }
4434 [[fallthrough]];
4435 case PPC::XFSTOREf32:
4436 III.ImmOpcode = PPC::DFSTOREf32;
4437 break;
4438 case PPC::STXSDX:
4439 if (PostRA) {
4440 if (IsVFReg)
4441 III.ImmOpcode = PPC::STXSD;
4442 else {
4443 III.ImmOpcode = PPC::STFD;
4444 III.ImmMustBeMultipleOf = 1;
4445 }
4446 break;
4447 }
4448 [[fallthrough]];
4449 case PPC::XFSTOREf64:
4450 III.ImmOpcode = PPC::DFSTOREf64;
4451 break;
4452 }
4453 break;
4454 }
4455 return true;
4456}
4457
4458// Utility function for swaping two arbitrary operands of an instruction.
4459static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4460 assert(Op1 != Op2 && "Cannot swap operand with itself.");
4461
4462 unsigned MaxOp = std::max(Op1, Op2);
4463 unsigned MinOp = std::min(Op1, Op2);
4464 MachineOperand MOp1 = MI.getOperand(MinOp);
4465 MachineOperand MOp2 = MI.getOperand(MaxOp);
4466 MI.removeOperand(std::max(Op1, Op2));
4467 MI.removeOperand(std::min(Op1, Op2));
4468
4469 // If the operands we are swapping are the two at the end (the common case)
4470 // we can just remove both and add them in the opposite order.
4471 if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
4472 MI.addOperand(MOp2);
4473 MI.addOperand(MOp1);
4474 } else {
4475 // Store all operands in a temporary vector, remove them and re-add in the
4476 // right order.
4478 unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
4479 for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
4480 MOps.push_back(MI.getOperand(i));
4481 MI.removeOperand(i);
4482 }
4483 // MOp2 needs to be added next.
4484 MI.addOperand(MOp2);
4485 // Now add the rest.
4486 for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4487 if (i == MaxOp)
4488 MI.addOperand(MOp1);
4489 else {
4490 MI.addOperand(MOps.back());
4491 MOps.pop_back();
4492 }
4493 }
4494 }
4495}
4496
4497// Check if the 'MI' that has the index OpNoForForwarding
4498// meets the requirement described in the ImmInstrInfo.
4499bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4500 const ImmInstrInfo &III,
4501 unsigned OpNoForForwarding
4502 ) const {
4503 // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4504 // would not work pre-RA, we can only do the check post RA.
4505 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4506 if (MRI.isSSA())
4507 return false;
4508
4509 // Cannot do the transform if MI isn't summing the operands.
4510 if (!III.IsSummingOperands)
4511 return false;
4512
4513 // The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4514 if (!III.ZeroIsSpecialOrig)
4515 return false;
4516
4517 // We cannot do the transform if the operand we are trying to replace
4518 // isn't the same as the operand the instruction allows.
4519 if (OpNoForForwarding != III.OpNoForForwarding)
4520 return false;
4521
4522 // Check if the instruction we are trying to transform really has
4523 // the special zero register as its operand.
4524 if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4525 MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4526 return false;
4527
4528 // This machine instruction is convertible if it is,
4529 // 1. summing the operands.
4530 // 2. one of the operands is special zero register.
4531 // 3. the operand we are trying to replace is allowed by the MI.
4532 return true;
4533}
4534
4535// Check if the DefMI is the add inst and set the ImmMO and RegMO
4536// accordingly.
4537bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4538 const ImmInstrInfo &III,
4539 MachineOperand *&ImmMO,
4540 MachineOperand *&RegMO) const {
4541 unsigned Opc = DefMI.getOpcode();
4542 if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4543 return false;
4544
4545 // Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL8
4546 // on AIX which is used for toc-data access. TODO: Follow up to see if it can
4547 // apply for AIX toc-data as well.
4548 if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4549 return false;
4550
4551 assert(DefMI.getNumOperands() >= 3 &&
4552 "Add inst must have at least three operands");
4553 RegMO = &DefMI.getOperand(1);
4554 ImmMO = &DefMI.getOperand(2);
4555
4556 // Before RA, ADDI first operand could be a frame index.
4557 if (!RegMO->isReg())
4558 return false;
4559
4560 // This DefMI is elgible for forwarding if it is:
4561 // 1. add inst
4562 // 2. one of the operands is Imm/CPI/Global.
4563 return isAnImmediateOperand(*ImmMO);
4564}
4565
// Checks whether the register in RegMO (the source register of DefMI) can be
// forwarded into MI, i.e. whether it is not redefined between DefMI and MI.
//
// RegMO                - register operand of DefMI that would be forwarded.
// DefMI                - the defining (add-immediate) instruction.
// MI                   - the user instruction.
// KillDefMI            - whether DefMI is going to be erased.
// IsFwdFeederRegKilled - set to true if an instruction between DefMI and MI
//                        kills the register; never cleared here.
// SeenIntermediateUse  - set to true if an instruction between DefMI and MI
//                        reads the register; never cleared here.
// Always returns false pre-RA (while the function is in SSA form).
4566bool PPCInstrInfo::isRegElgibleForForwarding(
4567 const MachineOperand &RegMO, const MachineInstr &DefMI,
4568 const MachineInstr &MI, bool KillDefMI,
4569 bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4570 // x = addi y, imm
4571 // ...
4572 // z = lfdx 0, x -> z = lfd imm(y)
4573 // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4574 // of "y" between the DEF of "x" and "z".
4575 // The query is only valid post RA.
4576 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4577 if (MRI.isSSA())
4578 return false;
4579
4580 Register Reg = RegMO.getReg();
4581
4582 // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
4582 // NOTE(review): the declaration of 'It' is not visible in this listing;
4582 // presumably it is a reverse iterator positioned at MI - confirm.
4584 MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
4584 // Step past MI itself before scanning.
4585 It++;
4586 for (; It != E; ++It) {
4587 if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4588 return false;
4589 else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4590 IsFwdFeederRegKilled = true;
4591 if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4592 SeenIntermediateUse = true;
4593 // Made it to DefMI without encountering a clobber.
4594 if ((&*It) == &DefMI)
4595 break;
4596 }
4596 // NOTE(review): if DefMI is not found, 'It' equals E here and the assert
4596 // below dereferences it - DefMI is assumed to precede MI in the same block.
4597 assert((&*It) == &DefMI && "DefMI is missing");
4598
4599 // If DefMI also defines the register to be forwarded, we can only forward it
4600 // if DefMI is being erased.
4601 if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
4602 return KillDefMI;
4603
4604 return true;
4605}
4606
// Checks whether the immediate-like operand ImmMO of DefMI satisfies the
// constraints in III so that it can be forwarded into the user instruction.
//
// ImmMO   - operand of DefMI; must satisfy isAnImmediateOperand().
// DefMI   - the defining instruction (its opcode selects the ADDItocL8 path).
// III     - constraints of the target immediate form.
// Imm     - receives the adjusted immediate when ImmMO is a plain immediate
//           (it may be written even when false is returned); not written on
//           the ADDItocL8 path.
// BaseImm - extra displacement added to ImmMO's value before checking.
4607bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4608 const MachineInstr &DefMI,
4609 const ImmInstrInfo &III,
4610 int64_t &Imm,
4611 int64_t BaseImm) const {
4612 assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4613 if (DefMI.getOpcode() == PPC::ADDItocL8) {
4614 // The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
4615 // However, we know that, it is 16-bit width, and has the alignment of 4.
4616 // Check if the instruction met the requirement.
4617 if (III.ImmMustBeMultipleOf > 4 ||
4618 III.TruncateImmTo || III.ImmWidth != 16)
4619 return false;
4620
4621 // Going from XForm to DForm loads means that the displacement needs to be
4622 // not just an immediate but also a multiple of 4, or 16 depending on the
4623 // load. A DForm load cannot be represented if it is a multiple of say 2.
4624 // XForm loads do not have this restriction.
4625 if (ImmMO.isGlobal()) {
4626 const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
4626 // NOTE(review): the condition guarding the return below is not visible
4626 // in this listing; presumably it checks the global's alignment against
4626 // III.ImmMustBeMultipleOf - confirm against the original source.
4628 return false;
4629 }
4630
4631 return true;
4632 }
4633
4634 if (ImmMO.isImm()) {
4635 // It is Imm, we need to check if the Imm fit the range.
4636 // Sign-extend to 64-bits.
4637 // DefMI may be folded with another imm form instruction, the result Imm is
4638 // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4639 APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
4640 if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
4641 return false;
4642 if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
4643 return false;
4644 Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);
4645
4646 if (Imm % III.ImmMustBeMultipleOf)
4647 return false;
4647 // Keep only the low TruncateImmTo bits when the target form asks for it.
4648 if (III.TruncateImmTo)
4649 Imm &= ((1 << III.TruncateImmTo) - 1);
4650 }
4651 else
4652 return false;
4653
4654 // This ImmMO can be forwarded if it meets the requirement described
4655 // in ImmInstrInfo
4656 return true;
4657}
4658
// Try to simplify MI given that one of its inputs is defined by DefMI, a
// load-immediate (LI/LI8) of a constant.
//
// Two kinds of rewrite are performed:
//  * Compare-immediate instructions (CMPWI/CMPLWI/CMPDI/CMPLDI), pre-RA only:
//    ISEL/ISEL8 users of the compare result are turned into a COPY of the
//    selected register, or into LI/LI8 0 when the selected register is
//    ZERO/ZERO8.
//  * ADDI, SUBFIC, RLDICL, RLWINM, ORI and XORI variants: the constant result
//    is computed here and, when it fits, MI is replaced through
//    replaceInstrWithLI().
//
// MI                - the instruction to simplify; modified in place.
// DefMI             - must be LI/LI8 with an immediate operand 1, otherwise
//                     the function returns false. Its immediate may be
//                     rewritten on the record-form pre-RA path.
// OpNoForForwarding - not read in this body.
// KilledDef         - optional; reset to nullptr when the replacement sets CR,
//                     because DefMI must then be kept.
// RegsToUpdate      - optional; receives the register operands of every ISEL
//                     that is converted to a COPY.
//
// Returns true if an instruction was replaced. Note that the ISEL -> LI 0
// conversion in the compare case does not by itself make the function return
// true.
4659bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4660 unsigned OpNoForForwarding,
4661 MachineInstr **KilledDef,
4662 SmallSet<Register, 4> *RegsToUpdate) const {
4663 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
4664 !DefMI.getOperand(1).isImm())
4665 return false;
4666
4667 MachineFunction *MF = MI.getParent()->getParent();
4668 MachineRegisterInfo *MRI = &MF->getRegInfo();
4669 bool PostRA = !MRI->isSSA();
4670
4671 int64_t Immediate = DefMI.getOperand(1).getImm();
4672 // Sign-extend to 64-bits.
4673 int64_t SExtImm = SignExtend64<16>(Immediate);
4674
4675 bool ReplaceWithLI = false;
4676 bool Is64BitLI = false;
4677 int64_t NewImm = 0;
4678 bool SetCR = false;
4679 unsigned Opc = MI.getOpcode();
4680 switch (Opc) {
4681 default:
4682 return false;
4683
4684 // FIXME: Any branches conditional on such a comparison can be made
4685 // unconditional. At this time, this happens too infrequently to be worth
4686 // the implementation effort, but if that ever changes, we could convert
4687 // such a pattern here.
4688 case PPC::CMPWI:
4689 case PPC::CMPLWI:
4690 case PPC::CMPDI:
4691 case PPC::CMPLDI: {
4692 // Doing this post-RA would require dataflow analysis to reliably find uses
4693 // of the CR register set by the compare.
4694 // No need to fixup killed/dead flag since this transformation is only valid
4695 // before RA.
4696 if (PostRA)
4697 return false;
4698 // If a compare-immediate is fed by an immediate and is itself an input of
4699 // an ISEL (the most common case), convert the ISEL into a COPY of the
// correct register.
4700 bool Changed = false;
4701 Register DefReg = MI.getOperand(0).getReg();
4702 int64_t Comparand = MI.getOperand(2).getImm();
// Treat the comparand as a 16-bit field: if any bit at or above bit 15 is set,
// fill the upper 48 bits with ones.
4703 int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
4704 ? (Comparand | 0xFFFFFFFFFFFF0000)
4705 : Comparand;
4706
4707 for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
4708 unsigned UseOpc = CompareUseMI.getOpcode();
4709 if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4710 continue;
4711 unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
4712 Register TrueReg = CompareUseMI.getOperand(1).getReg();
4713 Register FalseReg = CompareUseMI.getOperand(2).getReg();
4714 unsigned RegToCopy =
4715 selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
4716 if (RegToCopy == PPC::NoRegister)
4717 continue;
4718 // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4719 if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
4720 CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4721 replaceInstrOperandWithImm(CompareUseMI, 1, 0);
// Operands are removed from the highest index down so the remaining indices
// stay valid.
4722 CompareUseMI.removeOperand(3);
4723 CompareUseMI.removeOperand(2);
4724 continue;
4725 }
4726 LLVM_DEBUG(
4727 dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4728 LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4729 LLVM_DEBUG(dbgs() << "Is converted to:\n");
4730 if (RegsToUpdate) {
4731 for (const MachineOperand &MO : CompareUseMI.operands())
4732 if (MO.isReg())
4733 RegsToUpdate->insert(MO.getReg());
4734 }
4735 // Convert to copy and remove unneeded operands.
4736 CompareUseMI.setDesc(get(PPC::COPY));
4737 CompareUseMI.removeOperand(3);
4738 CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
4739 CmpIselsConverted++;
4740 Changed = true;
4741 LLVM_DEBUG(CompareUseMI.dump());
4742 }
4743 if (Changed)
4744 return true;
4745 // This may end up incremented multiple times since this function is called
4746 // during a fixed-point transformation, but it is only meant to indicate the
4747 // presence of this opportunity.
4748 MissedConvertibleImmediateInstrs++;
4749 return false;
4750 }
4751
4752 // Immediate forms - may simply be convertible to an LI.
4753 case PPC::ADDI:
4754 case PPC::ADDI8: {
4755 // Does the sum fit in a 16-bit signed field?
4756 int64_t Addend = MI.getOperand(2).getImm();
4757 if (isInt<16>(Addend + SExtImm)) {
4758 ReplaceWithLI = true;
4759 Is64BitLI = Opc == PPC::ADDI8;
4760 NewImm = Addend + SExtImm;
4761 break;
4762 }
4763 return false;
4764 }
4765 case PPC::SUBFIC:
4766 case PPC::SUBFIC8: {
4767 // Only transform this if the CARRY implicit operand is dead.
4768 if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
4769 return false;
4770 int64_t Minuend = MI.getOperand(2).getImm();
4771 if (isInt<16>(Minuend - SExtImm)) {
4772 ReplaceWithLI = true;
4773 Is64BitLI = Opc == PPC::SUBFIC8;
4774 NewImm = Minuend - SExtImm;
4775 break;
4776 }
4777 return false;
4778 }
4779 case PPC::RLDICL:
4780 case PPC::RLDICL_rec:
4781 case PPC::RLDICL_32:
4782 case PPC::RLDICL_32_64: {
4783 // Use APInt's rotate function.
4784 int64_t SH = MI.getOperand(2).getImm();
4785 int64_t MB = MI.getOperand(3).getImm();
4786 APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
4787 SExtImm, true);
4788 InVal = InVal.rotl(SH);
// Keep only the low (64 - MB) bits; MB == 0 keeps everything.
4789 uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
4790 InVal &= Mask;
4791 // Can't replace negative values with an LI as that will sign-extend
4792 // and not clear the left bits. If we're setting the CR bit, we will use
4793 // ANDI_rec which won't sign extend, so that's safe.
4794 if (isUInt<15>(InVal.getSExtValue()) ||
4795 (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
4796 ReplaceWithLI = true;
4797 Is64BitLI = Opc != PPC::RLDICL_32;
4798 NewImm = InVal.getSExtValue();
4799 SetCR = Opc == PPC::RLDICL_rec;
4800 break;
4801 }
4802 return false;
4803 }
4804 case PPC::RLWINM:
4805 case PPC::RLWINM8:
4806 case PPC::RLWINM_rec:
4807 case PPC::RLWINM8_rec: {
4808 int64_t SH = MI.getOperand(2).getImm();
4809 int64_t MB = MI.getOperand(3).getImm();
4810 int64_t ME = MI.getOperand(4).getImm();
4811 APInt InVal(32, SExtImm, true);
4812 InVal = InVal.rotl(SH);
4813 APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
4814 InVal &= Mask;
4815 // Can't replace negative values with an LI as that will sign-extend
4816 // and not clear the left bits. If we're setting the CR bit, we will use
4817 // ANDI_rec which won't sign extend, so that's safe.
4818 bool ValueFits = isUInt<15>(InVal.getSExtValue());
4819 ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
4820 isUInt<16>(InVal.getSExtValue()));
4821 if (ValueFits) {
4822 ReplaceWithLI = true;
4823 Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
4824 NewImm = InVal.getSExtValue();
4825 SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
4826 break;
4827 }
4828 return false;
4829 }
4830 case PPC::ORI:
4831 case PPC::ORI8:
4832 case PPC::XORI:
4833 case PPC::XORI8: {
4834 int64_t LogicalImm = MI.getOperand(2).getImm();
4835 int64_t Result = 0;
4836 if (Opc == PPC::ORI || Opc == PPC::ORI8)
4837 Result = LogicalImm | SExtImm;
4838 else
4839 Result = LogicalImm ^ SExtImm;
4840 if (isInt<16>(Result)) {
4841 ReplaceWithLI = true;
4842 Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
4843 NewImm = Result;
4844 break;
4845 }
4846 return false;
4847 }
4848 }
4849
4850 if (ReplaceWithLI) {
4851 // We need to be careful with CR-setting instructions we're replacing.
4852 if (SetCR) {
4853 // We don't know anything about uses when we're out of SSA, so only
4854 // replace if the new immediate will be reproduced.
// ImmChanged is true when NewImm has a bit set that SExtImm does not, i.e.
// and-ing the original immediate cannot reproduce NewImm.
4855 bool ImmChanged = (SExtImm & NewImm) != NewImm;
4856 if (PostRA && ImmChanged)
4857 return false;
4858
4859 if (!PostRA) {
4860 // If the defining load-immediate has no other uses, we can just replace
4861 // the immediate with the new immediate.
4862 if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
4863 DefMI.getOperand(1).setImm(NewImm);
4864
4865 // If we're not using the GPR result of the CR-setting instruction, we
4866 // just need to and with zero/non-zero depending on the new immediate.
4867 else if (MRI->use_empty(MI.getOperand(0).getReg())) {
4868 if (NewImm) {
4869 assert(Immediate && "Transformation converted zero to non-zero?");
4870 NewImm = Immediate;
4871 }
4872 } else if (ImmChanged)
4873 return false;
4874 }
4875 }
4876
4877 LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4878 LLVM_DEBUG(MI.dump());
4879 LLVM_DEBUG(dbgs() << "Fed by:\n");
4880 LLVM_DEBUG(DefMI.dump());
4881 LoadImmediateInfo LII;
4882 LII.Imm = NewImm;
4883 LII.Is64Bit = Is64BitLI;
4884 LII.SetCR = SetCR;
4885 // If we're setting the CR, the original load-immediate must be kept (as an
4886 // operand to ANDI_rec/ANDI8_rec).
4887 if (KilledDef && SetCR)
4888 *KilledDef = nullptr;
4889 replaceInstrWithLI(MI, LII);
4890
4891 if (PostRA)
4892 recomputeLivenessFlags(*MI.getParent());
4893
4894 LLVM_DEBUG(dbgs() << "With:\n");
4895 LLVM_DEBUG(MI.dump());
4896 return true;
4897 }
4898 return false;
4899}
4900
4901bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4902 MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4903 MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4904 bool PostRA = !MRI->isSSA();
4905 // FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4906 // for post-ra.
4907 if (PostRA)
4908 return false;
4909
4910 // Only handle load/store.
4911 if (!MI.mayLoadOrStore())
4912 return false;
4913
4914 unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());
4915
4916 assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4917 "MI must have x-form opcode");
4918
4919 // get Imm Form info.
4920 ImmInstrInfo III;
4921 bool IsVFReg = MI.getOperand(0).isReg() &&
4922 MI.getOperand(0).getReg().isPhysical() &&
4923 PPC::isVFRegister(MI.getOperand(0).getReg());
4924
4925 if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
4926 return false;
4927
4928 if (!III.IsSummingOperands)
4929 return false;
4930
4931 if (OpNoForForwarding != III.OpNoForForwarding)
4932 return false;
4933
4934 MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
4935 if (!ImmOperandMI.isImm())
4936 return false;
4937
4938 // Check DefMI.
4939 MachineOperand *ImmMO = nullptr;
4940 MachineOperand *RegMO = nullptr;
4941 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4942 return false;
4943 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4944
4945 // Check Imm.
4946 // Set ImmBase from imm instruction as base and get new Imm inside
4947 // isImmElgibleForForwarding.
4948 int64_t ImmBase = ImmOperandMI.getImm();
4949 int64_t Imm = 0;
4950 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
4951 return false;
4952
4953 // Do the transform
4954 LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4955 LLVM_DEBUG(MI.dump());
4956 LLVM_DEBUG(dbgs() << "Fed by:\n");
4957 LLVM_DEBUG(DefMI.dump());
4958
4959 MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
4960 MI.getOperand(III.ImmOpNo).setImm(Imm);
4961
4962 LLVM_DEBUG(dbgs() << "With:\n");
4963 LLVM_DEBUG(MI.dump());
4964 return true;
4965}
4966
4967// If an X-Form instruction is fed by an add-immediate and one of its operands
4968// is the literal zero, attempt to forward the source of the add-immediate to
4969// the corresponding D-Form instruction with the displacement coming from
4970// the immediate being added.
//
// MI                - the indexed instruction to rewrite; modified on success.
// III               - description of the immediate form to convert to.
// OpNoForForwarding - index of the MI operand defined by DefMI.
// DefMI             - the feeding instruction.
// KillDefMI         - passed on to isRegElgibleForForwarding().
//
// Returns true if MI was converted to III.ImmOpcode; false if any of the
// eligibility checks fails (MI is then left untouched).
4971bool PPCInstrInfo::transformToImmFormFedByAdd(
4972 MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4973 MachineInstr &DefMI, bool KillDefMI) const {
4974 // RegMO ImmMO
4975 // | |
4976 // x = addi reg, imm <----- DefMI
4977 // y = op 0 , x <----- MI
4978 // |
4979 // OpNoForForwarding
4980 // Check if the MI meets the requirement described in the III.
4981 if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4982 return false;
4983
4984 // Check if the DefMI meets the requirement
4985 // described in the III. If yes, set the ImmMO and RegMO accordingly.
4986 MachineOperand *ImmMO = nullptr;
4987 MachineOperand *RegMO = nullptr;
4988 if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4989 return false;
4990 assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4991
4992 // As we get the Imm operand now, we need to check if the ImmMO meets
4993 // the requirement described in the III. If yes, set the Imm.
4994 int64_t Imm = 0;
4995 if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
4996 return false;
4997
4998 bool IsFwdFeederRegKilled = false;
4999 bool SeenIntermediateUse = false;
5000 // Check if the RegMO can be forwarded to MI.
5001 if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
5002 IsFwdFeederRegKilled, SeenIntermediateUse))
5003 return false;
5004
5005 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5006 bool PostRA = !MRI.isSSA();
5007
5008 // We know that the MI and DefMI both meet the pattern, and
5009 // the Imm also meets the requirement of the new Imm-form.
5010 // It is safe to do the transformation now.
5011 LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
5012 LLVM_DEBUG(MI.dump());
5013 LLVM_DEBUG(dbgs() << "Fed by:\n");
5014 LLVM_DEBUG(DefMI.dump());
5015
5016 // Update the base reg first.
5017 MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
5018 false, false,
5019 RegMO->isKill());
5020
5021 // Then, update the imm.
5022 if (ImmMO->isImm()) {
5023 // If the ImmMO is Imm, change the operand that has ZERO to that Imm
5024 // directly.
// NOTE(review): the statement performing that change (listing line 5025) is
// absent from this listing. Confirm against the original file.
5026 }
5027 else {
5028 // Otherwise, it is a Constant Pool Index (CPI) or Global,
5029 // which is in fact a relocation. We need to replace the special zero
5030 // register with ImmMO.
5031 // Before that, we need to fix up the target flags for imm.
5032 // For some reason, the flag is not set on the ImmMO if it is a CPI.
// NOTE(review): the statement controlled by the `if` below (listing line 5034)
// is absent from this listing; presumably it sets the target flags on ImmMO.
// Confirm against the original file.
5033 if (DefMI.getOpcode() == PPC::ADDItocL8)
5035
5036 // MI doesn't have an interface such as MI.setOperand(i), though
5037 // it has MI.getOperand(i). To replace the ZERO MachineOperand with
5038 // ImmMO, we need to remove the ZERO operand and all the operands behind it,
5039 // add the ImmMO, and then move back all the operands behind ZERO.
// NOTE(review): the declaration of `MOps` (listing line 5040) is absent from
// this listing; it is used below as a container of MachineOperand copies.
// The loop pops operands from the back of MI down to and including index
// III.ZeroIsSpecialOrig, saving a copy of each.
5041 for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {
5042 MOps.push_back(MI.getOperand(i));
5043 MI.removeOperand(i);
5044 }
5045
5046 // Remove the last MO in the list, which is ZERO operand in fact.
5047 MOps.pop_back();
5048 // Add the imm operand.
5049 MI.addOperand(*ImmMO);
5050 // Now add the rest back.
5051 for (auto &MO : MOps)
5052 MI.addOperand(MO);
5053 }
5054
5055 // Update the opcode.
5056 MI.setDesc(get(III.ImmOpcode));
5057
5058 if (PostRA)
5059 recomputeLivenessFlags(*MI.getParent());
5060 LLVM_DEBUG(dbgs() << "With:\n");
5061 LLVM_DEBUG(MI.dump());
5062
5063 return true;
5064}
5065
// Convert the reg+reg instruction MI to its immediate form when the register
// operand at ConstantOpNo is defined by a load-immediate (DefMI).
//
// MI           - the instruction to convert; modified in place on success.
// III          - description of the immediate form (opcode, operand indices,
//                immediate constraints, where register zero is special).
// ConstantOpNo - index of the MI operand that holds the constant.
// DefMI        - must be LI/LI8 with an immediate operand 1.
//
// Returns false, leaving MI untouched, if DefMI is not a suitable
// load-immediate, if the constant is in a position that cannot be converted,
// if the immediate violates III's constraints, or (post-RA) if R0/X0 would end
// up where register zero is special. Returns true after rewriting MI.
5066bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
5067 const ImmInstrInfo &III,
5068 unsigned ConstantOpNo,
5069 MachineInstr &DefMI) const {
5070 // DefMI must be LI or LI8.
5071 if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
5072 !DefMI.getOperand(1).isImm())
5073 return false;
5074
5075 // Get Imm operand and Sign-extend to 64-bits.
5076 int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());
5077
5078 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5079 bool PostRA = !MRI.isSSA();
5080 // Exit early if we can't convert this.
5081 if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
5082 return false;
5083 if (Imm % III.ImmMustBeMultipleOf)
5084 return false;
5085 if (III.TruncateImmTo)
5086 Imm &= ((1 << III.TruncateImmTo) - 1);
5087 if (III.SignedImm) {
5088 APInt ActualValue(64, Imm, true);
5089 if (!ActualValue.isSignedIntN(III.ImmWidth))
5090 return false;
5091 } else {
5092 uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
5093 if ((uint64_t)Imm > UnsignedMax)
5094 return false;
5095 }
5096
5097 // If we're post-RA and the instructions don't agree on whether register zero
5098 // is special, we can transform this as long as the register operand that will
5099 // end up in the location where zero is special isn't R0.
5100 if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5101 unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
5102 III.ZeroIsSpecialNew + 1;
5103 Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
5104 Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5105 // If R0 is in the operand where zero is special for the new instruction,
5106 // it is unsafe to transform if the constant operand isn't that operand.
5107 if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
5108 ConstantOpNo != III.ZeroIsSpecialNew)
5109 return false;
5110 if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
5111 ConstantOpNo != PosForOrigZero)
5112 return false;
5113 }
5114
// Classify the original opcode before the descriptor is replaced below.
5115 unsigned Opc = MI.getOpcode();
5116 bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
5117 Opc == PPC::SRW || Opc == PPC::SRW_rec ||
5118 Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
5119 Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
5120 bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
5121 Opc == PPC::SRD || Opc == PPC::SRD_rec;
5122 bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
5123 Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
5124 bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
5125 Opc == PPC::SRD_rec;
5126
5127 LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5128 LLVM_DEBUG(MI.dump());
5129 LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5130 LLVM_DEBUG(DefMI.dump());
5131 MI.setDesc(get(III.ImmOpcode));
5132 if (ConstantOpNo == III.OpNoForForwarding) {
5133 // Converting shifts to immediate form is a bit tricky since they may do
5134 // one of three things:
5135 // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero
5136 // 2. If the shift amount is zero, the result is unchanged (save for maybe
5137 // setting CR0)
5138 // 3. If the shift amount is in [1, OpSize), it's just a shift
5139 if (SpecialShift32 || SpecialShift64) {
5140 LoadImmediateInfo LII;
5141 LII.Imm = 0;
5142 LII.SetCR = SetCR;
5143 LII.Is64Bit = SpecialShift64;
5144 uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
// Case 1: the bit just above the shift-amount field is set, so the result is
// zero and MI becomes a load-immediate of 0.
5145 if (Imm & (SpecialShift32 ? 0x20 : 0x40))
5146 replaceInstrWithLI(MI, LII);
5147 // Shifts by zero don't change the value. If we don't need to set CR0,
5148 // just convert this to a COPY. Can't do this post-RA since we've already
5149 // cleaned up the copies.
5150 else if (!SetCR && ShAmt == 0 && !PostRA) {
5151 MI.removeOperand(2);
5152 MI.setDesc(get(PPC::COPY));
5153 } else {
5154 // The 32 bit and 64 bit instructions are quite different.
5155 if (SpecialShift32) {
5156 // Left shifts use (N, 0, 31-N).
5157 // Right shifts use (32-N, N, 31) if 0 < N < 32.
5158 // use (0, 0, 31) if N == 0.
5159 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
5160 uint64_t MB = RightShift ? ShAmt : 0;
5161 uint64_t ME = RightShift ? 31 : 31 - ShAmt;
// NOTE(review): listing line 5162 is absent from this listing; presumably it
// writes SH into MI, since SH is otherwise unused here. Confirm against the
// original file.
5163 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
5164 .addImm(ME);
5165 } else {
5166 // Left shifts use (N, 63-N).
5167 // Right shifts use (64-N, N) if 0 < N < 64.
5168 // use (0, 0) if N == 0.
5169 uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
5170 uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
// NOTE(review): listing line 5171 is absent from this listing; presumably it
// writes SH into MI, since SH is otherwise unused here. Confirm against the
// original file.
5172 MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
5173 }
5174 }
5175 } else
5176 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5177 }
5178 // Convert commutative instructions (switch the operands and convert the
5179 // desired one to an immediate).
5180 else if (III.IsCommutative) {
5181 replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
5182 swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
5183 } else
5184 llvm_unreachable("Should have exited early!");
5185
5186 // For instructions for which the constant register replaces a different
5187 // operand than where the immediate goes, we need to swap them.
// NOTE(review): the statement controlled by the `if` below (listing line 5189)
// is absent from this listing; presumably it swaps the two operands. Confirm
// against the original file.
5188 if (III.OpNoForForwarding != III.ImmOpNo)
5190
5191 // If the special R0/X0 register index are different for original instruction
5192 // and new instruction, we need to fix up the register class in new
5193 // instruction.
5194 if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5195 if (III.ZeroIsSpecialNew) {
5196 // If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5197 // need to fix up register class.
5198 Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
5199 if (RegToModify.isVirtual()) {
5200 const TargetRegisterClass *NewRC =
5201 MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
5202 &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5203 MRI.setRegClass(RegToModify, NewRC);
5204 }
5205 }
5206 }
5207
5208 if (PostRA)
5209 recomputeLivenessFlags(*MI.getParent());
5210
5211 LLVM_DEBUG(dbgs() << "With: ");
5212 LLVM_DEBUG(MI.dump());
5213 LLVM_DEBUG(dbgs() << "\n");
5214 return true;
5215}
5216
5217const TargetRegisterClass *
5219 if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5220 return &PPC::VSRCRegClass;
5221 return RC;
5222}
5223
5225 return PPC::getRecordFormOpcode(Opcode);
5226}
5227
5228static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5229 return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
5230 Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
5231 Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
5232 Opcode == PPC::LHZUX8);
5233}
5234
5235// This function checks for sign extension from 32 bits to 64 bits.
// It looks only at the single instruction returned by MRI->getVRegDef(Reg) and
// returns true when that instruction's opcode (and, for some opcodes, its
// immediate operands) guarantees a sign-extended result. It returns false when
// no defining instruction is found or none of the patterns below match.
5236static bool definedBySignExtendingOp(const unsigned Reg,
5237 const MachineRegisterInfo *MRI) {
// NOTE(review): the condition guarding the `return false` below (listing line
// 5238) is absent from this listing; presumably it rejects non-virtual
// registers. Confirm against the original file.
5239 return false;
5240
5241 MachineInstr *MI = MRI->getVRegDef(Reg);
5242 if (!MI)
5243 return false;
5244
5245 int Opcode = MI->getOpcode();
5246 const PPCInstrInfo *TII =
5247 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5248 if (TII->isSExt32To64(Opcode))
5249 return true;
5250
5251 // The first def of LBZU/LHZU is sign extended.
// Only operand 0 qualifies; the check excludes any other def of these loads.
5252 if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
5253 return true;
5254
5255 // RLDICL generates sign-extended output if it clears at least
5256 // 33 bits from the left (MSB).
5257 if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
5258 return true;
5259
5260 // If at least one bit from left in a lower word is masked out,
5261 // all of 0 to 32-th bits of the output are cleared.
5262 // Hence the output is already sign extended.
// Operand 3 must be > 0 and must not exceed operand 4.
5263 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5264 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
5265 MI->getOperand(3).getImm() > 0 &&
5266 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5267 return true;
5268
5269 // If the most significant bit of immediate in ANDIS is zero,
5270 // all of 0 to 32-th bits are cleared.
5271 if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
5272 uint16_t Imm = MI->getOperand(2).getImm();
5273 if ((Imm & 0x8000) == 0)
5274 return true;
5275 }
5276
5277 return false;
5278}
5279
5280// This function checks the machine instruction that defines the input register
5281// Reg. If that machine instruction always outputs a value that has only zeros
5282// in the higher 32 bits then this function will return true.
// It returns false when no defining instruction is found or when none of the
// patterns below match.
5283static bool definedByZeroExtendingOp(const unsigned Reg,
5284 const MachineRegisterInfo *MRI) {
// NOTE(review): the condition guarding the `return false` below (listing line
// 5285) is absent from this listing; presumably it rejects non-virtual
// registers. Confirm against the original file.
5286 return false;
5287
5288 MachineInstr *MI = MRI->getVRegDef(Reg);
5289 if (!MI)
5290 return false;
5291
5292 int Opcode = MI->getOpcode();
5293 const PPCInstrInfo *TII =
5294 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5295 if (TII->isZExt32To64(Opcode))
5296 return true;
5297
5298 // The first def of LBZU/LHZU/LWZU are zero extended.
// Only operand 0 qualifies; the check excludes any other def of these loads.
5299 if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
5300 Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
5301 MI->getOperand(0).getReg() == Reg)
5302 return true;
5303
5304 // The 16-bit immediate is sign-extended in li/lis.
5305 // If the most significant bit is zero, all higher bits are zero.
5306 if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
5307 Opcode == PPC::LIS || Opcode == PPC::LIS8) {
5308 int64_t Imm = MI->getOperand(1).getImm();
// True only when no bit at or above bit 15 is set in the immediate.
5309 if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
5310 return true;
5311 }
5312
5313 // We have some variations of rotate-and-mask instructions
5314 // that clear higher 32-bits.
5315 if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
5316 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
5317 Opcode == PPC::RLDICL_32_64) &&
5318 MI->getOperand(3).getImm() >= 32)
5319 return true;
5320
// RLDIC: operand 3 must be at least 32 and at most 63 minus operand 2.
5321 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
5322 MI->getOperand(3).getImm() >= 32 &&
5323 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
5324 return true;
5325
// 32-bit rotate-and-mask: operand 3 must not exceed operand 4.
5326 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
5327 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
5328 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
5329 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
5330 return true;
5331
5332 return false;
5333}
5334
5335// This function returns true if the input MachineInstr is a TOC save
5336// instruction.
5338 if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
5339 return false;
5340 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5341 unsigned StackOffset = MI.getOperand(1).getImm();
5342 Register StackReg = MI.getOperand(2).getReg();
5343 Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5344 if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5345 return true;
5346
5347 return false;
5348}
5349
5350// We limit the max depth to track incoming values of PHIs or binary ops
5351// (e.g. AND) to avoid excessive cost.
// Recursion through such instructions stops once the BinOpDepth counter
// reaches this value.
5352const unsigned MAX_BINOP_DEPTH = 1;
5353
5354// This function will promote the instruction which defines the register `Reg`
5355// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5356// used to check whether an instruction needs to be promoted or not is similar
5357// to the logic used to check whether or not a defined register is sign or zero
5358// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5359// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5360// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5361// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5362// than once. This is done to prevent exponential recursion.
5365 unsigned BinOpDepth,
5366 LiveVariables *LV) const {
5367 if (!Reg.isVirtual())
5368 return;
5369
5370 MachineInstr *MI = MRI->getVRegDef(Reg);
5371 if (!MI)
5372 return;
5373
5374 unsigned Opcode = MI->getOpcode();
5375
5376 switch (Opcode) {
5377 case PPC::OR:
5378 case PPC::ISEL:
5379 case PPC::OR8:
5380 case PPC::PHI: {
5381 if (BinOpDepth >= MAX_BINOP_DEPTH)
5382 break;
5383 unsigned OperandEnd = 3, OperandStride = 1;
5384 if (Opcode == PPC::PHI) {
5385 OperandEnd = MI->getNumOperands();
5386 OperandStride = 2;
5387 }
5388
5389 for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
5390 assert(MI->getOperand(I).isReg() && "Operand must be register");
5391 promoteInstr32To64ForElimEXTSW(MI->getOperand(I).getReg(), MRI,
5392 BinOpDepth + 1, LV);
5393 }
5394
5395 break;
5396 }
5397 case PPC::COPY: {
5398 // Refers to the logic of the `case PPC::COPY` statement in the function
5399 // PPCInstrInfo::isSignOrZeroExtended().
5400
5401 Register SrcReg = MI->getOperand(1).getReg();
5402 // In both ELFv1 and v2 ABI, method parameters and the return value
5403 // are sign- or zero-extended.
5404 const MachineFunction *MF = MI->getMF();
5405 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5406 // If this is a copy from another register, we recursively promote the
5407 // source.
5408 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5409 return;
5410 }
5411
5412 // From here on everything is SVR4ABI. COPY will be eliminated in the other
5413 // pass, we do not need promote the COPY pseudo opcode.
5414
5415 if (SrcReg != PPC::X3)
5416 // If this is a copy from another register, we recursively promote the
5417 // source.
5418 promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
5419 return;
5420 }
5421 case PPC::ORI:
5422 case PPC::XORI:
5423 case PPC::ORIS:
5424 case PPC::XORIS:
5425 case PPC::ORI8:
5426 case PPC::XORI8:
5427 case PPC::ORIS8:
5428 case PPC::XORIS8:
5429 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI, BinOpDepth,
5430 LV);
5431 break;
5432 case PPC::AND:
5433 case PPC::AND8:
5434 if (BinOpDepth >= MAX_BINOP_DEPTH)
5435 break;
5436
5437 promoteInstr32To64ForElimEXTSW(MI->getOperand(1).getReg(), MRI,
5438 BinOpDepth + 1, LV);
5439 promoteInstr32To64ForElimEXTSW(MI->getOperand(2).getReg(), MRI,
5440 BinOpDepth + 1, LV);
5441 break;
5442 }
5443
5444 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5445 if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5446 return;
5447
5448 const PPCInstrInfo *TII =
5449 MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5450
5451 // Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5452 // extended themselves, but may have operands who's destination registers of
5453 // signed or zero extended instructions.
5454 std::unordered_map<unsigned, unsigned> OpcodeMap = {
5455 {PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5456 {PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5457 {PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5458 {PPC::AND, PPC::AND8}};
5459
5460 int NewOpcode = -1;
5461 auto It = OpcodeMap.find(Opcode);
5462 if (It != OpcodeMap.end()) {
5463 // Set the new opcode to the mapped 64-bit version.
5464 NewOpcode = It->second;
5465 } else {
5466 if (!TII->isSExt32To64(Opcode))
5467 return;
5468
5469 // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5470 // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5471 // instruction with the same opcode.
5472 NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5473 }
5474
5475 assert(NewOpcode != -1 &&
5476 "Must have a 64-bit opcode to map the 32-bit opcode!");
5477
5479 const MCInstrDesc &MCID = TII->get(NewOpcode);
5480 const TargetRegisterClass *NewRC =
5481 TRI->getRegClass(MCID.operands()[0].RegClass);
5482
5483 Register SrcReg = MI->getOperand(0).getReg();
5484 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
5485
5486 // If the register class of the defined register in the 32-bit instruction
5487 // is the same as the register class of the defined register in the promoted
5488 // 64-bit instruction, we do not need to promote the instruction.
5489 if (NewRC == SrcRC)
5490 return;
5491
5492 DebugLoc DL = MI->getDebugLoc();
5493 auto MBB = MI->getParent();
5494
5495 // Since the pseudo-opcode of the instruction is promoted from 32-bit to
5496 // 64-bit, if the source reg class of the original instruction belongs to
5497 // PPC::GPRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5498 // the operand to PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass,
5499 // respectively.
5500 DenseMap<unsigned, Register> PromoteRegs;
5501 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5502 MachineOperand &Operand = MI->getOperand(i);
5503 if (!Operand.isReg())
5504 continue;
5505
5506 Register OperandReg = Operand.getReg();
5507 if (!OperandReg.isVirtual())
5508 continue;
5509
5510 const TargetRegisterClass *NewUsedRegRC =
5511 TRI->getRegClass(MCID.operands()[i].RegClass);
5512 const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
5513 if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
5514 OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5515 // Promote the used 32-bit register to 64-bit register.
5516 Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5517 Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
5518 BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
5519 BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
5520 .addReg(TmpReg)
5521 .addReg(OperandReg)
5522 .addImm(PPC::sub_32);
5523 PromoteRegs[i] = DstTmpReg;
5524 }
5525 }
5526
5527 Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
5528
5529 BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
5531 --Iter;
5532 MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
5533 for (unsigned i = 1; i < MI->getNumOperands(); i++) {
5534 if (auto It = PromoteRegs.find(i); It != PromoteRegs.end())
5535 MIBuilder.addReg(It->second, RegState::Kill);
5536 else
5537 Iter->addOperand(MI->getOperand(i));
5538 }
5539
5540 for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
5541 MachineOperand &Operand = Iter->getOperand(i);
5542 if (!Operand.isReg())
5543 continue;
5544 Register OperandReg = Operand.getReg();
5545 if (!OperandReg.isVirtual())
5546 continue;
5547 LV->recomputeForSingleDefVirtReg(OperandReg);
5548 }
5549
5550 MI->eraseFromParent();
5551
5552 // A defined register may be used by other instructions that are 32-bit.
5553 // After the defined register is promoted to 64-bit for the promoted
5554 // instruction, we need to demote the 64-bit defined register back to a
5555 // 32-bit register
5556 BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
5557 .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
5558 LV->recomputeForSingleDefVirtReg(NewDefinedReg);
5559}
5560
5561// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5562// does not count all of the recursions. The parameter BinOpDepth is incremented
5563// only when isSignOrZeroExtended calls itself more than once. This is done to
5564// prevent exponential recursion. There is no parameter to track linear
5565// recursion.
5566std::pair<bool, bool>
5568 const unsigned BinOpDepth,
5569 const MachineRegisterInfo *MRI) const {
5571 return std::pair<bool, bool>(false, false);
5572
5573 MachineInstr *MI = MRI->getVRegDef(Reg);
5574 if (!MI)
5575 return std::pair<bool, bool>(false, false);
5576
5577 bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5578 bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5579
5580 // If we know the instruction always returns sign- and zero-extended result,
5581 // return here.
5582 if (IsSExt && IsZExt)
5583 return std::pair<bool, bool>(IsSExt, IsZExt);
5584
5585 switch (MI->getOpcode()) {
5586 case PPC::COPY: {
5587 Register SrcReg = MI->getOperand(1).getReg();
5588
5589 // In both ELFv1 and v2 ABI, method parameters and the return value
5590 // are sign- or zero-extended.
5591 const MachineFunction *MF = MI->getMF();
5592
5593 if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5594 // If this is a copy from another register, we recursively check source.
5595 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5596 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5597 SrcExt.second || IsZExt);
5598 }
5599
5600 // From here on everything is SVR4ABI
5601 const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5602 // We check the ZExt/SExt flags for a method parameter.
5603 if (MI->getParent()->getBasicBlock() ==
5604 &MF->getFunction().getEntryBlock()) {
5605 Register VReg = MI->getOperand(0).getReg();
5606 if (MF->getRegInfo().isLiveIn(VReg)) {
5607 IsSExt |= FuncInfo->isLiveInSExt(VReg);
5608 IsZExt |= FuncInfo->isLiveInZExt(VReg);
5609 return std::pair<bool, bool>(IsSExt, IsZExt);
5610 }
5611 }
5612
5613 if (SrcReg != PPC::X3) {
5614 // If this is a copy from another register, we recursively check source.
5615 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5616 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5617 SrcExt.second || IsZExt);
5618 }
5619
5620 // For a method return value, we check the ZExt/SExt flags in attribute.
5621 // We assume the following code sequence for method call.
5622 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5623 // BL8_NOP @func,...
5624 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5625 // %5 = COPY %x3; G8RC:%5
5626 const MachineBasicBlock *MBB = MI->getParent();
5627 std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5630 if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5631 return IsExtendPair;
5632
5633 const MachineInstr &CallMI = *(--II);
5634 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
5635 return IsExtendPair;
5636
5637 const Function *CalleeFn =
5639 if (!CalleeFn)
5640 return IsExtendPair;
5641 const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
5642 if (IntTy && IntTy->getBitWidth() <= 32) {
5643 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5644 IsSExt |= Attrs.hasAttribute(Attribute::SExt);
5645 IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
5646 return std::pair<bool, bool>(IsSExt, IsZExt);
5647 }
5648
5649 return IsExtendPair;
5650 }
5651
5652 // OR, XOR with 16-bit immediate does not change the upper 48 bits.
5653 // So, we track the operand register as we do for register copy.
5654 case PPC::ORI:
5655 case PPC::XORI:
5656 case PPC::ORI8:
5657 case PPC::XORI8: {
5658 Register SrcReg = MI->getOperand(1).getReg();
5659 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5660 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5661 SrcExt.second || IsZExt);
5662 }
5663
5664 // OR, XOR with shifted 16-bit immediate does not change the upper
5665 // 32 bits. So, we track the operand register for zero extension.
5666 // For sign extension when the MSB of the immediate is zero, we also
5667 // track the operand register since the upper 33 bits are unchanged.
5668 case PPC::ORIS:
5669 case PPC::XORIS:
5670 case PPC::ORIS8:
5671 case PPC::XORIS8: {
5672 Register SrcReg = MI->getOperand(1).getReg();
5673 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
5674 uint16_t Imm = MI->getOperand(2).getImm();
5675 if (Imm & 0x8000)
5676 return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
5677 else
5678 return std::pair<bool, bool>(SrcExt.first || IsSExt,
5679 SrcExt.second || IsZExt);
5680 }
5681
5682 // If all incoming values are sign-/zero-extended,
5683 // the output of OR, ISEL or PHI is also sign-/zero-extended.
5684 case PPC::OR:
5685 case PPC::OR8:
5686 case PPC::ISEL:
5687 case PPC::PHI: {
5688 if (BinOpDepth >= MAX_BINOP_DEPTH)
5689 return std::pair<bool, bool>(false, false);
5690
5691 // The input registers for PHI are operand 1, 3, ...
5692 // The input registers for others are operand 1 and 2.
5693 unsigned OperandEnd = 3, OperandStride = 1;
5694 if (MI->getOpcode() == PPC::PHI) {
5695 OperandEnd = MI->getNumOperands();
5696 OperandStride = 2;
5697 }
5698
5699 IsSExt = true;
5700 IsZExt = true;
5701 for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
5702 if (!MI->getOperand(I).isReg())
5703 return std::pair<bool, bool>(false, false);
5704
5705 Register SrcReg = MI->getOperand(I).getReg();
5706 auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
5707 IsSExt &= SrcExt.first;
5708 IsZExt &= SrcExt.second;
5709 }
5710 return std::pair<bool, bool>(IsSExt, IsZExt);
5711 }
5712
5713 // If at least one of the incoming values of an AND is zero extended
5714 // then the output is also zero-extended. If both of the incoming values
5715 // are sign-extended then the output is also sign extended.
5716 case PPC::AND:
5717 case PPC::AND8: {
5718 if (BinOpDepth >= MAX_BINOP_DEPTH)
5719 return std::pair<bool, bool>(false, false);
5720
5721 Register SrcReg1 = MI->getOperand(1).getReg();
5722 Register SrcReg2 = MI->getOperand(2).getReg();
5723 auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
5724 auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
5725 return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5726 Src1Ext.second || Src2Ext.second);
5727 }
5728
5729 default:
5730 break;
5731 }
5732 return std::pair<bool, bool>(IsSExt, IsZExt);
5733}
5734
5735bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5736 return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5737}
5738
5739namespace {
5740class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5741 MachineInstr *Loop, *EndLoop, *LoopCount;
5742 MachineFunction *MF;
5743 const TargetInstrInfo *TII;
5744 int64_t TripCount;
5745
5746public:
5747 PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
5748 MachineInstr *LoopCount)
5749 : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5750 MF(Loop->getParent()->getParent()),
5751 TII(MF->getSubtarget().getInstrInfo()) {
5752 // Inspect the Loop instruction up-front, as it may be deleted when we call
5753 // createTripCountGreaterCondition.
5754 if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
5755 TripCount = LoopCount->getOperand(1).getImm();
5756 else
5757 TripCount = -1;
5758 }
5759
5760 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
5761 // Only ignore the terminator.
5762 return MI == EndLoop;
5763 }
5764
5765 std::optional<bool> createTripCountGreaterCondition(
5766 int TC, MachineBasicBlock &MBB,
5767 SmallVectorImpl<MachineOperand> &Cond) override {
5768 if (TripCount == -1) {
5769 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5770 // so we don't need to generate any thing here.
5771 Cond.push_back(MachineOperand::CreateImm(0));
5773 MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5774 true));
5775 return {};
5776 }
5777
5778 return TripCount > TC;
5779 }
5780
5781 void setPreheader(MachineBasicBlock *NewPreheader) override {
5782 // Do nothing. We want the LOOP setup instruction to stay in the *old*
5783 // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5784 }
5785
5786 void adjustTripCount(int TripCountAdjust) override {
5787 // If the loop trip count is a compile-time value, then just change the
5788 // value.
5789 if (LoopCount->getOpcode() == PPC::LI8 ||
5790 LoopCount->getOpcode() == PPC::LI) {
5791 int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
5792 LoopCount->getOperand(1).setImm(TripCount);
5793 return;
5794 }
5795
5796 // Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5797 // so we don't need to generate any thing here.
5798 }
5799
5800 void disposed(LiveIntervals *LIS) override {
5801 if (LIS) {
5802 LIS->RemoveMachineInstrFromMaps(*Loop);
5803 LIS->RemoveMachineInstrFromMaps(*LoopCount);
5804 }
5805 Loop->eraseFromParent();
5806 // Ensure the loop setup instruction is deleted too.
5807 LoopCount->eraseFromParent();
5808 }
5809};
5810} // namespace
5811
5812std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5814 // We really "analyze" only hardware loops right now.
5816 MachineBasicBlock *Preheader = *LoopBB->pred_begin();
5817 if (Preheader == LoopBB)
5818 Preheader = *std::next(LoopBB->pred_begin());
5819 MachineFunction *MF = Preheader->getParent();
5820
5821 if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
5823 if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
5824 Register LoopCountReg = LoopInst->getOperand(0).getReg();
5825 MachineRegisterInfo &MRI = MF->getRegInfo();
5826 MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
5827 return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
5828 }
5829 }
5830 return nullptr;
5831}
5832
5834 MachineBasicBlock &PreHeader,
5835 SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
5836
5837 unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5838
5839 // The loop set-up instruction should be in preheader
5840 for (auto &I : PreHeader.instrs())
5841 if (I.getOpcode() == LOOPi)
5842 return &I;
5843 return nullptr;
5844}
5845
5846// Return true if the base operand, byte offset and memory width of an
5847// instruction are found. Width is the size of memory that is being loaded/stored.
5849 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5850 LocationSize &Width, const TargetRegisterInfo *TRI) const {
5851 if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
5852 return false;
5853
5854 // Handle only loads/stores with base register followed by immediate offset.
5855 if (!LdSt.getOperand(1).isImm() ||
5856 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
5857 return false;
5858
5859 if (!LdSt.hasOneMemOperand())
5860 return false;
5861
5862 Width = (*LdSt.memoperands_begin())->getSize();
5863 Offset = LdSt.getOperand(1).getImm();
5864 BaseReg = &LdSt.getOperand(2);
5865 return true;
5866}
5867
5869 const MachineInstr &MIa, const MachineInstr &MIb) const {
5870 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5871 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5872
5875 return false;
5876
5877 // Retrieve the base register, offset from the base register and width. Width
5878 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5879 // base registers are identical, and the offset of a lower memory access +
5880 // the width doesn't overlap the offset of a higher memory access,
5881 // then the memory accesses are different.
5883 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
5884 int64_t OffsetA = 0, OffsetB = 0;
5886 WidthB = LocationSize::precise(0);
5887 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
5888 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
5889 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
5890 int LowOffset = std::min(OffsetA, OffsetB);
5891 int HighOffset = std::max(OffsetA, OffsetB);
5892 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5893 if (LowWidth.hasValue() &&
5894 LowOffset + (int)LowWidth.getValue() <= HighOffset)
5895 return true;
5896 }
5897 }
5898 return false;
5899}
5900
5901// Expands LWAT_CSNE_PSEUDO/LDAT_CSNE_PSEUDO post register allocation.
5902// lwat/ldat FC=16 requires 3 consecutive registers. X8/X9/X10 are
5903// hardcoded post-RA to satisfy this constraint without a dedicated
5904// register class.
5906 MachineBasicBlock &MBB = *MI.getParent();
5907 DebugLoc DL = MI.getDebugLoc();
5908 bool IsLDAT = MI.getOpcode() == PPC::LDAT_CSNE_PSEUDO;
5909
5910 Register DstReg = MI.getOperand(0).getReg();
5911 Register PtrReg = MI.getOperand(1).getReg();
5912
5913 Register ScratchReg = PtrReg;
5914 if (PtrReg == PPC::X8 || PtrReg == PPC::X9 || PtrReg == PPC::X10) {
5915 // If ptr is in X8/X9/X10, use $dst as scratch to move ptr away from
5916 // X8/X9/X10 since lwat FC=16 always writes its result to X8. After lwat
5917 // copy X8 into $dst.
5918 Register DstReg64 = IsLDAT ? DstReg
5919 : Register(getRegisterInfo().getMatchingSuperReg(
5920 DstReg, PPC::sub_32, &PPC::G8RCRegClass));
5921 BuildMI(MBB, MI, DL, get(PPC::OR8), DstReg64).addReg(PtrReg).addReg(PtrReg);
5922 ScratchReg = DstReg64;
5923 }
5924
5925 BuildMI(MBB, MI, DL, get(IsLDAT ? PPC::LDAT_CSNE : PPC::LWAT_CSNE), PPC::X8)
5926 .addReg(ScratchReg)
5927 .addImm(16)
5928 .addReg(PPC::X9, RegState::Implicit)
5929 .addReg(PPC::X10, RegState::Implicit);
5930
5931 if (DstReg != (IsLDAT ? PPC::X8 : PPC::R8)) {
5932 BuildMI(MBB, MI, DL, get(IsLDAT ? PPC::OR8 : PPC::OR), DstReg)
5933 .addReg(IsLDAT ? PPC::X8 : PPC::R8)
5934 .addReg(IsLDAT ? PPC::X8 : PPC::R8);
5935 }
5936 MI.eraseFromParent();
5937 return true;
5938}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis false
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t IntrinsicInst * II
static bool isOpZeroOfSubwordPreincLoad(int Opcode)
static bool MBBDefinesCTR(MachineBasicBlock &MBB)
static bool definedByZeroExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< float > FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), cl::desc("register pressure factor for the transformations."))
#define InfoArrayIdxMULOpIdx
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, unsigned TrueReg, unsigned FalseReg, unsigned CRSubReg)
static unsigned getCRBitValue(unsigned CRBit)
static bool isAnImmediateOperand(const MachineOperand &MO)
static const uint16_t FMAOpIdxInfo[][6]
static cl::opt< bool > DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops"))
#define InfoArrayIdxAddOpIdx
static cl::opt< bool > UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation"))
#define InfoArrayIdxFMAInst
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, const PPCSubtarget &Subtarget)
static cl::opt< bool > EnableFMARegPressureReduction("ppc-fma-rp-reduction", cl::Hidden, cl::init(true), cl::desc("enable register pressure reduce in machine combiner pass."))
static bool isLdStSafeToCluster(const MachineInstr &LdSt, const TargetRegisterInfo *TRI)
const unsigned MAX_BINOP_DEPTH
static cl::opt< bool > DisableCmpOpt("disable-ppc-cmp-opt", cl::desc("Disable compare instruction optimization"), cl::Hidden)
#define InfoArrayIdxFSubInst
#define InfoArrayIdxFAddInst
#define InfoArrayIdxFMULInst
static bool definedBySignExtendingOp(const unsigned Reg, const MachineRegisterInfo *MRI)
static cl::opt< bool > VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden)
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool isPhysical(const MachineOperand &MO)
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
void changeSign()
Definition APFloat.h:1352
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt rotl(unsigned rotateAmt) const
Rotate left by rotateAmt.
Definition APInt.cpp:1183
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
const BasicBlock & getEntryBlock() const
Definition Function.h:809
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
Itinerary data supplied by a subtarget to be used by a target.
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
Class to represent integer types.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LLVM_ABI void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
void setOpcode(unsigned Op)
Definition MCInst.h:201
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineInstrBundleIterator< const MachineInstr, true > const_reverse_iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool hasImplicitDef() const
Returns true if the instruction has implicit definition.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void dump() const
LLVM_ABI void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
LLVM_ABI bool isLiveIn(Register Reg) const
static use_instr_iterator use_instr_end()
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based hazard recognizer for P...
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
bool isLiveInSExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and sign-extended.
bool isLiveInZExt(Register VReg) const
This function returns true if the specified vreg is a live-in register and zero-extended.
PPCHazardRecognizer970 - This class defines a finite state automaton that models the dispatch logic on...
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for a fma chain ending in Root.
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase=nullptr) const
bool isReMaterializableImpl(const MachineInstr &MI) const override
PPCInstrInfo(const PPCSubtarget &STI)
const TargetRegisterClass * updatedRC(const TargetRegisterClass *RC) const
bool isPredicated(const MachineInstr &MI) const override
bool expandVSXMemPseudo(MachineInstr &MI) const
bool onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg) const
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void finalizeInsInstrs(MachineInstr &Root, unsigned &Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs) const override
Fixup the placeholders we put in genAlternativeCodeSequence() for MachineCombiner.
MCInst getNop() const override
Return the noop instruction to use for a noop.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static int getRecordFormOpcode(unsigned Opcode)
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const override
Commutes the operands in the given instruction.
bool isXFormMemOp(unsigned Opcode) const
const PPCRegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
CombinerObjective getCombinerObjective(unsigned Pattern) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
unsigned getStoreOpcodeForSpill(const TargetRegisterClass *RC) const
unsigned getLoadOpcodeForSpill(const TargetRegisterClass *RC) const
bool expandAMOCSNEPseudo(MachineInstr &MI) const
void promoteInstr32To64ForElimEXTSW(const Register &Reg, MachineRegisterInfo *MRI, unsigned BinOpDepth, LiveVariables *LV) const
bool isTOCSaveMI(const MachineInstr &MI) const
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer to use for this target when ...
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
bool isBDNZ(unsigned Opcode) const
Check Opcode is BDNZ (Decrement CTR and branch if it is still nonzero).
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
bool isZeroExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
std::pair< bool, bool > isSignOrZeroExtended(const unsigned Reg, const unsigned BinOpDepth, const MachineRegisterInfo *MRI) const
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, BranchProbability Probability) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
bool isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index, MachineInstr *&ADDIMI, int64_t &OffsetAddi, int64_t OffsetImm) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t Mask, int64_t Value, const MachineRegisterInfo *MRI) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
std::optional< unsigned > getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
void materializeImmPostRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, int64_t Imm) const
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
Return true if two MIs access different memory addresses and false otherwise.
bool SubsumesPredicate(ArrayRef< MachineOperand > Pred1, ArrayRef< MachineOperand > Pred2) const override
ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override
CreateTargetHazardRecognizer - Return the hazard recognizer to use for this target when scheduling th...
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
Get the base operand and byte offset of an instruction that reads/writes memory.
void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const
void loadRegFromStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const
bool foldFrameOffset(MachineInstr &MI) const
bool isLoadFromConstantPool(MachineInstr *I) const
MachineInstr * findLoopInstr(MachineBasicBlock &PreHeader, SmallPtrSet< MachineBasicBlock *, 8 > &Visited) const
Find the hardware loop instruction used to set-up the specified loop.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
void storeRegToStackSlotNoUpd(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool convertToImmediateForm(MachineInstr &MI, SmallSet< Register, 4 > &RegsToUpdate, MachineInstr **KilledDef=nullptr) const
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, LocationSize &Width, const TargetRegisterInfo *TRI) const
Return true if the base operand, byte offset, and memory width of an instruction were successfully obtained.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
bool shouldReduceRegisterPressure(const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const override
On PowerPC, we leverage machine combiner pass to reduce register pressure when the register pressure ...
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
InstSizeVerifyMode getInstSizeVerifyMode(const MachineInstr &MI) const override
bool isSignExtended(const unsigned Reg, const MachineRegisterInfo *MRI) const
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo, int64_t Imm) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Returns true if the two given memory operations should be scheduled adjacent.
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, unsigned &XFormOpcode, int64_t &OffsetOfImmInstr, ImmInstrInfo &III) const
bool PredicateInstruction(MachineInstr &MI, ArrayRef< MachineOperand > Pred) const override
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in <Root...
bool optimizeCmpPostRA(MachineInstr &MI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const Constant * getConstantFromConstantPool(MachineInstr *I) const
bool ClobbersPredicate(MachineInstr &MI, std::vector< MachineOperand > &Pred, bool SkipDead) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
bool instrHasImmForm(unsigned Opc, bool IsVFReg, ImmInstrInfo &III, bool PostRA) const
MachineInstr * getDefMIPostRA(unsigned Reg, MachineInstr &MI, bool &SeenIntermediateUse) const
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg)
bool isSVR4ABI() const
const PPCTargetMachine & getTargetMachine() const
void dump() const
Definition Pass.cpp:146
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void closeRegion()
Finalize the region boundaries and record live ins and live outs.
LLVM_ABI void recede(SmallVectorImpl< VRegMaskOrUnit > *LiveUses=nullptr)
Recede across the previous instruction.
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
LLVM_ABI void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
LLVM_ABI void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
const TargetInstrInfo * TII
Target instruction information.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
const Triple & getTargetTriple() const
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
LLVM_ABI bool isLittleEndian() const
Tests whether the target triple is little endian.
Definition Triple.cpp:2439
bool isOSAIX() const
Tests whether the OS is AIX.
Definition Triple.h:770
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
PPCII - This namespace holds all of the PowerPC target-specific per-instruction flags.
@ MO_TOC_LO
Definition PPC.h:187
Define some predicates that are used for node matching.
Predicate getSwappedPredicate(Predicate Opcode)
Assume the condition register is set by MI(a,b), return the predicate if we modify the instructions s...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
unsigned getPredicateCondition(Predicate Opcode)
Return the condition without hint bits.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
unsigned getPredicateHint(Predicate Opcode)
Return the hint bits of the predicate.
Predicate InvertPredicate(Predicate Opcode)
Invert the specified predicate. != -> ==, < -> >=.
int32_t getNonRecordFormOpcode(uint32_t)
int32_t getAltVSXFMAOpcode(uint32_t Opcode)
static bool isVFRegister(MCRegister Reg)
template class LLVM_TEMPLATE_ABI opt< bool >
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static unsigned getCRFromCRBit(unsigned SrcReg)
constexpr RegState getDeadRegState(bool B)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
@ REASSOC_XY_BCA
@ REASSOC_XY_BAC
@ REASSOC_XY_AMM_BMM
@ REASSOC_XMM_AMM_BMM
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
@ SOK_CRBitSpill
@ SOK_VSXVectorSpill
@ SOK_SpillToVSR
@ SOK_Int4Spill
@ SOK_PairedVecSpill
@ SOK_VectorFloat8Spill
@ SOK_UAccumulatorSpill
@ SOK_PairedG8Spill
@ SOK_DMRSpill
@ SOK_VectorFloat4Spill
@ SOK_Float8Spill
@ SOK_Float4Spill
@ SOK_VRVectorSpill
@ SOK_WAccumulatorSpill
@ SOK_SPESpill
@ SOK_CRSpill
@ SOK_AccumulatorSpill
@ SOK_Int8Spill
@ SOK_LastOpcodeSpill
@ SOK_DMRpSpill
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t IsSummingOperands
uint64_t OpNoForForwarding
uint64_t ImmMustBeMultipleOf
uint64_t ZeroIsSpecialNew
uint64_t ZeroIsSpecialOrig
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.