41#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
46 "Number of MFMA instructions rewritten to use AGPR form");
51class AMDGPURewriteAGPRCopyMFMAImpl {
73 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
108 bool recomputeRegClassExceptRewritable(
117 void replaceSpillWithCopyToVReg(
MachineInstr &SpillMI,
int SpillFI,
124 SpillReferenceMap &Map)
const;
128 void eliminateSpillsOfReassignedVGPRs()
const;
133bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
139 while (!Worklist.
empty()) {
157 if (isRewriteCandidate(*
MI)) {
159 TII.getNamedOperand(*
MI, AMDGPU::OpName::vdst);
161 TII.getNamedOperand(*
MI, AMDGPU::OpName::src2);
170 if (OtherReg !=
Reg && RewriteRegs.
insert(OtherReg))
177 dbgs() <<
"Attempting to replace VGPR MFMA with AGPR version:"
196 unsigned OpNo = &MO - &
MI->getOperand(0);
197 NewRC =
MI->getRegClassConstraintEffect(OpNo, NewRC, &
TII, &
TRI);
198 if (!NewRC || NewRC == OldRC) {
200 <<
" cannot be reassigned to "
201 <<
TRI.getRegClassName(NewRC) <<
": " << *
MI);
210bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
219 RewriteRegs.
insert(MFMAHintReg);
230 if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
232 LLVM_DEBUG(
dbgs() <<
"Could not recompute the regclass of dst reg "
253 using RecoloringStack =
255 RecoloringStack TentativeReassignments;
257 for (
Register RewriteReg : RewriteRegs) {
259 TentativeReassignments.push_back({&LI, VRM.
getPhys(RewriteReg)});
263 if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
265 for (
auto [LI, OldAssign] : TentativeReassignments) {
268 LRM.
assign(*LI, OldAssign);
276 for (
Register InterferingReg : RewriteRegs) {
278 TRI.getEquivalentAGPRClass(
MRI.getRegClass(InterferingReg));
279 MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
282 for (
MachineInstr *RewriteCandidate : RewriteCandidates) {
285 RewriteCandidate->setDesc(
TII.get(NewMFMAOp));
286 ++NumMFMAsRewrittenToAGPR;
295bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
301 for (
Register InterferingReg : InterferingRegs) {
304 TRI.getEquivalentAGPRClass(
MRI.getRegClass(InterferingReg));
306 MCPhysReg Assignable = AMDGPU::NoRegister;
307 if (EquivalentAGPRRegClass->
contains(PrefPhysReg) &&
317 Assignable = PrefPhysReg;
320 RegClassInfo.
getOrder(EquivalentAGPRRegClass);
332 <<
" to a free AGPR\n");
338 LRM.
assign(ReassignLI, Assignable);
350bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
352 bool MadeChange =
false;
375 if (isRewriteCandidate(CopySrcDefMI) &&
376 tryReassigningMFMAChain(
377 CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
392bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
394 bool MadeChange =
false;
403 if (!CopyUseMO.readsReg())
407 if (isRewriteCandidate(CopyUseMI)) {
408 if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
418void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
436void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
441 NeededFrameIndexes.
insert(LI->reg().stackSlotIndex());
446 if (!MO.isFI() || !NeededFrameIndexes.
count(MO.getIndex()))
449 if (
TII.isVGPRSpill(
MI)) {
459 NeededFrameIndexes.
erase(MO.getIndex());
460 Map.erase(MO.getIndex());
466void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs()
const {
474 StackIntervals.
reserve(NumSlots);
476 for (
auto &[Slot, LI] : LSS) {
481 if (
TRI.hasVGPRs(RC))
488 if (
A->weight() !=
B->weight())
489 return A->weight() >
B->weight();
491 if (
A->getSize() !=
B->getSize())
492 return A->getSize() >
B->getSize();
495 return A->reg().stackSlotIndex() <
B->reg().stackSlotIndex();
511 collectSpillIndexUses(StackIntervals, SpillSlotReferences);
514 int Slot = LI->reg().stackSlotIndex();
515 auto SpillReferences = SpillSlotReferences.find(Slot);
516 if (SpillReferences == SpillSlotReferences.end())
522 <<
" by reassigning\n");
537 replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);
544 LRM.
assign(NewLI, PhysReg);
559 LLVM_DEBUG(
dbgs() <<
"skipping function that did not allocate AGPRs\n");
563 bool MadeChange =
false;
565 for (
unsigned I = 0,
E =
MRI.getNumVirtRegs();
I !=
E; ++
I) {
567 MCRegister AssignedAGPR = getAssignedAGPR(VReg);
571 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
573 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
581 eliminateSpillsOfReassignedVGPRs();
599 return "AMDGPU Rewrite AGPR-Copy-MFMA";
621 "AMDGPU Rewrite AGPR-Copy-MFMA",
false,
false)
629char AMDGPURewriteAGPRCopyMFMALegacy::
ID = 0;
632 AMDGPURewriteAGPRCopyMFMALegacy::
ID;
634bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
636 if (skipFunction(MF.getFunction()))
639 RegClassInfo.runOnMachineFunction(MF);
641 auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
642 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
643 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
644 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
645 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
659 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
664 .preserve<LiveStacksAnalysis>()
666 .preserve<SlotIndexesAnalysis>()
668 .preserve<LiveRegMatrixAnalysis>();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
AMDGPU Rewrite AGPR Copy MFMA
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
bool hasGFX90AInsts() const
LiveInterval - This class represents the liveness of a register, or stack slot.
LiveInterval & getInterval(Register Reg)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_Free
No interference, go ahead and assign.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
unsigned getNumIntervals() const
Wrapper class representing physical registers. Should be passed by value.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
LLVM_READONLY int getMFMASrcCVDstAGPROp(uint16_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
char & AMDGPURewriteAGPRCopyMFMALegacyID
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.