LLVM 22.0.0git
AMDGPURewriteAGPRCopyMFMA.cpp
Go to the documentation of this file.
//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
/// pass will attempt to delete the cross register bank copy and replace the
/// MFMA opcode.
///
/// TODO:
/// - Handle rewrites of phis. This must be more careful than normal about the
/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
/// loop, so it depends on the exact assignment of the copy.
///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
42
43namespace {
44
45STATISTIC(NumMFMAsRewrittenToAGPR,
46 "Number of MFMA instructions rewritten to use AGPR form");
47
48/// Map from spill slot frame index to list of instructions which reference it.
49using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;
50
51class AMDGPURewriteAGPRCopyMFMAImpl {
53 const GCNSubtarget &ST;
54 const SIInstrInfo &TII;
55 const SIRegisterInfo &TRI;
57 VirtRegMap &VRM;
58 LiveRegMatrix &LRM;
59 LiveIntervals &LIS;
60 LiveStacks &LSS;
61 const RegisterClassInfo &RegClassInfo;
62
63 bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
64 MCPhysReg PrefPhysReg) const;
65
66public:
67 AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
69 LiveStacks &LSS,
70 const RegisterClassInfo &RegClassInfo)
71 : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
72 TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
73 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
74
75 bool isRewriteCandidate(const MachineInstr &MI) const {
76 return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
77 }
78
79 /// Find AV_* registers assigned to AGPRs (or virtual registers which were
80 /// already required to be AGPR).
81 ///
82 /// \return the assigned physical register that \p VReg is assigned to if it
83 /// is an AGPR, otherwise MCRegister().
84 MCRegister getAssignedAGPR(Register VReg) const {
85 MCRegister PhysReg = VRM.getPhys(VReg);
86 if (!PhysReg)
87 return MCRegister();
88
89 // If this is an AV register, we have to check if the actual assignment is
90 // to an AGPR
91 const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
92 return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
93 }
94
95 bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
96 MCPhysReg PhysRegHint) const;
97
98 /// Compute the register class constraints based on the uses of \p Reg,
99 /// excluding MFMA uses from which can be rewritten to change the register
100 /// class constraint. This should be nearly identical to
101 /// MachineRegisterInfo::recomputeRegClass.
102
103 /// \p RewriteCandidates will collect the set of MFMA instructions that need
104 /// to have the opcode mutated to perform the replacement.
105 ///
106 /// \p RewriteRegs will accumulate the set of register used by those MFMAs
107 /// that need to have the register classes adjusted.
108 bool recomputeRegClassExceptRewritable(
109 Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
110 SmallSetVector<Register, 4> &RewriteRegs) const;
111
112 bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
113 bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
114
115 /// Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
116 /// with a COPY to the replacement register value \p VReg.
117 void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
118 Register VReg) const;
119
120 /// Create a map from frame index to use instructions for spills. If a use of
121 /// the frame index does not consist only of spill instructions, it will not
122 /// be included in the map.
123 void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
124 SpillReferenceMap &Map) const;
125
126 /// Attempt to unspill VGPRs by finding a free register and replacing the
127 /// spill instructions with copies.
128 void eliminateSpillsOfReassignedVGPRs() const;
129
130 bool run(MachineFunction &MF) const;
131};
132
133bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
134 Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
135 SmallSetVector<Register, 4> &RewriteRegs) const {
136 SmallVector<Register, 8> Worklist = {StartReg};
137
138 // Recursively visit all transitive MFMA users
139 while (!Worklist.empty()) {
140 Register Reg = Worklist.pop_back_val();
141 const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
142
143 // Inflate to the equivalent AV_* class.
144 const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
145 if (OldRC == NewRC)
146 return false;
147
148 // Accumulate constraints from all uses.
149 for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
150 // Apply the effect of the given operand to NewRC.
151 MachineInstr *MI = MO.getParent();
152
153 // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
154 // effects of rewrite candidates. It just so happens that we can use
155 // either AGPR or VGPR in src0/src1, so don't bother checking the
156 // constraint effects of the individual operands.
157 if (isRewriteCandidate(*MI)) {
158 const MachineOperand *VDst =
159 TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
160 const MachineOperand *Src2 =
161 TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
162 for (const MachineOperand *Op : {VDst, Src2}) {
163 if (!Op->isReg())
164 continue;
165
166 Register OtherReg = Op->getReg();
167 if (OtherReg.isPhysical())
168 return false;
169
170 if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
171 Worklist.push_back(OtherReg);
172 }
173
174 if (!is_contained(RewriteCandidates, MI)) {
175 LLVM_DEBUG({
176 Register VDstPhysReg = VRM.getPhys(VDst->getReg());
177 dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
178 << " Dst=[" << printReg(VDst->getReg()) << " => "
179 << printReg(VDstPhysReg, &TRI);
180
181 if (Src2->isReg()) {
182 Register Src2PhysReg = VRM.getPhys(Src2->getReg());
183 dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
184 << printReg(Src2PhysReg, &TRI);
185 }
186
187 dbgs() << "]: " << MI;
188 });
189
190 RewriteCandidates.push_back(MI);
191 }
192
193 continue;
194 }
195
196 unsigned OpNo = &MO - &MI->getOperand(0);
197 NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
198 if (!NewRC || NewRC == OldRC) {
199 LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
200 << " cannot be reassigned to "
201 << TRI.getRegClassName(NewRC) << ": " << *MI);
202 return false;
203 }
204 }
205 }
206
207 return true;
208}
209
210bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
211 MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
212 // src2 and dst have the same physical class constraint; try to preserve
213 // the original src2 subclass if one were to exist.
214 SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
215 SmallSetVector<Register, 4> RewriteRegs;
216
217 // Make sure we reassign the MFMA we found the copy from first. We want
218 // to ensure dst ends up in the physreg we were originally copying to.
219 RewriteRegs.insert(MFMAHintReg);
220
221 // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
222 // that we can trivially rewrite src2 to use the new AGPR. If we can't
223 // trivially replace it, we're going to induce as many copies as we would have
224 // emitted in the first place, as well as need to assign another register, and
225 // need to figure out where to put them. The live range splitting is smarter
226 // than anything we're doing here, so trust it did something reasonable.
227 //
228 // Note recomputeRegClassExceptRewritable will consider the constraints of
229 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
230 if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
231 RewriteRegs)) {
232 LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
233 << printReg(MFMAHintReg, &TRI) << '\n');
234 return false;
235 }
236
237 // If src2 and dst are different registers, we need to also reassign the
238 // input to an available AGPR if it is compatible with all other uses.
239 //
240 // If we can't reassign it, we'd need to introduce a different copy
241 // which is likely worse than the copy we'd be saving.
242 //
243 // It's likely that the MFMA is used in sequence with other MFMAs; if we
244 // cannot migrate the full use/def chain of MFMAs, we would need to
245 // introduce intermediate copies somewhere. So we only make the
246 // transform if all the interfering MFMAs can also be migrated. Collect
247 // the set of rewritable MFMAs and check if we can assign an AGPR at
248 // that point.
249 //
250 // If any of the MFMAs aren't reassignable, we give up and rollback to
251 // the original register assignments.
252
253 using RecoloringStack =
255 RecoloringStack TentativeReassignments;
256
257 for (Register RewriteReg : RewriteRegs) {
258 LiveInterval &LI = LIS.getInterval(RewriteReg);
259 TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
260 LRM.unassign(LI);
261 }
262
263 if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
264 // Roll back the register assignments to the original state.
265 for (auto [LI, OldAssign] : TentativeReassignments) {
266 if (VRM.hasPhys(LI->reg()))
267 LRM.unassign(*LI);
268 LRM.assign(*LI, OldAssign);
269 }
270
271 return false;
272 }
273
274 // Fixup the register classes of the virtual registers now that we've
275 // committed to the reassignments.
276 for (Register InterferingReg : RewriteRegs) {
277 const TargetRegisterClass *EquivalentAGPRRegClass =
278 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
279 MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
280 }
281
282 for (MachineInstr *RewriteCandidate : RewriteCandidates) {
283 int NewMFMAOp =
284 AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
285 RewriteCandidate->setDesc(TII.get(NewMFMAOp));
286 ++NumMFMAsRewrittenToAGPR;
287 }
288
289 return true;
290}
291
292/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
293/// preference to use \p PhysReg first. Returns false if the reassignments
294/// cannot be trivially performed.
295bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
296 SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
297 // FIXME: The ordering may matter here, but we're just taking uselistorder
298 // with the special case of ensuring to process the starting instruction
299 // first. We probably should extract the priority advisor out of greedy and
300 // use that ordering.
301 for (Register InterferingReg : InterferingRegs) {
302 LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
303 const TargetRegisterClass *EquivalentAGPRRegClass =
304 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
305
306 MCPhysReg Assignable = AMDGPU::NoRegister;
307 if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
308 LRM.checkInterference(ReassignLI, PrefPhysReg) ==
310 // First try to assign to the AGPR we were already copying to. This
311 // should be the first assignment we attempt. We have to guard
312 // against the use being a subregister (which doesn't have an exact
313 // class match).
314
315 // TODO: If this does happen to be a subregister use, we should
316 // still try to assign to a subregister of the original copy result.
317 Assignable = PrefPhysReg;
318 } else {
319 ArrayRef<MCPhysReg> AllocOrder =
320 RegClassInfo.getOrder(EquivalentAGPRRegClass);
321 for (MCPhysReg Reg : AllocOrder) {
322 if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
323 Assignable = Reg;
324 break;
325 }
326 }
327 }
328
329 if (!Assignable) {
330 LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
331 << printReg(InterferingReg, &TRI)
332 << " to a free AGPR\n");
333 return false;
334 }
335
336 LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
337 << " to " << printReg(Assignable, &TRI) << '\n');
338 LRM.assign(ReassignLI, Assignable);
339 }
340
341 return true;
342}
343
344/// Identify copies that look like:
345/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
346/// %agpr = COPY %vgpr
347///
348/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
349/// versions of the MFMA. This should cover the common case.
350bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
351 Register VReg, MCRegister AssignedAGPR) const {
352 bool MadeChange = false;
353 for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
354 if (!UseMI.isCopy())
355 continue;
356
357 Register CopySrcReg = UseMI.getOperand(1).getReg();
358 if (!CopySrcReg.isVirtual())
359 continue;
360
361 // TODO: Handle loop phis copied to AGPR. e.g.
362 //
363 // loop:
364 // %phi:vgpr = COPY %mfma:vgpr
365 // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
366 // s_cbranch_vccnz loop
367 //
368 // endloop:
369 // %agpr = mfma
370 //
371 // We need to be sure that %phi is assigned to the same physical register as
372 // %mfma, or else we will just be moving copies into the loop.
373
374 for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
375 if (isRewriteCandidate(CopySrcDefMI) &&
376 tryReassigningMFMAChain(
377 CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
378 MadeChange = true;
379 }
380 }
381
382 return MadeChange;
383}
384
385/// Identify copies that look like:
386/// %src:vgpr = COPY %src:agpr
387/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
388///
389/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
390/// versions of the MFMA. This should cover rarer cases, and will generally be
391/// redundant with tryFoldCopiesToAGPR.
392bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
393 Register VReg, MCRegister AssignedAGPR) const {
394 bool MadeChange = false;
395 for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
396 if (!UseMI.isCopy())
397 continue;
398
399 Register CopyDstReg = UseMI.getOperand(0).getReg();
400 if (!CopyDstReg.isVirtual())
401 continue;
402 for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
403 if (!CopyUseMO.readsReg())
404 continue;
405
406 MachineInstr &CopyUseMI = *CopyUseMO.getParent();
407 if (isRewriteCandidate(CopyUseMI)) {
408 if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
409 VRM.getPhys(CopyDstReg)))
410 MadeChange = true;
411 }
412 }
413 }
414
415 return MadeChange;
416}
417
418void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
419 MachineInstr &SpillMI, int SpillFI, Register VReg) const {
420 const DebugLoc &DL = SpillMI.getDebugLoc();
421 MachineBasicBlock &MBB = *SpillMI.getParent();
422 MachineInstr *NewCopy;
423 if (SpillMI.mayStore()) {
424 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
425 .add(SpillMI.getOperand(0));
426 } else {
427 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
428 .add(SpillMI.getOperand(0))
429 .addReg(VReg);
430 }
431
432 LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
433 SpillMI.eraseFromParent();
434}
435
436void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
437 ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
438
439 SmallSet<int, 4> NeededFrameIndexes;
440 for (const LiveInterval *LI : StackIntervals)
441 NeededFrameIndexes.insert(LI->reg().stackSlotIndex());
442
443 for (MachineBasicBlock &MBB : MF) {
444 for (MachineInstr &MI : MBB) {
445 for (MachineOperand &MO : MI.operands()) {
446 if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
447 continue;
448
449 if (TII.isVGPRSpill(MI)) {
450 SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
451 References.push_back(&MI);
452 break;
453 }
454
455 // Verify this was really a spill instruction, if it's not just ignore
456 // all uses.
457
458 // TODO: This should probably be verifier enforced.
459 NeededFrameIndexes.erase(MO.getIndex());
460 Map.erase(MO.getIndex());
461 }
462 }
463 }
464}
465
466void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
467 unsigned NumSlots = LSS.getNumIntervals();
468 if (NumSlots == 0)
469 return;
470
471 MachineFrameInfo &MFI = MF.getFrameInfo();
472
473 SmallVector<LiveInterval *, 32> StackIntervals;
474 StackIntervals.reserve(NumSlots);
475
476 for (auto &[Slot, LI] : LSS) {
477 if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
478 continue;
479
480 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
481 if (TRI.hasVGPRs(RC))
482 StackIntervals.push_back(&LI);
483 }
484
485 sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
486 // The ordering has to be strictly weak.
487 /// Sort heaviest intervals first to prioritize their unspilling
488 if (A->weight() != B->weight())
489 return A->weight() > B->weight();
490
491 if (A->getSize() != B->getSize())
492 return A->getSize() > B->getSize();
493
494 // Tie breaker by number to avoid need for stable sort
495 return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
496 });
497
498 // FIXME: The APIs for dealing with the LiveInterval of a frame index are
499 // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
500 // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
501 // and must create a substitute virtual register to do so. This makes
502 // incremental updating here difficult; we need to actually perform the IR
503 // mutation to get the new vreg references in place to compute the register
504 // LiveInterval to perform an assignment to track the new interference
505 // correctly, and we can't simply migrate the LiveInterval we already have.
506 //
507 // To avoid walking through the entire function for each index, pre-collect
508 // all the instructions slot referencess.
509
511 collectSpillIndexUses(StackIntervals, SpillSlotReferences);
512
513 for (LiveInterval *LI : StackIntervals) {
514 int Slot = LI->reg().stackSlotIndex();
515 auto SpillReferences = SpillSlotReferences.find(Slot);
516 if (SpillReferences == SpillSlotReferences.end())
517 continue;
518
519 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
520
521 LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
522 << " by reassigning\n");
523
524 ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
525
526 for (MCPhysReg PhysReg : AllocOrder) {
527 if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
528 continue;
529
530 LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
531 << printReg(PhysReg, &TRI) << '\n');
532
533 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
534 Register NewVReg = MRI.createVirtualRegister(RC);
535
536 for (MachineInstr *SpillMI : SpillReferences->second)
537 replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);
538
539 // TODO: We should be able to transfer the information from the stack
540 // slot's LiveInterval without recomputing from scratch with the
541 // replacement vreg uses.
542 LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
543 VRM.grow();
544 LRM.assign(NewLI, PhysReg);
545 MFI.RemoveStackObject(Slot);
546 break;
547 }
548 }
549}
550
551bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
552 // This only applies on subtargets that have a configurable AGPR vs. VGPR
553 // allocation.
554 if (!ST.hasGFX90AInsts())
555 return false;
556
557 // Early exit if no AGPRs were assigned.
558 if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
559 LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
560 return false;
561 }
562
563 bool MadeChange = false;
564
565 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
567 MCRegister AssignedAGPR = getAssignedAGPR(VReg);
568 if (!AssignedAGPR)
569 continue;
570
571 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
572 MadeChange = true;
573 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
574 MadeChange = true;
575 }
576
577 // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
578 // pressure. See if we can eliminate some spills now that those registers are
579 // more available.
580 if (MadeChange)
581 eliminateSpillsOfReassignedVGPRs();
582
583 return MadeChange;
584}
585
586class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
587public:
588 static char ID;
589 RegisterClassInfo RegClassInfo;
590
591 AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
594 }
595
596 bool runOnMachineFunction(MachineFunction &MF) override;
597
598 StringRef getPassName() const override {
599 return "AMDGPU Rewrite AGPR-Copy-MFMA";
600 }
601
602 void getAnalysisUsage(AnalysisUsage &AU) const override {
607
612
613 AU.setPreservesAll();
615 }
616};
617
618} // End anonymous namespace.
619
620INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
621 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
626INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
627 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
628
629char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;
630
632 AMDGPURewriteAGPRCopyMFMALegacy::ID;
633
634bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
635 MachineFunction &MF) {
636 if (skipFunction(MF.getFunction()))
637 return false;
638
639 RegClassInfo.runOnMachineFunction(MF);
640
641 auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
642 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
643 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
644 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
645 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
646 return Impl.run(MF);
647}
648
652 VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
655 LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
656 RegisterClassInfo RegClassInfo;
657 RegClassInfo.runOnMachineFunction(MF);
658
659 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
660 if (!Impl.run(MF))
661 return PreservedAnalyses::all();
663 PA.preserveSet<CFGAnalyses>()
664 .preserve<LiveStacksAnalysis>()
665 .preserve<VirtRegMapAnalysis>()
666 .preserve<SlotIndexesAnalysis>()
667 .preserve<LiveIntervalsAnalysis>()
668 .preserve<LiveRegMatrixAnalysis>();
669 return PA;
670}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
AMDGPU Rewrite AGPR Copy MFMA
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A debug info location.
Definition DebugLoc.h:124
bool hasGFX90AInsts() const
LiveInterval - This class represents the liveness of a register, or stack slot.
LiveInterval & getInterval(Register Reg)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_Free
No interference, go ahead and assign.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
unsigned getNumIntervals() const
Definition LiveStacks.h:59
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:337
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition VirtRegMap.h:91
LLVM_ABI void grow()
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition VirtRegMap.h:87
LLVM_READONLY int getMFMASrcCVDstAGPROp(uint16_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
char & AMDGPURewriteAGPRCopyMFMALegacyID
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.