LLVM 22.0.0git
AMDGPURewriteAGPRCopyMFMA.cpp
Go to the documentation of this file.
1//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
10/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
11/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
12/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
13/// pass will attempt to delete the cross register bank copy and replace the
14/// MFMA opcode.
15///
16/// TODO:
17/// - Handle rewrites of phis. This must be more careful than normal about the
18/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
19/// loop, so it depends on the exact assignment of the copy.
20///
21/// - Update LiveIntervals incrementally instead of recomputing from scratch
22///
23//===----------------------------------------------------------------------===//
24
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
39using namespace llvm;
40
41#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
42
43namespace {
44
45STATISTIC(NumMFMAsRewrittenToAGPR,
46 "Number of MFMA instructions rewritten to use AGPR form");
47
48/// Map from spill slot frame index to list of instructions which reference it.
49using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;
50
51class AMDGPURewriteAGPRCopyMFMAImpl {
53 const GCNSubtarget &ST;
54 const SIInstrInfo &TII;
55 const SIRegisterInfo &TRI;
57 VirtRegMap &VRM;
58 LiveRegMatrix &LRM;
59 LiveIntervals &LIS;
60 LiveStacks &LSS;
61 const RegisterClassInfo &RegClassInfo;
62
63 bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
64 MCPhysReg PrefPhysReg) const;
65
66public:
67 AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
69 LiveStacks &LSS,
70 const RegisterClassInfo &RegClassInfo)
71 : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
72 TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
73 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
74
75 bool isRewriteCandidate(const MachineInstr &MI) const {
76 return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
77 }
78
79 /// Find AV_* registers assigned to AGPRs (or virtual registers which were
80 /// already required to be AGPR).
81 ///
82 /// \return the assigned physical register that \p VReg is assigned to if it
83 /// is an AGPR, otherwise MCRegister().
84 MCRegister getAssignedAGPR(Register VReg) const {
85 MCRegister PhysReg = VRM.getPhys(VReg);
86 if (!PhysReg)
87 return MCRegister();
88
89 // If this is an AV register, we have to check if the actual assignment is
90 // to an AGPR
91 const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
92 return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
93 }
94
95 bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
96 MCPhysReg PhysRegHint) const;
97
98 /// Compute the register class constraints based on the uses of \p Reg,
99 /// excluding MFMA uses from which can be rewritten to change the register
100 /// class constraint. MFMA scale operands need to be constraint checked.
101 /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
102
103 /// \p RewriteCandidates will collect the set of MFMA instructions that need
104 /// to have the opcode mutated to perform the replacement.
105 ///
106 /// \p RewriteRegs will accumulate the set of register used by those MFMAs
107 /// that need to have the register classes adjusted.
108 bool recomputeRegClassExceptRewritable(
109 Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
110 SmallSetVector<Register, 4> &RewriteRegs) const;
111
112 bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
113 bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
114
115 /// Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
116 /// with a COPY to the replacement register value \p VReg.
117 void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
118 Register VReg) const;
119
120 /// Create a map from frame index to use instructions for spills. If a use of
121 /// the frame index does not consist only of spill instructions, it will not
122 /// be included in the map.
123 void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
124 SpillReferenceMap &Map) const;
125
126 /// Attempt to unspill VGPRs by finding a free register and replacing the
127 /// spill instructions with copies.
128 void eliminateSpillsOfReassignedVGPRs() const;
129
130 bool run(MachineFunction &MF) const;
131};
132
133bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
134 Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
135 SmallSetVector<Register, 4> &RewriteRegs) const {
136 SmallVector<Register, 8> Worklist = {StartReg};
137
138 // Recursively visit all transitive MFMA users
139 while (!Worklist.empty()) {
140 Register Reg = Worklist.pop_back_val();
141 const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
142
143 // Inflate to the equivalent AV_* class.
144 const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
145 if (OldRC == NewRC)
146 return false;
147
148 // Accumulate constraints from all uses.
149 for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
150 // Apply the effect of the given operand to NewRC.
151 MachineInstr *MI = MO.getParent();
152
153 // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
154 // effects of rewrite candidates. It just so happens that we can use
155 // either AGPR or VGPR in src0/src1. We still need to check constraint
156 // effects for scale variant, which does not allow AGPR.
157 if (isRewriteCandidate(*MI)) {
158 int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
159 const MCInstrDesc &AGPRDesc = TII.get(AGPROp);
160 const TargetRegisterClass *NewRC =
161 TII.getRegClass(AGPRDesc, MO.getOperandNo());
162 if (!TRI.hasAGPRs(NewRC))
163 return false;
164
165 const MachineOperand *VDst =
166 TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
167 const MachineOperand *Src2 =
168 TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
169 for (const MachineOperand *Op : {VDst, Src2}) {
170 if (!Op->isReg())
171 continue;
172
173 Register OtherReg = Op->getReg();
174 if (OtherReg.isPhysical())
175 return false;
176
177 if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
178 Worklist.push_back(OtherReg);
179 }
180
181 if (!is_contained(RewriteCandidates, MI)) {
182 LLVM_DEBUG({
183 Register VDstPhysReg = VRM.getPhys(VDst->getReg());
184 dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
185 << " Dst=[" << printReg(VDst->getReg()) << " => "
186 << printReg(VDstPhysReg, &TRI);
187
188 if (Src2->isReg()) {
189 Register Src2PhysReg = VRM.getPhys(Src2->getReg());
190 dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
191 << printReg(Src2PhysReg, &TRI);
192 }
193
194 dbgs() << "]: " << MI;
195 });
196
197 RewriteCandidates.push_back(MI);
198 }
199
200 continue;
201 }
202
203 unsigned OpNo = &MO - &MI->getOperand(0);
204 NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
205 if (!NewRC || NewRC == OldRC) {
206 LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
207 << " cannot be reassigned to "
208 << TRI.getRegClassName(NewRC) << ": " << *MI);
209 return false;
210 }
211 }
212 }
213
214 return true;
215}
216
217bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
218 MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
219 // src2 and dst have the same physical class constraint; try to preserve
220 // the original src2 subclass if one were to exist.
221 SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
222 SmallSetVector<Register, 4> RewriteRegs;
223
224 // Make sure we reassign the MFMA we found the copy from first. We want
225 // to ensure dst ends up in the physreg we were originally copying to.
226 RewriteRegs.insert(MFMAHintReg);
227
228 // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
229 // that we can trivially rewrite src2 to use the new AGPR. If we can't
230 // trivially replace it, we're going to induce as many copies as we would have
231 // emitted in the first place, as well as need to assign another register, and
232 // need to figure out where to put them. The live range splitting is smarter
233 // than anything we're doing here, so trust it did something reasonable.
234 //
235 // Note recomputeRegClassExceptRewritable will consider the constraints of
236 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
237 if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
238 RewriteRegs)) {
239 LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
240 << printReg(MFMAHintReg, &TRI) << '\n');
241 return false;
242 }
243
244 // If src2 and dst are different registers, we need to also reassign the
245 // input to an available AGPR if it is compatible with all other uses.
246 //
247 // If we can't reassign it, we'd need to introduce a different copy
248 // which is likely worse than the copy we'd be saving.
249 //
250 // It's likely that the MFMA is used in sequence with other MFMAs; if we
251 // cannot migrate the full use/def chain of MFMAs, we would need to
252 // introduce intermediate copies somewhere. So we only make the
253 // transform if all the interfering MFMAs can also be migrated. Collect
254 // the set of rewritable MFMAs and check if we can assign an AGPR at
255 // that point.
256 //
257 // If any of the MFMAs aren't reassignable, we give up and rollback to
258 // the original register assignments.
259
260 using RecoloringStack =
262 RecoloringStack TentativeReassignments;
263
264 for (Register RewriteReg : RewriteRegs) {
265 LiveInterval &LI = LIS.getInterval(RewriteReg);
266 TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
267 LRM.unassign(LI);
268 }
269
270 if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
271 // Roll back the register assignments to the original state.
272 for (auto [LI, OldAssign] : TentativeReassignments) {
273 if (VRM.hasPhys(LI->reg()))
274 LRM.unassign(*LI);
275 LRM.assign(*LI, OldAssign);
276 }
277
278 return false;
279 }
280
281 // Fixup the register classes of the virtual registers now that we've
282 // committed to the reassignments.
283 for (Register InterferingReg : RewriteRegs) {
284 const TargetRegisterClass *EquivalentAGPRRegClass =
285 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
286 MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
287 }
288
289 for (MachineInstr *RewriteCandidate : RewriteCandidates) {
290 int NewMFMAOp =
291 AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
292 RewriteCandidate->setDesc(TII.get(NewMFMAOp));
293 ++NumMFMAsRewrittenToAGPR;
294 }
295
296 return true;
297}
298
299/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
300/// preference to use \p PhysReg first. Returns false if the reassignments
301/// cannot be trivially performed.
302bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
303 SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
304 // FIXME: The ordering may matter here, but we're just taking uselistorder
305 // with the special case of ensuring to process the starting instruction
306 // first. We probably should extract the priority advisor out of greedy and
307 // use that ordering.
308 for (Register InterferingReg : InterferingRegs) {
309 LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
310 const TargetRegisterClass *EquivalentAGPRRegClass =
311 TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
312
313 MCPhysReg Assignable = AMDGPU::NoRegister;
314 if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
315 LRM.checkInterference(ReassignLI, PrefPhysReg) ==
317 // First try to assign to the AGPR we were already copying to. This
318 // should be the first assignment we attempt. We have to guard
319 // against the use being a subregister (which doesn't have an exact
320 // class match).
321
322 // TODO: If this does happen to be a subregister use, we should
323 // still try to assign to a subregister of the original copy result.
324 Assignable = PrefPhysReg;
325 } else {
326 ArrayRef<MCPhysReg> AllocOrder =
327 RegClassInfo.getOrder(EquivalentAGPRRegClass);
328 for (MCPhysReg Reg : AllocOrder) {
329 if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
330 Assignable = Reg;
331 break;
332 }
333 }
334 }
335
336 if (!Assignable) {
337 LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
338 << printReg(InterferingReg, &TRI)
339 << " to a free AGPR\n");
340 return false;
341 }
342
343 LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
344 << " to " << printReg(Assignable, &TRI) << '\n');
345 LRM.assign(ReassignLI, Assignable);
346 }
347
348 return true;
349}
350
351/// Identify copies that look like:
352/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
353/// %agpr = COPY %vgpr
354///
355/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
356/// versions of the MFMA. This should cover the common case.
357bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
358 Register VReg, MCRegister AssignedAGPR) const {
359 bool MadeChange = false;
360 for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
361 if (!UseMI.isCopy())
362 continue;
363
364 Register CopySrcReg = UseMI.getOperand(1).getReg();
365 if (!CopySrcReg.isVirtual())
366 continue;
367
368 // TODO: Handle loop phis copied to AGPR. e.g.
369 //
370 // loop:
371 // %phi:vgpr = COPY %mfma:vgpr
372 // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
373 // s_cbranch_vccnz loop
374 //
375 // endloop:
376 // %agpr = mfma
377 //
378 // We need to be sure that %phi is assigned to the same physical register as
379 // %mfma, or else we will just be moving copies into the loop.
380
381 for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
382 if (isRewriteCandidate(CopySrcDefMI) &&
383 tryReassigningMFMAChain(
384 CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
385 MadeChange = true;
386 }
387 }
388
389 return MadeChange;
390}
391
392/// Identify copies that look like:
393/// %src:vgpr = COPY %src:agpr
394/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
395///
396/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
397/// versions of the MFMA. This should cover rarer cases, and will generally be
398/// redundant with tryFoldCopiesToAGPR.
399bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
400 Register VReg, MCRegister AssignedAGPR) const {
401 bool MadeChange = false;
402 for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
403 if (!UseMI.isCopy())
404 continue;
405
406 Register CopyDstReg = UseMI.getOperand(0).getReg();
407 if (!CopyDstReg.isVirtual())
408 continue;
409 for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
410 if (!CopyUseMO.readsReg())
411 continue;
412
413 MachineInstr &CopyUseMI = *CopyUseMO.getParent();
414 if (isRewriteCandidate(CopyUseMI)) {
415 if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
416 VRM.getPhys(CopyDstReg)))
417 MadeChange = true;
418 }
419 }
420 }
421
422 return MadeChange;
423}
424
425void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
426 MachineInstr &SpillMI, int SpillFI, Register VReg) const {
427 const DebugLoc &DL = SpillMI.getDebugLoc();
428 MachineBasicBlock &MBB = *SpillMI.getParent();
429 MachineInstr *NewCopy;
430 if (SpillMI.mayStore()) {
431 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
432 .add(SpillMI.getOperand(0));
433 } else {
434 NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
435 .add(SpillMI.getOperand(0))
436 .addReg(VReg);
437 }
438
439 LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
440 SpillMI.eraseFromParent();
441}
442
443void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
444 ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
445
446 SmallSet<int, 4> NeededFrameIndexes;
447 for (const LiveInterval *LI : StackIntervals)
448 NeededFrameIndexes.insert(LI->reg().stackSlotIndex());
449
450 for (MachineBasicBlock &MBB : MF) {
451 for (MachineInstr &MI : MBB) {
452 for (MachineOperand &MO : MI.operands()) {
453 if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
454 continue;
455
456 if (TII.isVGPRSpill(MI)) {
457 SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
458 References.push_back(&MI);
459 break;
460 }
461
462 // Verify this was really a spill instruction, if it's not just ignore
463 // all uses.
464
465 // TODO: This should probably be verifier enforced.
466 NeededFrameIndexes.erase(MO.getIndex());
467 Map.erase(MO.getIndex());
468 }
469 }
470 }
471}
472
473void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
474 unsigned NumSlots = LSS.getNumIntervals();
475 if (NumSlots == 0)
476 return;
477
478 MachineFrameInfo &MFI = MF.getFrameInfo();
479
480 SmallVector<LiveInterval *, 32> StackIntervals;
481 StackIntervals.reserve(NumSlots);
482
483 for (auto &[Slot, LI] : LSS) {
484 if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
485 continue;
486
487 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
488 if (TRI.hasVGPRs(RC))
489 StackIntervals.push_back(&LI);
490 }
491
492 sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
493 // The ordering has to be strictly weak.
494 /// Sort heaviest intervals first to prioritize their unspilling
495 if (A->weight() != B->weight())
496 return A->weight() > B->weight();
497
498 if (A->getSize() != B->getSize())
499 return A->getSize() > B->getSize();
500
501 // Tie breaker by number to avoid need for stable sort
502 return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
503 });
504
505 // FIXME: The APIs for dealing with the LiveInterval of a frame index are
506 // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
507 // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
508 // and must create a substitute virtual register to do so. This makes
509 // incremental updating here difficult; we need to actually perform the IR
510 // mutation to get the new vreg references in place to compute the register
511 // LiveInterval to perform an assignment to track the new interference
512 // correctly, and we can't simply migrate the LiveInterval we already have.
513 //
514 // To avoid walking through the entire function for each index, pre-collect
515 // all the instructions slot referencess.
516
518 collectSpillIndexUses(StackIntervals, SpillSlotReferences);
519
520 for (LiveInterval *LI : StackIntervals) {
521 int Slot = LI->reg().stackSlotIndex();
522 auto SpillReferences = SpillSlotReferences.find(Slot);
523 if (SpillReferences == SpillSlotReferences.end())
524 continue;
525
526 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
527
528 LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
529 << " by reassigning\n");
530
531 ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
532
533 for (MCPhysReg PhysReg : AllocOrder) {
534 if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
535 continue;
536
537 LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
538 << printReg(PhysReg, &TRI) << '\n');
539
540 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
541 Register NewVReg = MRI.createVirtualRegister(RC);
542
543 for (MachineInstr *SpillMI : SpillReferences->second)
544 replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);
545
546 // TODO: We should be able to transfer the information from the stack
547 // slot's LiveInterval without recomputing from scratch with the
548 // replacement vreg uses.
549 LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
550 VRM.grow();
551 LRM.assign(NewLI, PhysReg);
552 MFI.RemoveStackObject(Slot);
553 break;
554 }
555 }
556}
557
558bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
559 // This only applies on subtargets that have a configurable AGPR vs. VGPR
560 // allocation.
561 if (!ST.hasGFX90AInsts())
562 return false;
563
564 // Early exit if no AGPRs were assigned.
565 if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
566 LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
567 return false;
568 }
569
570 bool MadeChange = false;
571
572 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
574 MCRegister AssignedAGPR = getAssignedAGPR(VReg);
575 if (!AssignedAGPR)
576 continue;
577
578 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
579 MadeChange = true;
580 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
581 MadeChange = true;
582 }
583
584 // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
585 // pressure. See if we can eliminate some spills now that those registers are
586 // more available.
587 if (MadeChange)
588 eliminateSpillsOfReassignedVGPRs();
589
590 return MadeChange;
591}
592
593class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
594public:
595 static char ID;
596 RegisterClassInfo RegClassInfo;
597
598 AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
601 }
602
603 bool runOnMachineFunction(MachineFunction &MF) override;
604
605 StringRef getPassName() const override {
606 return "AMDGPU Rewrite AGPR-Copy-MFMA";
607 }
608
609 void getAnalysisUsage(AnalysisUsage &AU) const override {
614
619
620 AU.setPreservesAll();
622 }
623};
624
625} // End anonymous namespace.
626
627INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
628 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
633INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
634 "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
635
636char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;
637
639 AMDGPURewriteAGPRCopyMFMALegacy::ID;
640
641bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
642 MachineFunction &MF) {
643 if (skipFunction(MF.getFunction()))
644 return false;
645
646 RegClassInfo.runOnMachineFunction(MF);
647
648 auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
649 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
650 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
651 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
652 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
653 return Impl.run(MF);
654}
655
659 VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
662 LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
663 RegisterClassInfo RegClassInfo;
664 RegClassInfo.runOnMachineFunction(MF);
665
666 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
667 if (!Impl.run(MF))
668 return PreservedAnalyses::all();
670 PA.preserveSet<CFGAnalyses>()
671 .preserve<LiveStacksAnalysis>()
672 .preserve<VirtRegMapAnalysis>()
673 .preserve<SlotIndexesAnalysis>()
674 .preserve<LiveIntervalsAnalysis>()
675 .preserve<LiveRegMatrixAnalysis>();
676 return PA;
677}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
AMDGPU Rewrite AGPR Copy MFMA
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
Interface definition for SIRegisterInfo.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A debug info location.
Definition DebugLoc.h:123
bool hasGFX90AInsts() const
LiveInterval - This class represents the liveness of a register, or stack slot.
LiveInterval & getInterval(Register Reg)
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_Free
No interference, go ahead and assign.
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
unsigned getNumIntervals() const
Definition LiveStacks.h:59
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF, bool Rev=false)
runOnFunction - Prepare to answer questions about MF.
ArrayRef< MCPhysReg > getOrder(const TargetRegisterClass *RC) const
getOrder - Returns the preferred allocation order for RC.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition VirtRegMap.h:91
LLVM_ABI void grow()
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition VirtRegMap.h:87
LLVM_READONLY int getMFMASrcCVDstAGPROp(uint16_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
char & AMDGPURewriteAGPRCopyMFMALegacyID
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.