LLVM 23.0.0git
GCNRegPressure.cpp
Go to the documentation of this file.
1//===- GCNRegPressure.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the GCNRegPressure class.
11///
12//===----------------------------------------------------------------------===//
13
14#include "GCNRegPressure.h"
15#include "AMDGPU.h"
20
21using namespace llvm;
22
23#define DEBUG_TYPE "machine-scheduler"
24
26 const GCNRPTracker::LiveRegSet &S2) {
27 if (S1.size() != S2.size())
28 return false;
29
30 for (const auto &P : S1) {
31 auto I = S2.find(P.first);
32 if (I == S2.end() || I->second != P.second)
33 return false;
34 }
35 return true;
36}
37
38///////////////////////////////////////////////////////////////////////////////
39// GCNRegPressure
40
42 const SIRegisterInfo *STI) {
43 return STI->isSGPRClass(RC)
44 ? SGPR
45 : (STI->isAGPRClass(RC)
46 ? AGPR
47 : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
48}
49
50void GCNRegPressure::inc(unsigned Reg,
51 LaneBitmask PrevMask,
52 LaneBitmask NewMask,
53 const MachineRegisterInfo &MRI) {
54 unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
55 unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
56 if (NewNumCoveredRegs == PrevNumCoveredRegs)
57 return;
58
59 int Sign = 1;
60 if (NewMask < PrevMask) {
61 std::swap(NewMask, PrevMask);
62 std::swap(NewNumCoveredRegs, PrevNumCoveredRegs);
63 Sign = -1;
64 }
65 assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
66 "prev mask should always be lesser than new");
67
68 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
70 const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
71 unsigned RegKind = getRegKind(RC, STI);
72 if (TRI->getRegSizeInBits(*RC) != 32) {
73 // Reg is from a tuple register class.
74 if (PrevMask.none()) {
75 unsigned TupleIdx = TOTAL_KINDS + RegKind;
76 Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
77 }
78 // Pressure scales with number of new registers covered by the new mask.
79 // Note when true16 is enabled, we can no longer safely use the following
80 // approach to calculate the difference in the number of 32-bit registers
81 // between two masks:
82 //
83 // Sign *= SIRegisterInfo::getNumCoveredRegs(~PrevMask & NewMask);
84 //
85 // The issue is that the mask calculation `~PrevMask & NewMask` doesn't
86 // properly account for partial usage of a 32-bit register when dealing with
87 // 16-bit registers.
88 //
89 // Consider this example:
90 // Assume PrevMask = 0b0010 and NewMask = 0b1111. Here, the correct register
91 // usage difference should be 1, because even though PrevMask uses only half
92 // of a 32-bit register, it should still be counted as a full register use.
93 // However, the mask calculation yields `~PrevMask & NewMask = 0b1101`, and
94 // calling `getNumCoveredRegs` returns 2 instead of 1. This incorrect
95 // calculation can lead to integer overflow when Sign = -1.
96 Sign *= NewNumCoveredRegs - PrevNumCoveredRegs;
97 }
98 Value[RegKind] += Sign;
99}
100
101namespace {
102struct RegExcess {
103 unsigned SGPR = 0;
104 unsigned VGPR = 0;
105 unsigned ArchVGPR = 0;
106 unsigned AGPR = 0;
107
108 bool anyExcess() const { return SGPR || VGPR || ArchVGPR || AGPR; }
109 bool hasVectorRegisterExcess() const { return VGPR || ArchVGPR || AGPR; }
110
111 RegExcess(const MachineFunction &MF, const GCNRegPressure &RP)
112 : RegExcess(MF, RP, GCNRPTarget(MF, RP)) {}
113 RegExcess(const MachineFunction &MF, const GCNRegPressure &RP,
114 const GCNRPTarget &Target) {
115 unsigned MaxSGPRs = Target.getMaxSGPRs();
116 unsigned MaxVGPRs = Target.getMaxVGPRs();
117
118 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
119 SGPR = std::max(static_cast<int>(RP.getSGPRNum() - MaxSGPRs), 0);
120
121 // The number of virtual VGPRs required to handle excess SGPR
122 unsigned WaveSize = ST.getWavefrontSize();
123 unsigned VGPRForSGPRSpills = divideCeil(SGPR, WaveSize);
124
125 unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
126
127 // Unified excess pressure conditions, accounting for VGPRs used for SGPR
128 // spills
129 VGPR = std::max(static_cast<int>(RP.getVGPRNum(ST.hasGFX90AInsts()) +
130 VGPRForSGPRSpills - MaxVGPRs),
131 0);
132
133 unsigned ArchVGPRLimit = ST.hasGFX90AInsts() ? MaxArchVGPRs : MaxVGPRs;
134 // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
135 // spills
136 ArchVGPR = std::max(static_cast<int>(RP.getArchVGPRNum() +
137 VGPRForSGPRSpills - ArchVGPRLimit),
138 0);
139
140 // AGPR excess pressure conditions
141 AGPR = std::max(static_cast<int>(RP.getAGPRNum() - ArchVGPRLimit), 0);
142 }
143};
144} // namespace
145
147 unsigned MaxOccupancy) const {
148 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
149 unsigned DynamicVGPRBlockSize =
150 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
151
152 const auto SGPROcc = std::min(MaxOccupancy,
153 ST.getOccupancyWithNumSGPRs(getSGPRNum()));
154 const auto VGPROcc = std::min(
155 MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
156 DynamicVGPRBlockSize));
157 const auto OtherSGPROcc = std::min(MaxOccupancy,
158 ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
159 const auto OtherVGPROcc =
160 std::min(MaxOccupancy,
161 ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
162 DynamicVGPRBlockSize));
163
164 const auto Occ = std::min(SGPROcc, VGPROcc);
165 const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
166
167 // Give first precedence to the better occupancy.
168 if (Occ != OtherOcc)
169 return Occ > OtherOcc;
170
171 unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
172
173 RegExcess Excess(MF, *this);
174 RegExcess OtherExcess(MF, O);
175
176 unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
177
178 bool ExcessRP = Excess.anyExcess();
179 bool OtherExcessRP = OtherExcess.anyExcess();
180
181 // Give second precedence to the reduced number of spills to hold the register
182 // pressure.
183 if (ExcessRP || OtherExcessRP) {
184 // The difference in excess VGPR pressure, after including VGPRs used for
185 // SGPR spills
186 int VGPRDiff =
187 ((OtherExcess.VGPR + OtherExcess.ArchVGPR + OtherExcess.AGPR) -
188 (Excess.VGPR + Excess.ArchVGPR + Excess.AGPR));
189
190 int SGPRDiff = OtherExcess.SGPR - Excess.SGPR;
191
192 if (VGPRDiff != 0)
193 return VGPRDiff > 0;
194 if (SGPRDiff != 0) {
195 unsigned PureExcessVGPR =
196 std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
197 0) +
198 std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
199 unsigned OtherPureExcessVGPR =
200 std::max(
201 static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
202 0) +
203 std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
204
205 // If we have a special case where there is a tie in excess VGPR, but one
206 // of the pressures has VGPR usage from SGPR spills, prefer the pressure
207 // with SGPR spills.
208 if (PureExcessVGPR != OtherPureExcessVGPR)
209 return SGPRDiff < 0;
210 // If both pressures have the same excess pressure before and after
211 // accounting for SGPR spills, prefer fewer SGPR spills.
212 return SGPRDiff > 0;
213 }
214 }
215
216 bool SGPRImportant = SGPROcc < VGPROcc;
217 const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
218
219 // If both pressures disagree on what is more important compare vgprs.
220 if (SGPRImportant != OtherSGPRImportant) {
221 SGPRImportant = false;
222 }
223
224 // Give third precedence to lower register tuple pressure.
225 bool SGPRFirst = SGPRImportant;
226 for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
227 if (SGPRFirst) {
228 auto SW = getSGPRTuplesWeight();
229 auto OtherSW = O.getSGPRTuplesWeight();
230 if (SW != OtherSW)
231 return SW < OtherSW;
232 } else {
233 auto VW = getVGPRTuplesWeight();
234 auto OtherVW = O.getVGPRTuplesWeight();
235 if (VW != OtherVW)
236 return VW < OtherVW;
237 }
238 }
239
240 // Give final precedence to lower general RP.
241 return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
242 (getVGPRNum(ST.hasGFX90AInsts()) <
243 O.getVGPRNum(ST.hasGFX90AInsts()));
244}
245
247 unsigned DynamicVGPRBlockSize) {
248 return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
249 OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
250 << "AGPRs: " << RP.getAGPRNum();
251 if (ST)
252 OS << "(O"
253 << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
254 DynamicVGPRBlockSize)
255 << ')';
256 OS << ", SGPRs: " << RP.getSGPRNum();
257 if (ST)
258 OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
259 OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
260 << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
261 if (ST)
262 OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
263 OS << '\n';
264 });
265}
266
268 const MachineRegisterInfo &MRI) {
269 assert(MO.isDef() && MO.isReg() && MO.getReg().isVirtual());
270
271 // We don't rely on read-undef flag because in case of tentative schedule
272 // tracking it isn't set correctly yet. This works correctly however since
273 // use mask has been tracked before using LIS.
274 return MO.getSubReg() == 0 ?
275 MRI.getMaxLaneMaskForVReg(MO.getReg()) :
277}
278
279static void
281 const MachineInstr &MI, const LiveIntervals &LIS,
282 const MachineRegisterInfo &MRI) {
283
284 auto &TRI = *MRI.getTargetRegisterInfo();
285 for (const auto &MO : MI.operands()) {
286 if (!MO.isReg() || !MO.getReg().isVirtual())
287 continue;
288 if (!MO.isUse() || !MO.readsReg())
289 continue;
290
291 Register Reg = MO.getReg();
292 auto I = llvm::find_if(VRegMaskOrUnits, [Reg](const VRegMaskOrUnit &RM) {
293 return RM.VRegOrUnit.asVirtualReg() == Reg;
294 });
295
296 auto &P = I == VRegMaskOrUnits.end()
297 ? VRegMaskOrUnits.emplace_back(VirtRegOrUnit(Reg),
299 : *I;
300
301 P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(MO.getSubReg())
303 }
304
305 SlotIndex InstrSI;
306 for (auto &P : VRegMaskOrUnits) {
307 auto &LI = LIS.getInterval(P.VRegOrUnit.asVirtualReg());
308 if (!LI.hasSubRanges())
309 continue;
310
311 // For a tentative schedule LIS isn't updated yet but livemask should
312 // remain the same on any schedule. Subreg defs can be reordered but they
313 // all must dominate uses anyway.
314 if (!InstrSI)
315 InstrSI = LIS.getInstructionIndex(MI).getBaseIndex();
316
317 P.LaneMask = getLiveLaneMask(LI, InstrSI, MRI, P.LaneMask);
318 }
319}
320
321/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
323 const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
324 bool TrackLaneMasks, Register Reg, SlotIndex Pos,
325 function_ref<bool(const LiveRange &LR, SlotIndex Pos)> Property) {
326 assert(Reg.isVirtual());
327 const LiveInterval &LI = LIS.getInterval(Reg);
328 LaneBitmask Result;
329 if (TrackLaneMasks && LI.hasSubRanges()) {
330 for (const LiveInterval::SubRange &SR : LI.subranges()) {
331 if (Property(SR, Pos))
332 Result |= SR.LaneMask;
333 }
334 } else if (Property(LI, Pos)) {
335 Result =
336 TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(Reg) : LaneBitmask::getAll();
337 }
338
339 return Result;
340}
341
342/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
343/// Helper to find a vreg use between two indices {PriorUseIdx, NextUseIdx}.
344/// The query starts with a lane bitmask which gets lanes/bits removed for every
345/// use we find.
346static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
347 SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
348 const MachineRegisterInfo &MRI,
349 const SIRegisterInfo *TRI,
350 const LiveIntervals *LIS,
351 bool Upward = false) {
352 for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
353 if (MO.isUndef())
354 continue;
355 const MachineInstr *MI = MO.getParent();
356 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
357 bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
358 : (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
359 if (!InRange)
360 continue;
361
362 unsigned SubRegIdx = MO.getSubReg();
363 LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
364 LastUseMask &= ~UseMask;
365 if (LastUseMask.none())
366 return LaneBitmask::getNone();
367 }
368 return LastUseMask;
369}
370
371////////////////////////////////////////////////////////////////////////////////
372// GCNRPTarget
373
375 : GCNRPTarget(RP, MF) {
376 const Function &F = MF.getFunction();
377 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
378 setTarget(ST.getMaxNumSGPRs(F), ST.getMaxNumVGPRs(F));
379}
380
381GCNRPTarget::GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs,
382 const MachineFunction &MF, const GCNRegPressure &RP)
383 : GCNRPTarget(RP, MF) {
384 setTarget(NumSGPRs, NumVGPRs);
385}
386
387GCNRPTarget::GCNRPTarget(unsigned Occupancy, const MachineFunction &MF,
388 const GCNRegPressure &RP)
389 : GCNRPTarget(RP, MF) {
390 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
391 unsigned DynamicVGPRBlockSize =
393 setTarget(ST.getMaxNumSGPRs(Occupancy, /*Addressable=*/false),
394 ST.getMaxNumVGPRs(Occupancy, DynamicVGPRBlockSize));
395}
396
397void GCNRPTarget::setTarget(unsigned NumSGPRs, unsigned NumVGPRs) {
398 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
399 MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
400 MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
401 if (UnifiedRF) {
402 unsigned DynamicVGPRBlockSize =
403 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
404 MaxUnifiedVGPRs =
405 std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs);
406 } else {
407 MaxUnifiedVGPRs = 0;
408 }
409}
410
412 const MachineRegisterInfo &MRI = MF.getRegInfo();
413 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
415 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
416
417 RegExcess Excess(MF, RP, *this);
418
419 if (SRI->isSGPRClass(RC))
420 return Excess.SGPR;
421
422 if (SRI->isAGPRClass(RC))
423 return (UnifiedRF && Excess.VGPR) || Excess.AGPR;
424
425 return (UnifiedRF && Excess.VGPR) || Excess.ArchVGPR;
426}
427
429 RegExcess Excess(MF, RP, *this);
430 if (SaveRP.getSGPRNum() != 0 && Excess.SGPR != 0)
431 return true;
432 if (SaveRP.getArchVGPRNum() != 0 && Excess.ArchVGPR != 0)
433 return true;
434 if (SaveRP.getAGPRNum() != 0 && Excess.AGPR != 0)
435 return true;
436 if (UnifiedRF && Excess.VGPR != 0)
437 return SaveRP.getArchVGPRNum() != 0 || SaveRP.getAGPRNum() != 0;
438 return false;
439}
440
441unsigned GCNRPTarget::getNumRegsBenefit(const GCNRegPressure &SaveRP) const {
442 RegExcess Excess(MF, RP, *this);
443 const unsigned NumVGPRAboveAddrLimit =
444 std::min(Excess.ArchVGPR, SaveRP.getArchVGPRNum()) +
445 std::min(Excess.AGPR, SaveRP.getAGPRNum());
446 unsigned NumRegsSaved =
447 std::min(Excess.SGPR, SaveRP.getSGPRNum()) + NumVGPRAboveAddrLimit;
448
449 if (UnifiedRF && Excess.VGPR) {
450 // We have already accounted for excess pressure above addressive limits for
451 // the individual VGPR classes. However for targets with unified RFs there
452 // is also a unified VGPR pressure (ArchVGPR + AGPR combination) limit to
453 // honor that may be more restrictive that the per-VGPR-class limits. We
454 // must also be careful not to double-count VGPR saves that may contribute
455 // to lowering pressure both above the addressable limit in their respective
456 // class as well as in the unified VGPR limit.
457 const unsigned VGPRSave = SaveRP.getArchVGPRNum() + SaveRP.getAGPRNum();
458 if (NumVGPRAboveAddrLimit < VGPRSave)
459 NumRegsSaved += std::min(Excess.VGPR, VGPRSave - NumVGPRAboveAddrLimit);
460 }
461
462 return NumRegsSaved;
463}
464
465bool GCNRPTarget::satisfied(const GCNRegPressure &TestRP) const {
466 if (TestRP.getSGPRNum() > MaxSGPRs || TestRP.getVGPRNum(false) > MaxVGPRs)
467 return false;
468 if (UnifiedRF && TestRP.getVGPRNum(true) > MaxUnifiedVGPRs)
469 return false;
470 return true;
471}
472
474 RegExcess Excess(MF, RP, *this);
475 return Excess.hasVectorRegisterExcess();
476}
477
478///////////////////////////////////////////////////////////////////////////////
479// GCNRPTracker
480
482 const LiveIntervals &LIS,
483 const MachineRegisterInfo &MRI,
484 LaneBitmask LaneMaskFilter) {
485 return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
486}
487
489 const MachineRegisterInfo &MRI,
490 LaneBitmask LaneMaskFilter) {
491 LaneBitmask LiveMask;
492 if (LI.hasSubRanges()) {
493 for (const auto &S : LI.subranges())
494 if ((S.LaneMask & LaneMaskFilter).any() && S.liveAt(SI)) {
495 LiveMask |= S.LaneMask;
496 assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
497 }
498 } else if (LI.liveAt(SI)) {
499 LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
500 }
501 LiveMask &= LaneMaskFilter;
502 return LiveMask;
503}
504
506 const LiveIntervals &LIS,
507 const MachineRegisterInfo &MRI,
508 GCNRegPressure::RegKind RegKind) {
510 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
511 auto Reg = Register::index2VirtReg(I);
512 if (RegKind != GCNRegPressure::TOTAL_KINDS &&
513 GCNRegPressure::getRegKind(Reg, MRI) != RegKind)
514 continue;
515 if (!LIS.hasInterval(Reg))
516 continue;
517 auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
518 if (LiveMask.any())
519 LiveRegs[Reg] = LiveMask;
520 }
521 return LiveRegs;
522}
523
524void GCNRPTracker::reset(const MachineInstr &MI, bool After) {
525 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
526 if (!MI.isDebugInstr()) {
527 SlotIndex SI = LIS.getInstructionIndex(MI);
528 if (After)
529 SI = SI.getDeadSlot();
530 reset(MRI, SI);
531 return;
532 }
533
534 // Look for the first valid index after the provided debug MI.
535 MachineBasicBlock::const_iterator It = MI.getIterator(),
536 MBBEnd = MI.getParent()->end();
539 if (NonDbgMI == MBBEnd) {
540 // There are no non-debug instruction between MI and the end of the
541 // block, so we reset the tracker at the end of the block.
542 reset(*MI.getParent(), /*End=*/true);
543 return;
544 }
545 // MI is a debug instruction so register pressure before or after it is
546 // identical. Since we moved forward to finding a non-debug instruction
547 // in the block, we reset the tracker before that instruction i.e., at its
548 // base index.
549 reset(MRI, LIS.getInstructionIndex(*NonDbgMI));
550}
551
553 SlotIndex SI = End ? LIS.getSlotIndexes()->getMBBLastIdx(&MBB)
554 : LIS.getMBBStartIdx(&MBB);
555 reset(MBB.getParent()->getRegInfo(), SI);
556}
557
564
566 const LiveRegSet &LiveRegs) {
567 this->MRI = &MRI;
568 LastTrackedMI = nullptr;
569 if (&this->LiveRegs != &LiveRegs)
570 this->LiveRegs = LiveRegs;
572}
573
574/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
577 LIS, *MRI, true, Reg, Pos.getBaseIndex(),
578 [](const LiveRange &LR, SlotIndex Pos) {
579 const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
580 return S != nullptr && S->end == Pos.getRegSlot();
581 });
582}
583
584////////////////////////////////////////////////////////////////////////////////
585// GCNUpwardRPTracker
586
588 assert(MRI && "call reset first");
589
590 LastTrackedMI = &MI;
591
592 if (MI.isDebugInstr())
593 return;
594
595 // Kill all defs.
596 GCNRegPressure DefPressure, ECDefPressure;
597 bool HasECDefs = false;
598 for (const MachineOperand &MO : MI.all_defs()) {
599 if (!MO.getReg().isVirtual())
600 continue;
601
602 Register Reg = MO.getReg();
603 LaneBitmask DefMask = getDefRegMask(MO, *MRI);
604
605 // Treat a def as fully live at the moment of definition: keep a record.
606 if (MO.isEarlyClobber()) {
607 ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
608 HasECDefs = true;
609 } else
610 DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
611
612 auto I = LiveRegs.find(Reg);
613 if (I == LiveRegs.end())
614 continue;
615
616 LaneBitmask &LiveMask = I->second;
617 LaneBitmask PrevMask = LiveMask;
618 LiveMask &= ~DefMask;
619 CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
620 if (LiveMask.none())
621 LiveRegs.erase(I);
622 }
623
624 // Update MaxPressure with defs pressure.
625 DefPressure += CurPressure;
626 if (HasECDefs)
627 DefPressure += ECDefPressure;
628 MaxPressure = max(DefPressure, MaxPressure);
629
630 // Make uses alive.
632 collectVirtualRegUses(RegUses, MI, LIS, *MRI);
633 for (const VRegMaskOrUnit &U : RegUses) {
634 LaneBitmask &LiveMask = LiveRegs[U.VRegOrUnit.asVirtualReg()];
635 LaneBitmask PrevMask = LiveMask;
636 LiveMask |= U.LaneMask;
637 CurPressure.inc(U.VRegOrUnit.asVirtualReg(), PrevMask, LiveMask, *MRI);
638 }
639
640 // Update MaxPressure with uses plus early-clobber defs pressure.
641 MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
643
645}
646
647////////////////////////////////////////////////////////////////////////////////
648// GCNDownwardRPTracker
649
652 const LiveRegSet *LiveRegsCopy) {
653 MBBEnd = MI.getParent()->end();
654 assert(End == MBBEnd ||
655 End->getParent()->end() == MBBEnd && "end unrelated to MI block");
656 NextMI = &MI;
657 NextMI = skipDebugInstructionsForward(NextMI, End);
658
659 // Do not use the MI to compute live registers when a set is provided.
660 // Otherwise the first non-debug instruction after the provided one (or the
661 // end of the block, if no such instruction exists) serves as the basis to
662 // compute a live register set.
663 if (LiveRegsCopy)
664 GCNRPTracker::reset(MI.getMF()->getRegInfo(), *LiveRegsCopy);
665 else if (NextMI != MBBEnd)
666 GCNRPTracker::reset(*NextMI, /*After=*/false);
667 else
668 GCNRPTracker::reset(*MI.getParent(), /*End=*/true);
669 return NextMI != End;
670}
671
673 bool UseInternalIterator) {
674 assert(MRI && "call reset first");
676 const MachineInstr *CurrMI;
677 if (UseInternalIterator) {
678 if (!LastTrackedMI)
679 return NextMI == MBBEnd;
680
681 assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
682 CurrMI = LastTrackedMI;
683
684 SI = NextMI == MBBEnd
685 ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
686 : LIS.getInstructionIndex(*NextMI).getBaseIndex();
687 } else { //! UseInternalIterator
688 SI = LIS.getInstructionIndex(*MI).getBaseIndex();
689 CurrMI = MI;
690 }
691
692 assert(SI.isValid());
693
694 // Remove dead registers or mask bits.
695 SmallSet<Register, 8> SeenRegs;
696 for (auto &MO : CurrMI->operands()) {
697 if (!MO.isReg() || !MO.getReg().isVirtual())
698 continue;
699 if (MO.isUse() && !MO.readsReg())
700 continue;
701 if (!UseInternalIterator && MO.isDef())
702 continue;
703 if (!SeenRegs.insert(MO.getReg()).second)
704 continue;
705 const LiveInterval &LI = LIS.getInterval(MO.getReg());
706 if (LI.hasSubRanges()) {
707 auto It = LiveRegs.end();
708 for (const auto &S : LI.subranges()) {
709 if (!S.liveAt(SI)) {
710 if (It == LiveRegs.end()) {
711 It = LiveRegs.find(MO.getReg());
712 if (It == LiveRegs.end())
713 llvm_unreachable("register isn't live");
714 }
715 auto PrevMask = It->second;
716 It->second &= ~S.LaneMask;
717 CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
718 }
719 }
720 if (It != LiveRegs.end() && It->second.none())
721 LiveRegs.erase(It);
722 } else if (!LI.liveAt(SI)) {
723 auto It = LiveRegs.find(MO.getReg());
724 if (It == LiveRegs.end())
725 llvm_unreachable("register isn't live");
726 CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
727 LiveRegs.erase(It);
728 }
729 }
730
732
733 LastTrackedMI = nullptr;
734
735 return UseInternalIterator && (NextMI == MBBEnd);
736}
737
739 bool UseInternalIterator) {
740 if (UseInternalIterator) {
741 LastTrackedMI = &*NextMI++;
742 NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
743 } else {
745 }
746
747 const MachineInstr *CurrMI = LastTrackedMI;
748
749 // Add new registers or mask bits.
750 for (const auto &MO : CurrMI->all_defs()) {
751 Register Reg = MO.getReg();
752 if (!Reg.isVirtual())
753 continue;
754 auto &LiveMask = LiveRegs[Reg];
755 auto PrevMask = LiveMask;
756 LiveMask |= getDefRegMask(MO, *MRI);
757 CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
758 }
759
761}
762
763bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
764 if (UseInternalIterator && NextMI == MBBEnd)
765 return false;
766
767 advanceBeforeNext(MI, UseInternalIterator);
768 advanceToNext(MI, UseInternalIterator);
769 if (!UseInternalIterator) {
770 // We must remove any dead def lanes from the current RP
771 advanceBeforeNext(MI, true);
772 }
773 return true;
774}
775
777 bool AnyAdvance = false;
778 while (NextMI != End && advance())
779 AnyAdvance = true;
780 return AnyAdvance;
781}
782
785 const LiveRegSet *LiveRegsCopy) {
786 if (!reset(*Begin, End, LiveRegsCopy))
787 return false;
788 return advance(End);
789}
790
792 const GCNRPTracker::LiveRegSet &TrackedLR,
793 const TargetRegisterInfo *TRI, StringRef Pfx) {
794 return Printable([&LISLR, &TrackedLR, TRI, Pfx](raw_ostream &OS) {
795 for (auto const &P : TrackedLR) {
796 auto I = LISLR.find(P.first);
797 if (I == LISLR.end()) {
798 OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
799 << " isn't found in LIS reported set\n";
800 } else if (I->second != P.second) {
801 OS << Pfx << printReg(P.first, TRI)
802 << " masks doesn't match: LIS reported " << PrintLaneMask(I->second)
803 << ", tracked " << PrintLaneMask(P.second) << '\n';
804 }
805 }
806 for (auto const &P : LISLR) {
807 auto I = TrackedLR.find(P.first);
808 if (I == TrackedLR.end()) {
809 OS << Pfx << printReg(P.first, TRI) << ":L" << PrintLaneMask(P.second)
810 << " isn't found in tracked set\n";
811 }
812 }
813 });
814}
815
818 const SIRegisterInfo *TRI) const {
819 assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");
820
821 SlotIndex SlotIdx;
822 SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();
823
824 // Account for register pressure similar to RegPressureTracker::recede().
825 RegisterOperands RegOpers;
826 RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
827 RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
828 GCNRegPressure TempPressure = CurPressure;
829
830 for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
831 if (!Use.VRegOrUnit.isVirtualReg())
832 continue;
833 Register Reg = Use.VRegOrUnit.asVirtualReg();
834 LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
835 if (LastUseMask.none())
836 continue;
837 // The LastUseMask is queried from the liveness information of instruction
838 // which may be further down the schedule. Some lanes may actually not be
839 // last uses for the current position.
840 // FIXME: allow the caller to pass in the list of vreg uses that remain
841 // to be bottom-scheduled to avoid searching uses at each query.
842 SlotIndex CurrIdx;
843 const MachineBasicBlock *MBB = MI->getParent();
845 LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
846 if (IdxPos == MBB->end()) {
847 CurrIdx = LIS.getMBBEndIdx(MBB);
848 } else {
849 CurrIdx = LIS.getInstructionIndex(*IdxPos).getRegSlot();
850 }
851
852 LastUseMask =
853 findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, TRI, &LIS);
854 if (LastUseMask.none())
855 continue;
856
857 auto It = LiveRegs.find(Reg);
858 LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
859 LaneBitmask NewMask = LiveMask & ~LastUseMask;
860 TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
861 }
862
863 // Generate liveness for defs.
864 for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
865 if (!Def.VRegOrUnit.isVirtualReg())
866 continue;
867 Register Reg = Def.VRegOrUnit.asVirtualReg();
868 auto It = LiveRegs.find(Reg);
869 LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
870 LaneBitmask NewMask = LiveMask | Def.LaneMask;
871 TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
872 }
873
874 return TempPressure;
875}
876
878 const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
879 const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
880 const auto &TrackedLR = LiveRegs;
881
882 if (!isEqual(LISLR, TrackedLR)) {
883 dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
884 " LIS reported livesets mismatch:\n"
885 << print(LISLR, *MRI);
886 reportMismatch(LISLR, TrackedLR, MRI->getTargetRegisterInfo());
887 return false;
888 }
889
890 auto LISPressure = getRegPressure(*MRI, LISLR);
891 if (LISPressure != CurPressure) {
892 dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: "
893 << print(CurPressure) << "LIS rpt: " << print(LISPressure);
894 return false;
895 }
896 return true;
897}
898
900 const MachineRegisterInfo &MRI) {
901 return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
903 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
905 auto It = LiveRegs.find(Reg);
906 if (It != LiveRegs.end() && It->second.any())
907 OS << ' ' << printReg(Reg, TRI) << ':' << PrintLaneMask(It->second);
908 }
909 OS << '\n';
910 });
911}
912
913void GCNRegPressure::dump() const { dbgs() << print(*this); }
914
916 "amdgpu-print-rp-downward",
917 cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"),
918 cl::init(false), cl::Hidden);
919
922
923INITIALIZE_PASS(GCNRegPressurePrinter, "amdgpu-print-rp", "", true, true)
924
925// Return lanemask of Reg's subregs that are live-through at [Begin, End] and
926// are fully covered by Mask.
927static LaneBitmask
929 Register Reg, SlotIndex Begin, SlotIndex End,
930 LaneBitmask Mask = LaneBitmask::getAll()) {
931
932 auto IsInOneSegment = [Begin, End](const LiveRange &LR) -> bool {
933 auto *Segment = LR.getSegmentContaining(Begin);
934 return Segment && Segment->contains(End);
935 };
936
937 LaneBitmask LiveThroughMask;
938 const LiveInterval &LI = LIS.getInterval(Reg);
939 if (LI.hasSubRanges()) {
940 for (auto &SR : LI.subranges()) {
941 if ((SR.LaneMask & Mask) == SR.LaneMask && IsInOneSegment(SR))
942 LiveThroughMask |= SR.LaneMask;
943 }
944 } else {
946 if ((RegMask & Mask) == RegMask && IsInOneSegment(LI))
947 LiveThroughMask = RegMask;
948 }
949
950 return LiveThroughMask;
951}
952
// Body of a per-function routine (presumably
// GCNRegPressurePrinter::runOnMachineFunction) that writes a YAML-style
// report to dbgs(). For every basic block it prints the live-in set, the
// SGPR/VGPR pressure before and at each non-debug instruction, the pressure
// at the end of the block, and the live-out and live-through sets. It always
// returns false.
// NOTE(review): the signature line and the declarations of TRI, LIS and RP
// are not visible in this listing; the code below uses them as given.
954 const MachineRegisterInfo &MRI = MF.getRegInfo();
957
958 auto &OS = dbgs();
959
960// Leading spaces are important for YAML syntax.
961#define PFX " "
962
// Document header: the function name, then a literal block ("body: |") that
// holds everything printed below.
963 OS << "---\nname: " << MF.getName() << "\nbody: |\n";
964
// Renders one pressure value as two left-aligned, 5-wide columns: the SGPR
// count followed by getVGPRNum(false).
965 auto printRP = [](const GCNRegPressure &RP) {
966 return Printable([&RP](raw_ostream &OS) {
967 OS << format(PFX " %-5d", RP.getSGPRNum())
968 << format(" %-5d", RP.getVGPRNum(false));
969 });
970 };
971
// When the tracker's live set differs from the one derived from LIS, prints
// the LIS set on a "mis LIS" line followed by the output of reportMismatch().
972 auto ReportLISMismatchIfAny = [&](const GCNRPTracker::LiveRegSet &TrackedLR,
973 const GCNRPTracker::LiveRegSet &LISLR) {
974 if (LISLR != TrackedLR) {
975 OS << PFX " mis LIS: " << llvm::print(LISLR, MRI)
976 << reportMismatch(LISLR, TrackedLR, TRI, PFX " ");
977 }
978 };
979
980 // Register pressure before and at an instruction (in program order).
982
983 for (auto &MBB : MF) {
// RP is refilled for each block.
984 RP.clear();
985 RP.reserve(MBB.size());
986
987 OS << PFX;
988 MBB.printName(OS);
989 OS << ":\n";
990
991 SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
992 SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
993
994 GCNRPTracker::LiveRegSet LiveIn, LiveOut;
995 GCNRegPressure RPAtMBBEnd;
996
997 if (UseDownwardTracker) {
// Downward mode. For an empty block the live-in and live-out sets are the
// same set, taken from LIS at the block start.
998 if (MBB.empty()) {
999 LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
1000 RPAtMBBEnd = getRegPressure(MRI, LiveIn);
1001 } else {
1002 GCNDownwardRPTracker RPT(LIS);
1003 RPT.reset(MBB.front(), MBB.end());
1004
1005 LiveIn = RPT.getLiveRegs();
1006
// Each iteration records the pressure right before the next instruction and
// the pressure after advancing to it; entries are appended in program order.
1007 while (!RPT.advanceBeforeNext()) {
1008 GCNRegPressure RPBeforeMI = RPT.getPressure();
1009 RPT.advanceToNext();
1010 RP.emplace_back(RPBeforeMI, RPT.getPressure());
1011 }
1012
1013 LiveOut = RPT.getLiveRegs();
1014 RPAtMBBEnd = RPT.getPressure();
1015 }
1016 } else {
// Upward mode: start at the block's last slot and recede one instruction at
// a time, so RP is filled in reverse program order.
1017 GCNUpwardRPTracker RPT(LIS);
1018 RPT.reset(MRI, MBBLastSlot);
1019
1020 LiveOut = RPT.getLiveRegs();
1021 RPAtMBBEnd = RPT.getPressure();
1022
1023 for (auto &MI : reverse(MBB)) {
// The maximum is reset before every recede, presumably so that the value
// read afterwards reflects this instruction only. Debug instructions get no
// RP entry.
1024 RPT.resetMaxPressure();
1025 RPT.recede(MI);
1026 if (!MI.isDebugInstr())
1027 RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
1028 }
1029
1030 LiveIn = RPT.getLiveRegs();
1031 }
1032
1033 OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
// In upward mode the live-in set comes from the tracker, so it is
// cross-checked against LIS at the block start.
1034 if (!UseDownwardTracker)
1035 ReportLISMismatchIfAny(LiveIn, getLiveRegs(MBBStartSlot, LIS, MRI));
1036
1037 OS << PFX " SGPR VGPR\n";
// I counts the non-debug instructions printed so far. Upward mode stored its
// entries back to front, so it indexes RP from the end.
1038 int I = 0;
1039 for (auto &MI : MBB) {
// Debug instructions have no pressure columns; only padding is printed
// before them.
1040 if (!MI.isDebugInstr()) {
1041 auto &[RPBeforeInstr, RPAtInstr] =
1042 RP[UseDownwardTracker ? I : (RP.size() - 1 - I)];
1043 ++I;
1044 OS << printRP(RPBeforeInstr) << '\n' << printRP(RPAtInstr) << " ";
1045 } else
1046 OS << PFX " ";
1047 MI.print(OS);
1048 }
1049 OS << printRP(RPAtMBBEnd) << '\n';
1050
1051 OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
1053 ReportLISMismatchIfAny(LiveOut, getLiveRegs(MBBLastSlot, LIS, MRI));
1054
// Live-through candidates are registers with at least one lane that is both
// live-in and live-out; only a non-empty LTMask is recorded.
1055 GCNRPTracker::LiveRegSet LiveThrough;
1056 for (auto [Reg, Mask] : LiveIn) {
1057 LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
1058 if (MaskIntersection.any()) {
1060 MRI, LIS, Reg, MBBStartSlot, MBBLastSlot, MaskIntersection);
1061 if (LTMask.any())
1062 LiveThrough[Reg] = LTMask;
1063 }
1064 }
1065 OS << PFX " Live-thr:" << llvm::print(LiveThrough, MRI);
1066 OS << printRP(getRegPressure(MRI, LiveThrough)) << '\n';
1067 }
// "..." terminates the document opened by "---" above.
1068 OS << "...\n";
1069 return false;
1070
1071#undef PFX
1072}
1073
1074#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1077 LiveIntervals &LIS,
1078 const MachineLoopInfo *MLI) {
1079
1080 const MachineRegisterInfo &MRI = MF.getRegInfo();
1082 auto &OS = dbgs();
1083 const char *RegName = GCNRegPressure::getName(Kind);
1084
1085 unsigned MaxNumRegs = 0;
1086 const MachineInstr *MaxPressureMI = nullptr;
1087 GCNUpwardRPTracker RPT(LIS);
1088 for (const MachineBasicBlock &MBB : MF) {
1089 RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
1090 for (const MachineInstr &MI : reverse(MBB)) {
1091 RPT.recede(MI);
1092 unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
1093 if (NumRegs > MaxNumRegs) {
1094 MaxNumRegs = NumRegs;
1095 MaxPressureMI = &MI;
1096 }
1097 }
1098 }
1099
1100 SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);
1101
1102 // Max pressure can occur at either the early-clobber or register slot.
1103 // Choose the maximum liveset between both slots. This is ugly but this is
1104 // diagnostic code.
1105 SlotIndex ECSlot = MISlot.getRegSlot(true);
1106 SlotIndex RSlot = MISlot.getRegSlot(false);
1107 GCNRPTracker::LiveRegSet ECLiveSet = getLiveRegs(ECSlot, LIS, MRI, Kind);
1108 GCNRPTracker::LiveRegSet RLiveSet = getLiveRegs(RSlot, LIS, MRI, Kind);
1109 unsigned ECNumRegs = getRegPressure(MRI, ECLiveSet).getNumRegs(Kind);
1110 unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
1111 GCNRPTracker::LiveRegSet *LiveSet =
1112 ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
1113 SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
1114 assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);
1115
1116 // Split live registers into single-def and multi-def sets.
1117 GCNRegPressure SDefPressure, MDefPressure;
1118 SmallVector<Register, 16> SDefRegs, MDefRegs;
1119 for (auto [Reg, LaneMask] : *LiveSet) {
1120 assert(GCNRegPressure::getRegKind(Reg, MRI) == Kind);
1121 LiveInterval &LI = LIS.getInterval(Reg);
1122 if (LI.getNumValNums() == 1 ||
1123 (LI.hasSubRanges() &&
1124 llvm::all_of(LI.subranges(), [](const LiveInterval::SubRange &SR) {
1125 return SR.getNumValNums() == 1;
1126 }))) {
1127 SDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
1128 SDefRegs.push_back(Reg);
1129 } else {
1130 MDefPressure.inc(Reg, LaneBitmask::getNone(), LaneMask, MRI);
1131 MDefRegs.push_back(Reg);
1132 }
1133 }
1134 unsigned SDefNumRegs = SDefPressure.getNumRegs(Kind);
1135 unsigned MDefNumRegs = MDefPressure.getNumRegs(Kind);
1136 assert(SDefNumRegs + MDefNumRegs == MaxNumRegs);
1137
1138 auto printLoc = [&](const MachineBasicBlock *MBB, SlotIndex SI) {
1139 return Printable([&, MBB, SI](raw_ostream &OS) {
1140 OS << SI << ':' << printMBBReference(*MBB);
1141 if (MLI)
1142 if (const MachineLoop *ML = MLI->getLoopFor(MBB))
1143 OS << " (LoopHdr " << printMBBReference(*ML->getHeader())
1144 << ", Depth " << ML->getLoopDepth() << ")";
1145 });
1146 };
1147
1148 auto PrintRegInfo = [&](Register Reg, LaneBitmask LiveMask) {
1149 GCNRegPressure RegPressure;
1150 RegPressure.inc(Reg, LaneBitmask::getNone(), LiveMask, MRI);
1151 OS << " " << printReg(Reg, TRI) << ':'
1152 << TRI->getRegClassName(MRI.getRegClass(Reg)) << ", LiveMask "
1153 << PrintLaneMask(LiveMask) << " (" << RegPressure.getNumRegs(Kind) << ' '
1154 << RegName << "s)\n";
1155
1156 // Use std::map to sort def/uses by SlotIndex.
1157 std::map<SlotIndex, const MachineInstr *> Instrs;
1158 for (const MachineInstr &MI : MRI.reg_nodbg_instructions(Reg)) {
1159 Instrs[LIS.getInstructionIndex(MI).getRegSlot()] = &MI;
1160 }
1161
1162 for (const auto &[SI, MI] : Instrs) {
1163 OS << " ";
1164 if (MI->definesRegister(Reg, TRI))
1165 OS << "def ";
1166 if (MI->readsRegister(Reg, TRI))
1167 OS << "use ";
1168 OS << printLoc(MI->getParent(), SI) << ": " << *MI;
1169 }
1170 };
1171
1172 OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
1173 << " ***\n";
1174 OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
1175 << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
1176 << *MaxPressureMI;
1177
1178 OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
1179 << RegName << "s):\n";
1180
1181 // Sort SDefRegs by number of uses (smallest first)
1182 llvm::sort(SDefRegs, [&](Register A, Register B) {
1183 return std::distance(MRI.use_nodbg_begin(A), MRI.use_nodbg_end()) <
1184 std::distance(MRI.use_nodbg_begin(B), MRI.use_nodbg_end());
1185 });
1186
1187 for (const Register Reg : SDefRegs) {
1188 PrintRegInfo(Reg, LiveSet->lookup(Reg));
1189 }
1190
1191 OS << "\nLive registers with multiple definitions (" << MDefNumRegs << ' '
1192 << RegName << "s):\n";
1193 for (const Register Reg : MDefRegs) {
1194 PrintRegInfo(Reg, LiveSet->lookup(Reg));
1195 }
1196}
1197#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
constexpr LLT S1
MachineBasicBlock & MBB
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
static void collectVirtualRegUses(SmallVectorImpl< VRegMaskOrUnit > &VRegMaskOrUnits, const MachineInstr &MI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
#define PFX
static cl::opt< bool > UseDownwardTracker("amdgpu-print-rp-downward", cl::desc("Use GCNDownwardRPTracker for GCNRegPressurePrinter pass"), cl::init(false), cl::Hidden)
static LaneBitmask getDefRegMask(const MachineOperand &MO, const MachineRegisterInfo &MRI)
static LaneBitmask getRegLiveThroughMask(const MachineRegisterInfo &MRI, const LiveIntervals &LIS, Register Reg, SlotIndex Begin, SlotIndex End, LaneBitmask Mask=LaneBitmask::getAll())
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping the number of S...
IRTranslator LLVM IR MI
#define RegName(no)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static bool InRange(int64_t Value, unsigned short Shift, int LBound, int HBound)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, bool TrackLaneMasks, VirtRegOrUnit VRegOrUnit, SlotIndex Pos, LaneBitmask SafeDefault, bool(*Property)(const LiveRange &LR, SlotIndex Pos))
static LaneBitmask findUseBetween(VirtRegOrUnit VRegOrUnit, LaneBitmask LastUseMask, SlotIndex PriorUseIdx, SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, const LiveIntervals *LIS)
Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
bool reset(const MachineInstr &MI, MachineBasicBlock::const_iterator End, const LiveRegSet *LiveRegs=nullptr)
Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.
bool advanceBeforeNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state right before the next MI or after the end of MBB.
bool advance(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state at the next MI.
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
void advanceToNext(MachineInstr *MI=nullptr, bool UseInternalIterator=true)
Move to the state at the MI, advanceBeforeNext has to be called first.
GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP)
Sets up the target such that the register pressure starting at RP does not show register spilling on ...
bool isSaveBeneficial(Register Reg) const
Determines whether saving virtual register Reg will be beneficial towards achieving the RP target.
bool hasVectorRegisterExcess() const
bool satisfied() const
Whether the current RP is at or below the defined pressure target.
void setTarget(unsigned NumSGPRs, unsigned NumVGPRs)
Changes the target (same semantics as constructor).
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
const decltype(LiveRegs) & getLiveRegs() const
const MachineInstr * LastTrackedMI
GCNRegPressure CurPressure
DenseMap< unsigned, LaneBitmask > LiveRegSet
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp.
GCNRegPressure MaxPressure
const MachineRegisterInfo * MRI
const LiveIntervals & LIS
void reset(const MachineInstr &MI, bool After)
Resets tracker before or After the provided MI, which can be a debug instruction.
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
const GCNRegPressure & getMaxPressure() const
bool isValid() const
Returns whether the tracker's state after receding MI corresponds to the state reported by LIS.
void reset(const MachineInstr &MI)
Resets tracker to the point just after MI (in program order), which can be a debug instruction.
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
Register reg() const
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
bool hasInterval(Register Reg) const
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
bool liveAt(SlotIndex index) const
unsigned getNumValNums() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
MachineInstrBundleIterator< const MachineInstr > const_iterator
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
mop_range operands()
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
static use_nodbg_iterator use_nodbg_end()
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
List of registers defined and used by a machine instruction.
SmallVector< VRegMaskOrUnit, 8 > Defs
List of virtual registers and register units defined by the instruction which are not dead.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
SmallVector< VRegMaskOrUnit, 8 > Uses
List of virtual registers and register units read by the instruction.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
static unsigned getNumCoveredRegs(LaneBitmask LM)
bool isVectorSuperClass(const TargetRegisterClass *RC) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex getMBBLastIdx(const MachineBasicBlock *MBB) const
Returns the last valid index in the given basic block.
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the index past the last valid index in the given basic block.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Wrapper class representing a virtual register or register unit.
Definition Register.h:181
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter=LaneBitmask::getAll())
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, GCNRegPressure::RegKind RegKind=GCNRegPressure::TOTAL_KINDS)
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition LaneBitmask.h:92
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
constexpr NextUseDistance max(NextUseDistance A, NextUseDistance B)
char & GCNRegPressurePrinterID
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedL, const TargetRegisterInfo *TRI, StringRef Pfx=" ")
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
static RegKind getRegKind(unsigned Reg, const MachineRegisterInfo &MRI)
static constexpr const char * getName(RegKind Kind)
unsigned getNumRegs(RegKind Kind) const
unsigned getVGPRTuplesWeight() const
unsigned getVGPRNum(bool UnifiedVGPRFile) const
friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST, unsigned DynamicVGPRBlockSize)
void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask, const MachineRegisterInfo &MRI)
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
unsigned getSGPRTuplesWeight() const
bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy=std::numeric_limits< unsigned >::max()) const
Compares this GCNRegpressure to O, returning true if this is less.
static constexpr LaneBitmask getAll()
Definition LaneBitmask.h:82
constexpr bool none() const
Definition LaneBitmask.h:52
constexpr bool any() const
Definition LaneBitmask.h:53
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81
bool contains(SlotIndex I) const
Return true if the index is covered by this segment.