LLVM  15.0.0git
GCNSchedStrategy.cpp
Go to the documentation of this file.
1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This contains a MachineSchedStrategy implementation for maximizing wave
11 /// occupancy on GCN hardware.
12 //===----------------------------------------------------------------------===//
13 
#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
17 
18 #define DEBUG_TYPE "machine-scheduler"
19 
20 using namespace llvm;
21 
23  const MachineSchedContext *C) :
24  GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false),
25  HasExcessPressure(false), MF(nullptr) { }
26 
29 
30  MF = &DAG->MF;
31 
32  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
33 
34  // FIXME: This is also necessary, because some passes that run after
35  // scheduling and before regalloc increase register pressure.
36  const unsigned ErrorMargin = 3;
37 
38  SGPRExcessLimit =
39  Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
40  VGPRExcessLimit =
41  Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
42 
44  // Set the initial TargetOccupnacy to the maximum occupancy that we can
45  // achieve for this function. This effectively sets a lower bound on the
46  // 'Critical' register limits in the scheduler.
47  TargetOccupancy = MFI.getOccupancy();
48  SGPRCriticalLimit =
49  std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
50  VGPRCriticalLimit =
51  std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
52 
53  // Subtract error margin from register limits and avoid overflow.
54  SGPRCriticalLimit =
55  std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
56  VGPRCriticalLimit =
57  std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
58  SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
59  VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
60 }
61 
62 void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
63  bool AtTop, const RegPressureTracker &RPTracker,
64  const SIRegisterInfo *SRI,
65  unsigned SGPRPressure,
66  unsigned VGPRPressure) {
67 
68  Cand.SU = SU;
69  Cand.AtTop = AtTop;
70 
71  // getDownwardPressure() and getUpwardPressure() make temporary changes to
72  // the tracker, so we need to pass those function a non-const copy.
73  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
74 
75  Pressure.clear();
76  MaxPressure.clear();
77 
78  if (AtTop)
79  TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
80  else {
81  // FIXME: I think for bottom up scheduling, the register pressure is cached
82  // and can be retrieved by DAG->getPressureDif(SU).
83  TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
84  }
85 
86  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
87  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
88 
89  // If two instructions increase the pressure of different register sets
90  // by the same amount, the generic scheduler will prefer to schedule the
91  // instruction that increases the set with the least amount of registers,
92  // which in our case would be SGPRs. This is rarely what we want, so
93  // when we report excess/critical register pressure, we do it either
94  // only for VGPRs or only for SGPRs.
95 
96  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
97  const unsigned MaxVGPRPressureInc = 16;
98  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
99  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
100 
101 
102  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
103  // to increase the likelihood we don't go over the limits. We should improve
104  // the analysis to look through dependencies to find the path with the least
105  // register pressure.
106 
107  // We only need to update the RPDelta for instructions that increase register
108  // pressure. Instructions that decrease or keep reg pressure the same will be
109  // marked as RegExcess in tryCandidate() when they are compared with
110  // instructions that increase the register pressure.
111  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
112  HasExcessPressure = true;
113  Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
114  Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
115  }
116 
117  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
118  HasExcessPressure = true;
119  Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
120  Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
121  }
122 
123  // Register pressure is considered 'CRITICAL' if it is approaching a value
124  // that would reduce the wave occupancy for the execution unit. When
125  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
126  // has the same cost, so we don't need to prefer one over the other.
127 
128  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
129  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
130 
131  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
132  HasExcessPressure = true;
133  if (SGPRDelta > VGPRDelta) {
134  Cand.RPDelta.CriticalMax =
135  PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
136  Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
137  } else {
138  Cand.RPDelta.CriticalMax =
139  PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
140  Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
141  }
142  }
143 }
144 
145 // This function is mostly cut and pasted from
146 // GenericScheduler::pickNodeFromQueue()
147 void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
148  const CandPolicy &ZonePolicy,
149  const RegPressureTracker &RPTracker,
150  SchedCandidate &Cand) {
151  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
152  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
153  unsigned SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
154  unsigned VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
155  ReadyQueue &Q = Zone.Available;
156  for (SUnit *SU : Q) {
157 
158  SchedCandidate TryCand(ZonePolicy);
159  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
160  SGPRPressure, VGPRPressure);
161  // Pass SchedBoundary only when comparing nodes from the same boundary.
162  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
163  GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
164  if (TryCand.Reason != NoCand) {
165  // Initialize resource delta if needed in case future heuristics query it.
166  if (TryCand.ResDelta == SchedResourceDelta())
167  TryCand.initResourceDelta(Zone.DAG, SchedModel);
168  Cand.setBest(TryCand);
169  LLVM_DEBUG(traceCandidate(Cand));
170  }
171  }
172 }
173 
174 // This function is mostly cut and pasted from
175 // GenericScheduler::pickNodeBidirectional()
176 SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
177  // Schedule as far as possible in the direction of no choice. This is most
178  // efficient, but also provides the best heuristics for CriticalPSets.
179  if (SUnit *SU = Bot.pickOnlyChoice()) {
180  IsTopNode = false;
181  return SU;
182  }
183  if (SUnit *SU = Top.pickOnlyChoice()) {
184  IsTopNode = true;
185  return SU;
186  }
187  // Set the bottom-up policy based on the state of the current bottom zone and
188  // the instructions outside the zone, including the top zone.
189  CandPolicy BotPolicy;
190  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
191  // Set the top-down policy based on the state of the current top zone and
192  // the instructions outside the zone, including the bottom zone.
193  CandPolicy TopPolicy;
194  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
195 
196  // See if BotCand is still valid (because we previously scheduled from Top).
197  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
198  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
199  BotCand.Policy != BotPolicy) {
200  BotCand.reset(CandPolicy());
201  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
202  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
203  } else {
205 #ifndef NDEBUG
206  if (VerifyScheduling) {
207  SchedCandidate TCand;
208  TCand.reset(CandPolicy());
209  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
210  assert(TCand.SU == BotCand.SU &&
211  "Last pick result should correspond to re-picking right now");
212  }
213 #endif
214  }
215 
216  // Check if the top Q has a better candidate.
217  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
218  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
219  TopCand.Policy != TopPolicy) {
220  TopCand.reset(CandPolicy());
221  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
222  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
223  } else {
225 #ifndef NDEBUG
226  if (VerifyScheduling) {
227  SchedCandidate TCand;
228  TCand.reset(CandPolicy());
229  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
230  assert(TCand.SU == TopCand.SU &&
231  "Last pick result should correspond to re-picking right now");
232  }
233 #endif
234  }
235 
236  // Pick best from BotCand and TopCand.
237  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
238  dbgs() << "Bot Cand: "; traceCandidate(BotCand););
239  SchedCandidate Cand = BotCand;
241  GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
242  if (TopCand.Reason != NoCand) {
243  Cand.setBest(TopCand);
244  }
245  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
246 
247  IsTopNode = Cand.AtTop;
248  return Cand.SU;
249 }
250 
251 // This function is mostly cut and pasted from
252 // GenericScheduler::pickNode()
254  if (DAG->top() == DAG->bottom()) {
256  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
257  return nullptr;
258  }
259  SUnit *SU;
260  do {
262  SU = Top.pickOnlyChoice();
263  if (!SU) {
264  CandPolicy NoPolicy;
265  TopCand.reset(NoPolicy);
266  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
267  assert(TopCand.Reason != NoCand && "failed to find a candidate");
268  SU = TopCand.SU;
269  }
270  IsTopNode = true;
271  } else if (RegionPolicy.OnlyBottomUp) {
272  SU = Bot.pickOnlyChoice();
273  if (!SU) {
274  CandPolicy NoPolicy;
275  BotCand.reset(NoPolicy);
276  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
277  assert(BotCand.Reason != NoCand && "failed to find a candidate");
278  SU = BotCand.SU;
279  }
280  IsTopNode = false;
281  } else {
282  SU = pickNodeBidirectional(IsTopNode);
283  }
284  } while (SU->isScheduled);
285 
286  if (SU->isTopReady())
287  Top.removeReady(SU);
288  if (SU->isBottomReady())
289  Bot.removeReady(SU);
290 
291  if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) {
292  for (SDep &Dep : SU->Preds) {
293  if (Dep.isCluster()) {
294  HasClusteredNodes = true;
295  break;
296  }
297  }
298  }
299 
300  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
301  << *SU->getInstr());
302  return SU;
303 }
304 
306  std::unique_ptr<MachineSchedStrategy> S) :
308  ST(MF.getSubtarget<GCNSubtarget>()),
309  MFI(*MF.getInfo<SIMachineFunctionInfo>()),
310  StartingOccupancy(MFI.getOccupancy()),
311  MinOccupancy(StartingOccupancy), Stage(Collect), RegionIdx(0) {
312 
313  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
314 }
315 
317  if (Stage == Collect) {
318  // Just record regions at the first pass.
319  Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
320  return;
321  }
322 
323  std::vector<MachineInstr*> Unsched;
324  Unsched.reserve(NumRegionInstrs);
325  for (auto &I : *this) {
326  Unsched.push_back(&I);
327  }
328 
329  GCNRegPressure PressureBefore;
330  if (LIS) {
331  PressureBefore = Pressure[RegionIdx];
332 
333  LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:";
334  GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI);
335  dbgs() << "Region live-in pressure: ";
336  llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs());
337  dbgs() << "Region register pressure: ";
338  PressureBefore.print(dbgs()));
339  }
340 
342  // Set HasClusteredNodes to true for late stages where we have already
343  // collected it. That way pickNode() will not scan SDep's when not needed.
344  S.HasClusteredNodes = Stage > InitialSchedule;
345  S.HasExcessPressure = false;
347  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
348  RescheduleRegions[RegionIdx] = false;
349  if (Stage == InitialSchedule && S.HasClusteredNodes)
350  RegionsWithClusters[RegionIdx] = true;
351  if (S.HasExcessPressure)
352  RegionsWithHighRP[RegionIdx] = true;
353 
354  if (!LIS)
355  return;
356 
357  // Check the results of scheduling.
358  auto PressureAfter = getRealRegPressure();
359 
360  LLVM_DEBUG(dbgs() << "Pressure after scheduling: ";
361  PressureAfter.print(dbgs()));
362 
363  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
364  PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
365  Pressure[RegionIdx] = PressureAfter;
366  RegionsWithMinOcc[RegionIdx] =
367  PressureAfter.getOccupancy(ST) == MinOccupancy;
368 
369  LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
370  return;
371  }
372 
373  unsigned WavesAfter =
374  std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
375  unsigned WavesBefore =
376  std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
377  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
378  << ", after " << WavesAfter << ".\n");
379 
380  // We may not be able to keep the current target occupancy because of the just
381  // scheduled region. We might still be able to revert scheduling if the
382  // occupancy before was higher, or if the current schedule has register
383  // pressure higher than the excess limits which could lead to more spilling.
384  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
385 
386  // Allow memory bound functions to drop to 4 waves if not limited by an
387  // attribute.
388  if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
389  WavesAfter >= MFI.getMinAllowedOccupancy()) {
390  LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
391  << MFI.getMinAllowedOccupancy() << " waves\n");
392  NewOccupancy = WavesAfter;
393  }
394 
395  if (NewOccupancy < MinOccupancy) {
396  MinOccupancy = NewOccupancy;
397  MFI.limitOccupancy(MinOccupancy);
398  RegionsWithMinOcc.reset();
399  LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
400  << MinOccupancy << ".\n");
401  }
402 
403  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
404  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
405  if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
406  PressureAfter.getAGPRNum() > MaxVGPRs ||
407  PressureAfter.getSGPRNum() > MaxSGPRs) {
408  RescheduleRegions[RegionIdx] = true;
409  RegionsWithHighRP[RegionIdx] = true;
410  }
411 
412  // If this condition is true, then either the occupancy before and after
413  // scheduling is the same, or we are allowing the occupancy to drop because
414  // the function is memory bound. Even if we are OK with the current occupancy,
415  // we still need to verify that we will not introduce any extra chance of
416  // spilling.
417  if (WavesAfter >= MinOccupancy) {
418  if (Stage == UnclusteredReschedule &&
419  !PressureAfter.less(ST, PressureBefore)) {
420  LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
421  } else if (WavesAfter > MFI.getMinWavesPerEU() ||
422  PressureAfter.less(ST, PressureBefore) ||
423  !RescheduleRegions[RegionIdx]) {
424  Pressure[RegionIdx] = PressureAfter;
425  RegionsWithMinOcc[RegionIdx] =
426  PressureAfter.getOccupancy(ST) == MinOccupancy;
427  if (!RegionsWithClusters[RegionIdx] &&
428  (Stage + 1) == UnclusteredReschedule)
429  RescheduleRegions[RegionIdx] = false;
430  return;
431  } else {
432  LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
433  }
434  }
435 
436  RegionsWithMinOcc[RegionIdx] =
437  PressureBefore.getOccupancy(ST) == MinOccupancy;
438  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
439  RescheduleRegions[RegionIdx] = RegionsWithClusters[RegionIdx] ||
440  (Stage + 1) != UnclusteredReschedule;
442  int SkippedDebugInstr = 0;
443  for (MachineInstr *MI : Unsched) {
444  if (MI->isDebugInstr()) {
445  ++SkippedDebugInstr;
446  continue;
447  }
448 
449  if (MI->getIterator() != RegionEnd) {
450  BB->remove(MI);
451  BB->insert(RegionEnd, MI);
452  if (!MI->isDebugInstr())
453  LIS->handleMove(*MI, true);
454  }
455  // Reset read-undef flags and update them later.
456  for (auto &Op : MI->operands())
457  if (Op.isReg() && Op.isDef())
458  Op.setIsUndef(false);
459  RegisterOperands RegOpers;
460  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
461  if (!MI->isDebugInstr()) {
462  if (ShouldTrackLaneMasks) {
463  // Adjust liveness and add missing dead+read-undef flags.
464  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
465  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
466  } else {
467  // Adjust for missing dead-def flags.
468  RegOpers.detectDeadDefs(*MI, *LIS);
469  }
470  }
471  RegionEnd = MI->getIterator();
472  ++RegionEnd;
473  LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
474  }
475 
476  // After reverting schedule, debug instrs will now be at the end of the block
477  // and RegionEnd will point to the first debug instr. Increment RegionEnd
478  // pass debug instrs to the actual end of the scheduling region.
479  while (SkippedDebugInstr-- > 0)
480  ++RegionEnd;
481 
482  // If Unsched.front() instruction is a debug instruction, this will actually
483  // shrink the region since we moved all debug instructions to the end of the
484  // block. Find the first instruction that is not a debug instruction.
485  RegionBegin = Unsched.front()->getIterator();
486  if (RegionBegin->isDebugInstr()) {
487  for (MachineInstr *MI : Unsched) {
488  if (MI->isDebugInstr())
489  continue;
490  RegionBegin = MI->getIterator();
491  break;
492  }
493  }
494 
495  // Then move the debug instructions back into their correct place and set
496  // RegionBegin and RegionEnd if needed.
498 
499  Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd);
500 }
501 
502 GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const {
504  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
505  return RPTracker.moveMaxPressure();
506 }
507 
508 void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
510 
511  // If the block has the only successor then live-ins of that successor are
512  // live-outs of the current block. We can reuse calculated live set if the
513  // successor will be sent to scheduling past current block.
514  const MachineBasicBlock *OnlySucc = nullptr;
515  if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
516  SlotIndexes *Ind = LIS->getSlotIndexes();
517  if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
518  OnlySucc = *MBB->succ_begin();
519  }
520 
521  // Scheduler sends regions from the end of the block upwards.
522  size_t CurRegion = RegionIdx;
523  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
524  if (Regions[CurRegion].first->getParent() != MBB)
525  break;
526  --CurRegion;
527 
528  auto I = MBB->begin();
529  auto LiveInIt = MBBLiveIns.find(MBB);
530  auto &Rgn = Regions[CurRegion];
531  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
532  if (LiveInIt != MBBLiveIns.end()) {
533  auto LiveIn = std::move(LiveInIt->second);
534  RPTracker.reset(*MBB->begin(), &LiveIn);
535  MBBLiveIns.erase(LiveInIt);
536  } else {
537  I = Rgn.first;
538  auto LRS = BBLiveInMap.lookup(NonDbgMI);
539 #ifdef EXPENSIVE_CHECKS
540  assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
541 #endif
542  RPTracker.reset(*I, &LRS);
543  }
544 
545  for ( ; ; ) {
546  I = RPTracker.getNext();
547 
548  if (Regions[CurRegion].first == I || NonDbgMI == I) {
549  LiveIns[CurRegion] = RPTracker.getLiveRegs();
550  RPTracker.clearMaxPressure();
551  }
552 
553  if (Regions[CurRegion].second == I) {
554  Pressure[CurRegion] = RPTracker.moveMaxPressure();
555  if (CurRegion-- == RegionIdx)
556  break;
557  }
558  RPTracker.advanceToNext();
559  RPTracker.advanceBeforeNext();
560  }
561 
562  if (OnlySucc) {
563  if (I != MBB->end()) {
564  RPTracker.advanceToNext();
565  RPTracker.advance(MBB->end());
566  }
567  RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs());
568  RPTracker.advanceBeforeNext();
569  MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
570  }
571 }
572 
574 GCNScheduleDAGMILive::getBBLiveInMap() const {
575  assert(!Regions.empty());
576  std::vector<MachineInstr *> BBStarters;
577  BBStarters.reserve(Regions.size());
578  auto I = Regions.rbegin(), E = Regions.rend();
579  auto *BB = I->first->getParent();
580  do {
581  auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
582  BBStarters.push_back(MI);
583  do {
584  ++I;
585  } while (I != E && I->first->getParent() == BB);
586  } while (I != E);
587  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
588 }
589 
591  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
592 
593  LiveIns.resize(Regions.size());
594  Pressure.resize(Regions.size());
595  RescheduleRegions.resize(Regions.size());
596  RegionsWithClusters.resize(Regions.size());
597  RegionsWithHighRP.resize(Regions.size());
598  RegionsWithMinOcc.resize(Regions.size());
599  RescheduleRegions.set();
600  RegionsWithClusters.reset();
601  RegionsWithHighRP.reset();
602  RegionsWithMinOcc.reset();
603 
604  if (!Regions.empty())
605  BBLiveInMap = getBBLiveInMap();
606 
607  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
608 
609  do {
610  Stage++;
611  RegionIdx = 0;
612  MachineBasicBlock *MBB = nullptr;
613 
614  if (Stage > InitialSchedule) {
615  if (!LIS)
616  break;
617 
618  // Retry function scheduling if we found resulting occupancy and it is
619  // lower than used for first pass scheduling. This will give more freedom
620  // to schedule low register pressure blocks.
621  // Code is partially copied from MachineSchedulerBase::scheduleRegions().
622 
623  if (Stage == UnclusteredReschedule) {
624  if (RescheduleRegions.none())
625  continue;
626  LLVM_DEBUG(dbgs() <<
627  "Retrying function scheduling without clustering.\n");
628  }
629 
630  if (Stage == ClusteredLowOccupancyReschedule) {
631  if (StartingOccupancy <= MinOccupancy)
632  break;
633 
634  LLVM_DEBUG(
635  dbgs()
636  << "Retrying function scheduling with lowest recorded occupancy "
637  << MinOccupancy << ".\n");
638  }
639 
640  if (Stage == PreRARematerialize) {
641  if (RegionsWithMinOcc.none() || Regions.size() == 1)
642  break;
643 
644  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
646  // Check maximum occupancy
647  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
648  MinOccupancy)
649  break;
650 
651  // FIXME: This pass will invalidate cached MBBLiveIns for regions
652  // inbetween the defs and region we sinked the def to. Cached pressure
653  // for regions where a def is sinked from will also be invalidated. Will
654  // need to be fixed if there is another pass after this pass.
655  static_assert(LastStage == PreRARematerialize,
656  "Passes after PreRARematerialize are not supported");
657 
658  collectRematerializableInstructions();
659  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
660  break;
661 
662  LLVM_DEBUG(
663  dbgs() << "Retrying function scheduling with improved occupancy of "
664  << MinOccupancy << " from rematerializing\n");
665  }
666  }
667 
668  if (Stage == UnclusteredReschedule)
669  SavedMutations.swap(Mutations);
670 
671  for (auto Region : Regions) {
672  if (((Stage == UnclusteredReschedule || Stage == PreRARematerialize) &&
673  !RescheduleRegions[RegionIdx]) ||
674  (Stage == ClusteredLowOccupancyReschedule &&
675  !RegionsWithClusters[RegionIdx] && !RegionsWithHighRP[RegionIdx])) {
676 
677  ++RegionIdx;
678  continue;
679  }
680 
681  RegionBegin = Region.first;
682  RegionEnd = Region.second;
683 
684  if (RegionBegin->getParent() != MBB) {
685  if (MBB) finishBlock();
686  MBB = RegionBegin->getParent();
687  startBlock(MBB);
688  if (Stage == InitialSchedule)
689  computeBlockPressure(MBB);
690  }
691 
692  unsigned NumRegionInstrs = std::distance(begin(), end());
694 
695  // Skip empty scheduling regions (0 or 1 schedulable instructions).
696  if (begin() == end() || begin() == std::prev(end())) {
697  exitRegion();
698  ++RegionIdx;
699  continue;
700  }
701 
702  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
703  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " "
704  << MBB->getName() << "\n From: " << *begin()
705  << " To: ";
706  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
707  else dbgs() << "End";
708  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
709 
710  schedule();
711 
712  exitRegion();
713  ++RegionIdx;
714  }
715  finishBlock();
716 
717  if (Stage == UnclusteredReschedule)
718  SavedMutations.swap(Mutations);
719  } while (Stage != LastStage);
720 }
721 
722 void GCNScheduleDAGMILive::collectRematerializableInstructions() {
723  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
724  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
726  if (!LIS->hasInterval(Reg))
727  continue;
728 
729  // TODO: Handle AGPR and SGPR rematerialization
730  if (!SRI->isVGPRClass(MRI.getRegClass(Reg)) || !MRI.hasOneDef(Reg) ||
732  continue;
733 
735  MachineInstr *Def = Op->getParent();
736  if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA))
737  continue;
738 
740  if (Def->getParent() == UseI->getParent())
741  continue;
742 
743  // We are only collecting defs that are defined in another block and are
744  // live-through or used inside regions at MinOccupancy. This means that the
745  // register must be in the live-in set for the region.
746  bool AddedToRematList = false;
747  for (unsigned I = 0, E = Regions.size(); I != E; ++I) {
748  auto It = LiveIns[I].find(Reg);
749  if (It != LiveIns[I].end() && !It->second.none()) {
750  if (RegionsWithMinOcc[I]) {
751  RematerializableInsts[I][Def] = UseI;
752  AddedToRematList = true;
753  }
754 
755  // Collect regions with rematerializable reg as live-in to avoid
756  // searching later when updating RP.
757  RematDefToLiveInRegions[Def].push_back(I);
758  }
759  }
760  if (!AddedToRematList)
761  RematDefToLiveInRegions.erase(Def);
762  }
763 }
764 
765 bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST,
766  const TargetInstrInfo *TII) {
767  // Temporary copies of cached variables we will be modifying and replacing if
768  // sinking succeeds.
769  SmallVector<
770  std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
771  NewRegions;
774  BitVector NewRescheduleRegions;
775 
776  NewRegions.resize(Regions.size());
777  NewRescheduleRegions.resize(Regions.size());
778 
779  // Collect only regions that has a rematerializable def as a live-in.
780  SmallSet<unsigned, 16> ImpactedRegions;
781  for (const auto &It : RematDefToLiveInRegions)
782  ImpactedRegions.insert(It.second.begin(), It.second.end());
783 
784  // Make copies of register pressure and live-ins cache that will be updated
785  // as we rematerialize.
786  for (auto Idx : ImpactedRegions) {
787  NewPressure[Idx] = Pressure[Idx];
788  NewLiveIns[Idx] = LiveIns[Idx];
789  }
790  NewRegions = Regions;
791  NewRescheduleRegions.reset();
792 
793  DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
794  bool Improved = false;
795  for (auto I : ImpactedRegions) {
796  if (!RegionsWithMinOcc[I])
797  continue;
798 
799  Improved = false;
800  int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
801  int SGPRUsage = NewPressure[I].getSGPRNum();
802 
803  // TODO: Handle occupancy drop due to AGPR and SGPR.
804  // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
805  if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == MinOccupancy)
806  break;
807 
808  // The occupancy of this region could have been improved by a previous
809  // iteration's sinking of defs.
810  if (NewPressure[I].getOccupancy(ST) > MinOccupancy) {
811  NewRescheduleRegions[I] = true;
812  Improved = true;
813  continue;
814  }
815 
816  // First check if we have enough trivially rematerializable instructions to
817  // improve occupancy. Optimistically assume all instructions we are able to
818  // sink decreased RP.
819  int TotalSinkableRegs = 0;
820  for (const auto &It : RematerializableInsts[I]) {
821  MachineInstr *Def = It.first;
822  Register DefReg = Def->getOperand(0).getReg();
823  TotalSinkableRegs +=
824  SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
825  }
826  int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
827  unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
828  // If in the most optimistic scenario, we cannot improve occupancy, then do
829  // not attempt to sink any instructions.
830  if (OptimisticOccupancy <= MinOccupancy)
831  break;
832 
833  unsigned ImproveOccupancy = 0;
835  for (auto &It : RematerializableInsts[I]) {
836  MachineInstr *Def = It.first;
837  MachineBasicBlock::iterator InsertPos =
838  MachineBasicBlock::iterator(It.second);
839  Register Reg = Def->getOperand(0).getReg();
840  // Rematerialize MI to its use block. Since we are only rematerializing
841  // instructions that do not have any virtual reg uses, we do not need to
842  // call LiveRangeEdit::allUsesAvailableAt() and
843  // LiveRangeEdit::canRematerializeAt().
844  TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
845  Def->getOperand(0).getSubReg(), *Def, *TRI);
846  MachineInstr *NewMI = &*(--InsertPos);
847  LIS->InsertMachineInstrInMaps(*NewMI);
850  InsertedMIToOldDef[NewMI] = Def;
851 
852  // Update region boundaries in scheduling region we sinked from since we
853  // may sink an instruction that was at the beginning or end of its region
854  updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
855  /*Removing =*/true);
856 
857  // Update region boundaries in region we sinked to.
858  updateRegionBoundaries(NewRegions, InsertPos, NewMI);
859 
860  LaneBitmask PrevMask = NewLiveIns[I][Reg];
861  // FIXME: Also update cached pressure for where the def was sinked from.
862  // Update RP for all regions that has this reg as a live-in and remove
863  // the reg from all regions as a live-in.
864  for (auto Idx : RematDefToLiveInRegions[Def]) {
865  NewLiveIns[Idx].erase(Reg);
866  if (InsertPos->getParent() != Regions[Idx].first->getParent()) {
867  // Def is live-through and not used in this block.
868  NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), MRI);
869  } else {
870  // Def is used and rematerialized into this block.
872  auto *NonDbgMI = &*skipDebugInstructionsForward(
873  NewRegions[Idx].first, NewRegions[Idx].second);
874  RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
875  RPT.advance(NewRegions[Idx].second);
876  NewPressure[Idx] = RPT.moveMaxPressure();
877  }
878  }
879 
880  SinkedDefs.push_back(Def);
881  ImproveOccupancy = NewPressure[I].getOccupancy(ST);
882  if (ImproveOccupancy > MinOccupancy)
883  break;
884  }
885 
886  // Remove defs we just sinked from all regions' list of sinkable defs
887  for (auto &Def : SinkedDefs)
888  for (auto TrackedIdx : RematDefToLiveInRegions[Def])
889  RematerializableInsts[TrackedIdx].erase(Def);
890 
891  if (ImproveOccupancy <= MinOccupancy)
892  break;
893 
894  NewRescheduleRegions[I] = true;
895  Improved = true;
896  }
897 
898  if (!Improved) {
899  // Occupancy was not improved for all regions that were at MinOccupancy.
900  // Undo sinking and remove newly rematerialized instructions.
901  for (auto &Entry : InsertedMIToOldDef) {
902  MachineInstr *MI = Entry.first;
903  MachineInstr *OldMI = Entry.second;
904  Register Reg = MI->getOperand(0).getReg();
906  MI->eraseFromParent();
907  OldMI->clearRegisterDeads(Reg);
910  }
911  return false;
912  }
913 
914  // Occupancy was improved for all regions.
915  for (auto &Entry : InsertedMIToOldDef) {
916  MachineInstr *MI = Entry.first;
917  MachineInstr *OldMI = Entry.second;
918 
919  // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
920  BBLiveInMap.erase(OldMI);
921 
922  // Remove OldMI and update LIS
923  Register Reg = MI->getOperand(0).getReg();
925  OldMI->eraseFromParent();
928  }
929 
930  // Update live-ins, register pressure, and regions caches.
931  for (auto Idx : ImpactedRegions) {
932  LiveIns[Idx] = NewLiveIns[Idx];
933  Pressure[Idx] = NewPressure[Idx];
934  MBBLiveIns.erase(Regions[Idx].first->getParent());
935  }
936  Regions = NewRegions;
937  RescheduleRegions = NewRescheduleRegions;
938 
940  MFI.increaseOccupancy(MF, ++MinOccupancy);
941 
942  return true;
943 }
944 
945 // Copied from MachineLICM
946 bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI,
947  AAResults *AA) {
949  return false;
950 
951  for (const MachineOperand &MO : MI.operands())
952  if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
953  return false;
954 
955  return true;
956 }
957 
958 // When removing, we will have to check both beginning and ending of the region.
959 // When inserting, we will only have to check if we are inserting NewMI in front
960 // of a scheduling region and do not need to check the ending since we will only
961 // ever be inserting before an already existing MI.
962 void GCNScheduleDAGMILive::updateRegionBoundaries(
964  MachineBasicBlock::iterator>> &RegionBoundaries,
965  MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
966  unsigned I = 0, E = RegionBoundaries.size();
967  // Search for first region of the block where MI is located
968  while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
969  ++I;
970 
971  for (; I != E; ++I) {
972  if (MI->getParent() != RegionBoundaries[I].first->getParent())
973  return;
974 
975  if (Removing && MI == RegionBoundaries[I].first &&
976  MI == RegionBoundaries[I].second) {
977  // MI is in a region with size 1, after removing, the region will be
978  // size 0, set RegionBegin and RegionEnd to pass end of block iterator.
979  RegionBoundaries[I] =
980  std::make_pair(MI->getParent()->end(), MI->getParent()->end());
981  return;
982  }
983  if (MI == RegionBoundaries[I].first) {
984  if (Removing)
985  RegionBoundaries[I] =
986  std::make_pair(std::next(MI), RegionBoundaries[I].second);
987  else
988  // Inserted NewMI in front of region, set new RegionBegin to NewMI
989  RegionBoundaries[I] = std::make_pair(MachineBasicBlock::iterator(NewMI),
990  RegionBoundaries[I].second);
991  return;
992  }
993  if (Removing && MI == RegionBoundaries[I].second) {
994  RegionBoundaries[I] =
995  std::make_pair(RegionBoundaries[I].first, std::prev(MI));
996  return;
997  }
998  }
999 }
llvm::LaneBitmask
Definition: LaneBitmask.h:40
llvm::GenericScheduler::Top
SchedBoundary Top
Definition: MachineScheduler.h:1002
llvm::MachineSchedPolicy::OnlyBottomUp
bool OnlyBottomUp
Definition: MachineScheduler.h:189
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:354
llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:910
llvm::MachineInstr::clearRegisterDeads
void clearRegisterDeads(Register Reg)
Clear all dead flags on operands defining register Reg.
Definition: MachineInstr.cpp:1961
llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:581
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::GCNRegPressure
Definition: GCNRegPressure.h:30
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::getLiveRegsBefore
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
Definition: GCNRegPressure.h:255
llvm::LiveIntervals::getSlotIndexes
SlotIndexes * getSlotIndexes() const
Definition: LiveIntervals.h:211
llvm::SIMachineFunctionInfo::getMinAllowedOccupancy
unsigned getMinAllowedOccupancy() const
Definition: SIMachineFunctionInfo.h:949
SIMachineFunctionInfo.h
llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:133
llvm::MachineInstr::mayLoadOrStore
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
Definition: MachineInstr.h:1035
llvm::getRegPressure
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
Definition: GCNRegPressure.h:262
llvm::getLiveRegMap
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...
Definition: GCNRegPressure.h:212
llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:618
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:277
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:166
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:93
llvm::BitVector::none
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:181
llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:882
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::RegPressureTracker::getUpwardPressure
void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
Definition: RegisterPressure.cpp:1367
llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:623
llvm::ScheduleDAGInstrs::NumRegionInstrs
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
Definition: ScheduleDAGInstrs.h:154
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:116
GCNSchedStrategy.h
llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:909
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
RegisterClassInfo.h
llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3183
llvm::MachineRegisterInfo::use_instr_nodbg_begin
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:543
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::AMDGPUMachineFunction::getLDSSize
uint32_t getLDSSize() const
Definition: AMDGPUMachineFunction.h:74
llvm::LiveIntervals::removeInterval
void removeInterval(Register Reg)
Interval removal.
Definition: LiveIntervals.h:145
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:357
llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:2687
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:334
llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:2898
llvm::SmallSet< unsigned, 16 >
llvm::SIMachineFunctionInfo::increaseOccupancy
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
Definition: SIMachineFunctionInfo.h:962
llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:868
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:765
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:280
llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition: GCNRegPressure.cpp:55
llvm::LiveIntervals::createAndComputeVirtRegInterval
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
Definition: LiveIntervals.h:138
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:226
llvm::GCNScheduleDAGMILive::finalizeSchedule
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition: GCNSchedStrategy.cpp:590
llvm::GCNDownwardRPTracker
Definition: GCNRegPressure.h:161
llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:278
llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1512
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::GenericScheduler::TopCand
SchedCandidate TopCand
Candidate last picked from Top boundary.
Definition: MachineScheduler.h:1006
llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:703
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MachineBasicBlock::remove
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
Definition: MachineBasicBlock.h:962
llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition: GCNSubtarget.h:1120
llvm::GCNMaxOccupancySchedStrategy
This is a minimal scheduler strategy.
Definition: GCNSchedStrategy.h:30
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:754
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::GCNRegPressure::getOccupancy
unsigned getOccupancy(const GCNSubtarget &ST) const
Definition: GCNRegPressure.h:63
llvm::ScheduleDAGInstrs::exitRegion
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Definition: ScheduleDAGInstrs.cpp:193
llvm::RegPressureTracker::getDownwardPressure
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
Definition: RegisterPressure.cpp:1383
llvm::LiveIntervals::InsertMachineInstrInMaps
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
Definition: LiveIntervals.h:266
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:823
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:313
llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:709
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:151
llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:30
llvm::BitVector
Definition: BitVector.h:75
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
Definition: GCNSubtarget.h:1190
llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:865
llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:413
llvm::GenericScheduler::RegionPolicy
MachineSchedPolicy RegionPolicy
Definition: MachineScheduler.h:999
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition: RegisterPressure.cpp:570
llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
Definition: RegisterClassInfo.h:95
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition: RegisterPressure.h:463
llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:814
llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:274
llvm::SlotIndexes::getMBBStartIdx
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:465
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::SIRegisterInfo::isVGPRClass
static bool isVGPRClass(const TargetRegisterClass *RC)
Definition: SIRegisterInfo.h:199
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243
llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:908
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:716
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:125
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:948
llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
llvm::SIMachineFunctionInfo::getMinWavesPerEU
unsigned getMinWavesPerEU() const
Definition: SIMachineFunctionInfo.h:905
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:565
llvm::MachineRegisterInfo::getOneDef
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
Definition: MachineRegisterInfo.h:458
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1675
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:342
llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:275
llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:862
llvm::GCNSubtarget::hasGFX90AInsts
bool hasGFX90AInsts() const
Definition: GCNSubtarget.h:1019
llvm::GenericScheduler::BotCand
SchedCandidate BotCand
Candidate last picked from Bot boundary.
Definition: MachineScheduler.h:1008
llvm::MachineRegisterInfo::hasOneDef
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
Definition: MachineRegisterInfo.h:452
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:271
llvm::ScheduleDAGMI::placeDebugValues
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
Definition: MachineScheduler.cpp:908
llvm::SIMachineFunctionInfo::getOccupancy
unsigned getOccupancy() const
Definition: SIMachineFunctionInfo.h:945
llvm::SlotIndex::getRegSlot
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:253
llvm::SDep::isCluster
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants ...
Definition: ScheduleDAG.h:206
llvm::GenericScheduler::Bot
SchedBoundary Bot
Definition: MachineScheduler.h:1003
llvm::skipDebugInstructionsForward
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
Definition: MachineBasicBlock.h:1238
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition: MachineBasicBlock.h:242
llvm::ArrayRef< unsigned >
llvm::GenericScheduler::DAG
ScheduleDAGMILive * DAG
Definition: MachineScheduler.h:997
llvm::GCNScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: GCNSchedStrategy.cpp:316
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::SIRegisterInfo::getNumCoveredRegs
static unsigned getNumCoveredRegs(LaneBitmask LM)
Definition: SIRegisterInfo.h:361
llvm::ScheduleDAGMILive::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:990
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:415
llvm::ScheduleDAGMILive::getBotRPTracker
const RegPressureTracker & getBotRPTracker() const
Definition: MachineScheduler.h:449
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
llvm::TargetInstrInfo::reMaterialize
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
Definition: TargetInstrInfo.cpp:417
llvm::GCNMaxOccupancySchedStrategy::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
Definition: GCNSchedStrategy.cpp:253
llvm::SIMachineFunctionInfo::limitOccupancy
void limitOccupancy(const MachineFunction &MF)
Definition: SIMachineFunctionInfo.cpp:205
llvm::ScheduleDAGMILive::ShouldTrackLaneMasks
bool ShouldTrackLaneMasks
Definition: MachineScheduler.h:411
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::GenericSchedulerBase::traceCandidate
void traceCandidate(const SchedCandidate &Cand)
Definition: MachineScheduler.cpp:2766
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
llvm::GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C)
Definition: GCNSchedStrategy.cpp:22
llvm::VerifyScheduling
cl::opt< bool > VerifyScheduling
llvm::SchedBoundary::pickOnlyChoice
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
Definition: MachineScheduler.cpp:2562
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::TargetInstrInfo::isTriviallyReMaterializable
bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA=nullptr) const
Return true if the instruction is trivially rematerializable, meaning it has no side effects and requ...
Definition: TargetInstrInfo.h:124
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::GCNMaxOccupancySchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition: GCNSchedStrategy.cpp:27
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:148
std
Definition: BitVector.h:851
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::MachineSchedPolicy::OnlyTopDown
bool OnlyTopDown
Definition: MachineScheduler.h:188
llvm::ReadyQueue::empty
bool empty() const
Definition: MachineScheduler.h:547
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1312
llvm::ScheduleDAGMI::bottom
MachineBasicBlock::iterator bottom() const
Definition: MachineScheduler.h:329
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::ScheduleDAGMI::finishBlock
void finishBlock() override
Cleans up after scheduling in the given block.
Definition: MachineScheduler.cpp:714
llvm::PressureChange
Capture a change in pressure for a single pressure set.
Definition: RegisterPressure.h:102
llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition: MachineScheduler.h:609
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:385
llvm::RegisterOperands::adjustLaneLiveness
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
Definition: RegisterPressure.cpp:601
AA
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::ScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition: MachineScheduler.cpp:1216
llvm::SchedBoundary::Available
ReadyQueue Available
Definition: MachineScheduler.h:622
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:278
llvm::GCNRPTracker::printLiveRegs
static void printLiveRegs(raw_ostream &OS, const LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI)
Definition: GCNRegPressure.cpp:486
llvm::LiveIntervals::RemoveMachineInstrFromMaps
void RemoveMachineInstrFromMaps(MachineInstr &MI)
Definition: LiveIntervals.h:276
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:349
llvm::ScheduleDAGMILive::getTopRPTracker
const RegPressureTracker & getTopRPTracker() const
Definition: MachineScheduler.h:445
llvm::ScheduleDAGMI::top
MachineBasicBlock::iterator top() const
Definition: MachineScheduler.h:328
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::LiveIntervals::hasInterval
bool hasInterval(Register Reg) const
Definition: LiveIntervals.h:125
llvm::ScheduleDAGMILive
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
Definition: MachineScheduler.h:390
llvm::Region
Definition: RegionInfo.h:889
llvm::GCNRegPressure::print
void print(raw_ostream &OS, const GCNSubtarget *ST=nullptr) const
Definition: GCNRegPressure.cpp:172
llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition: MachineScheduler.h:532
llvm::GenericScheduler
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
Definition: MachineScheduler.h:952
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:650
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:310
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:280
llvm::SchedBoundary::removeReady
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Definition: MachineScheduler.cpp:2550
llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition: MachineScheduler.h:891
llvm::GCNScheduleDAGMILive::GCNScheduleDAGMILive
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
Definition: GCNSchedStrategy.cpp:305