docs/doxygen/GCNSchedStrategy_8cpp_source.html

//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This contains a MachineSchedStrategy implementation for maximizing wave

/// occupancy on GCN hardware.

///

/// This pass will apply multiple scheduling stages to the same function.

/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual

/// entry point for the scheduling of those regions is

/// GCNScheduleDAGMILive::runSchedStages.


/// Generally, the reason for having multiple scheduling stages is to account

/// for the kernel-wide effect of register usage on occupancy.  Usually, only a

/// few scheduling regions will have register pressure high enough to limit

/// occupancy for the kernel, so constraints can be relaxed to improve ILP in

/// other regions.

///

//===----------------------------------------------------------------------===//


#include "GCNSchedStrategy.h"

#include "AMDGPUIGroupLP.h"

#include "GCNHazardRecognizer.h"

#include "GCNRegPressure.h"

#include "SIMachineFunctionInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/ADT/BitVector.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/CodeGen/CalcSpillWeights.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"

#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/RegisterClassInfo.h"

#include "llvm/CodeGen/Rematerializer.h"

#include "llvm/MC/LaneBitmask.h"

#include "llvm/MC/MCSchedule.h"

#include "llvm/MC/TargetRegistry.h"

#include "llvm/Support/ErrorHandling.h"


#define DEBUG_TYPE "machine-scheduler"


using namespace llvm;


static cl::opt<bool> DisableUnclusterHighRP(

    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,

    cl::desc("Disable unclustered high register pressure "

             "reduction scheduling stage."),

    cl::init(false));


static cl::opt<bool> DisableClusteredLowOccupancy(

    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,

    cl::desc("Disable clustered low occupancy "

             "rescheduling for ILP scheduling stage."),

    cl::init(false));


static cl::opt<unsigned> ScheduleMetricBias(

    "amdgpu-schedule-metric-bias", cl::Hidden,

    cl::desc(

        "Sets the bias which adds weight to occupancy vs latency. Set it to "

        "100 to chase the occupancy only."),

    cl::init(10));


static cl::opt<bool>

    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,

               cl::desc("Relax occupancy targets for kernels which are memory "

                        "bound (amdgpu-membound-threshold), or "

                        "Wave Limited (amdgpu-limit-wave-threshold)."),

               cl::init(false));


static cl::opt<bool> GCNTrackers(

    "amdgpu-use-amdgpu-trackers", cl::Hidden,

    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),

    cl::init(false));


static cl::opt<unsigned> PendingQueueLimit(

    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,

    cl::desc(

        "Max (Available+Pending) size to inspect pending queue (0 disables)"),

    cl::init(256));


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

#define DUMP_MAX_REG_PRESSURE

static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(

    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,

    cl::desc("Print a list of live registers along with their def/uses at the "

             "point of maximum register pressure before scheduling."),

    cl::init(false));


static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(

    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,

    cl::desc("Print a list of live registers along with their def/uses at the "

             "point of maximum register pressure after scheduling."),

    cl::init(false));

#endif


static cl::opt<bool> DisableRewriteMFMAFormSchedStage(

    "amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden,

    cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true));


const unsigned ScheduleMetrics::ScaleFactor = 100;


GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)

    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),

      DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {

  if (GCNTrackers.getNumOccurrences() > 0)

    GCNTrackersOverride = GCNTrackers;

}


void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {

  GenericScheduler::initialize(DAG);


  MF = &DAG->MF;


  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();


  SGPRExcessLimit =

      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);

  VGPRExcessLimit =

      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);


  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();

  // Set the initial TargetOccupnacy to the maximum occupancy that we can

  // achieve for this function. This effectively sets a lower bound on the

  // 'Critical' register limits in the scheduler.

  // Allow for lower occupancy targets if kernel is wave limited or memory

  // bound, and using the relaxed occupancy feature.

  TargetOccupancy =

      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();

  SGPRCriticalLimit =

      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);


  if (!KnownExcessRP) {

    VGPRCriticalLimit = std::min(

        ST.getMaxNumVGPRs(TargetOccupancy, MFI.getDynamicVGPRBlockSize()),

        VGPRExcessLimit);

  } else {

    // This is similar to ST.getMaxNumVGPRs(TargetOccupancy) result except

    // returns a reasonably small number for targets with lots of VGPRs, such

    // as GFX10 and GFX11.

    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "

                         "VGPRCriticalLimit calculation method.\n");

    unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();

    unsigned Granule =

        AMDGPU::IsaInfo::getVGPRAllocGranule(ST, DynamicVGPRBlockSize);

    unsigned Addressable =

        AMDGPU::IsaInfo::getAddressableNumVGPRs(ST, DynamicVGPRBlockSize);

    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);

    VGPRBudget = std::max(VGPRBudget, Granule);

    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);

  }


  // Subtract error margin and bias from register limits and avoid overflow.

  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);

  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);

  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);

  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit

                    << ", VGPRExcessLimit = " << VGPRExcessLimit

                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit

                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");

}


/// Checks whether \p SU can use the cached DAG pressure diffs to compute the

/// current register pressure.

///

/// This works for the common case, but it has a few exceptions that have been

/// observed through trial and error:

///   - Explicit physical register operands

///   - Subregister definitions

///

/// In both of those cases, PressureDiff doesn't represent the actual pressure,

/// and querying LiveIntervals through the RegPressureTracker is needed to get

/// an accurate value.

///

/// We should eventually only use PressureDiff for maximum performance, but this

/// already allows 80% of SUs to take the fast path without changing scheduling

/// at all. Further changes would either change scheduling, or require a lot

/// more logic to recover an accurate pressure estimate from the PressureDiffs.


static bool canUsePressureDiffs(const SUnit &SU) {

  if (!SU.isInstr())

    return false;


  // Cannot use pressure diffs for subregister defs or with physregs, it's

  // imprecise in both cases.

  for (const auto &Op : SU.getInstr()->operands()) {

    if (!Op.isReg() || Op.isImplicit())

      continue;

    if (Op.getReg().isPhysical() ||

        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))

      return false;

  }

  return true;

}


void GCNSchedStrategy::getRegisterPressures(

    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,

    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,

    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,

    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {

  // getDownwardPressure() and getUpwardPressure() make temporary changes to

  // the tracker, so we need to pass those function a non-const copy.

  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  if (!useGCNTrackers()) {

    AtTop

        ? TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure)

        : TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);


    return;

  }


  // GCNTrackers

  Pressure.resize(4, 0);

  MachineInstr *MI = SU->getInstr();

  GCNRegPressure NewPressure;

  if (AtTop) {

    GCNDownwardRPTracker TempDownwardTracker(DownwardTracker);

    NewPressure = TempDownwardTracker.bumpDownwardPressure(MI, SRI);

  } else {

    GCNUpwardRPTracker TempUpwardTracker(UpwardTracker);

    TempUpwardTracker.recede(*MI);

    NewPressure = TempUpwardTracker.getPressure();

  }

  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();

  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =

      NewPressure.getArchVGPRNum();

  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();

}


unsigned GCNSchedStrategy::getStructuralStallCycles(SchedBoundary &Zone,

                                                    SUnit *SU) const {

  // Only implemented for top-down scheduling currently.

  if (!Zone.isTop() || !SU)

    return 0;


  MachineInstr *MI = SU->getInstr();

  unsigned CurrCycle = Zone.getCurrCycle();

  unsigned Stall = 0;


  // Query SchedModel for resource stalls (unbuffered resources).

  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {

    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);

    for (const MCWriteProcResEntry &PE :

         make_range(SchedModel->getWriteProcResBegin(SC),

                    SchedModel->getWriteProcResEnd(SC))) {

      unsigned NextAvail =

          Zone.getNextResourceCycle(SC, PE.ProcResourceIdx, PE.ReleaseAtCycle,

                                    PE.AcquireAtCycle)

              .first;

      if (NextAvail > CurrCycle)

        Stall = std::max(Stall, NextAvail - CurrCycle);

    }

  }


  // Query HazardRecognizer for sequence-dependent hazard penalties.

  // AMDGPU currently installs GCNHazardRecognizer for MI scheduling only in

  // the post-RA configuration without vreg liveness.

  if (!DAG->hasVRegLiveness() && Zone.HazardRec &&

      Zone.HazardRec->isEnabled()) {

    auto *HR = static_cast<GCNHazardRecognizer *>(Zone.HazardRec);

    Stall = std::max(Stall, HR->getHazardWaitStates(MI));

  }


  return Stall;

}


void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,

                                     bool AtTop,

                                     const RegPressureTracker &RPTracker,

                                     const SIRegisterInfo *SRI,

                                     unsigned SGPRPressure,

                                     unsigned VGPRPressure, bool IsBottomUp) {

  Cand.SU = SU;

  Cand.AtTop = AtTop;


  if (!DAG->isTrackingPressure())

    return;


  Pressure.clear();

  MaxPressure.clear();


  // We try to use the cached PressureDiffs in the ScheduleDAG whenever

  // possible over querying the RegPressureTracker.

  //

  // RegPressureTracker will make a lot of LIS queries which are very

  // expensive, it is considered a slow function in this context.

  //

  // PressureDiffs are precomputed and cached, and getPressureDiff is just a

  // trivial lookup into an array. It is pretty much free.

  //

  // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of

  // PressureDiffs.

  if (AtTop || !canUsePressureDiffs(*SU) || useGCNTrackers()) {

    getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure,

                         DownwardTracker, UpwardTracker, DAG, SRI);

  } else {

    // Reserve 4 slots.

    Pressure.resize(4, 0);

    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;

    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;


    for (const auto &Diff : DAG->getPressureDiff(SU)) {

      if (!Diff.isValid())

        continue;

      // PressureDiffs is always bottom-up so if we're working top-down we need

      // to invert its sign.

      Pressure[Diff.getPSet()] +=

          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());

    }


#ifdef EXPENSIVE_CHECKS

    std::vector<unsigned> CheckPressure, CheckMaxPressure;

    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,

                         DownwardTracker, UpwardTracker, DAG, SRI);

    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=

            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||

        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=

            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {

      errs() << "Register Pressure is inaccurate when calculated through "

                "PressureDiff\n"

             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]

             << ", expected "

             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"

             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]

             << ", expected "

             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";

      report_fatal_error("inaccurate register pressure calculation");

    }

#endif

  }


  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];

  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];


  // If two instructions increase the pressure of different register sets

  // by the same amount, the generic scheduler will prefer to schedule the

  // instruction that increases the set with the least amount of registers,

  // which in our case would be SGPRs.  This is rarely what we want, so

  // when we report excess/critical register pressure, we do it either

  // only for VGPRs or only for SGPRs.


  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.

  const unsigned MaxVGPRPressureInc = 16;

  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;

  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold

  // to increase the likelihood we don't go over the limits.  We should improve

  // the analysis to look through dependencies to find the path with the least

  // register pressure.


  // We only need to update the RPDelta for instructions that increase register

  // pressure. Instructions that decrease or keep reg pressure the same will be

  // marked as RegExcess in tryCandidate() when they are compared with

  // instructions that increase the register pressure.

  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {

    HasHighPressure = true;

    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);

    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);

  }


  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {

    HasHighPressure = true;

    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);

    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);

  }


  // Register pressure is considered 'CRITICAL' if it is approaching a value

  // that would reduce the wave occupancy for the execution unit.  When

  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both

  // has the same cost, so we don't need to prefer one over the other.


  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;

  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;


  if (SGPRDelta >= 0 || VGPRDelta >= 0) {

    HasHighPressure = true;

    if (SGPRDelta > VGPRDelta) {

      Cand.RPDelta.CriticalMax =

          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);

      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);

    } else {

      Cand.RPDelta.CriticalMax =

          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);

      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);

    }

  }

}


static bool shouldCheckPending(SchedBoundary &Zone,

                               const TargetSchedModel *SchedModel) {

  bool HasBufferedModel =

      SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();

  unsigned Combined = Zone.Available.size() + Zone.Pending.size();

  return Combined <= PendingQueueLimit && HasBufferedModel;

}


static SUnit *pickOnlyChoice(SchedBoundary &Zone,

                             const TargetSchedModel *SchedModel) {

  // pickOnlyChoice() releases pending instructions and checks for new hazards.

  SUnit *OnlyChoice = Zone.pickOnlyChoice();

  if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())

    return OnlyChoice;


  return nullptr;

}


void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,

                                              const SchedCandidate &Preferred) {

  LLVM_DEBUG({

    dbgs() << "Prefer:\t\t";

    DAG->dumpNode(*Preferred.SU);


    if (Current.SU) {

      dbgs() << "Not:\t";

      DAG->dumpNode(*Current.SU);

    }


    dbgs() << "Reason:\t\t";

    traceCandidate(Preferred);

  });

}


// This function is mostly cut and pasted from

// GenericScheduler::pickNodeFromQueue()


void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,

                                         const CandPolicy &ZonePolicy,

                                         const RegPressureTracker &RPTracker,

                                         SchedCandidate &Cand, bool &IsPending,

                                         bool IsBottomUp) {

  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);

  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();

  unsigned SGPRPressure = 0;

  unsigned VGPRPressure = 0;

  IsPending = false;

  if (DAG->isTrackingPressure()) {

    if (!useGCNTrackers()) {

      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];

      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

    } else {

      GCNRPTracker *T = IsBottomUp

                            ? static_cast<GCNRPTracker *>(&UpwardTracker)

                            : static_cast<GCNRPTracker *>(&DownwardTracker);

      SGPRPressure = T->getPressure().getSGPRNum();

      VGPRPressure = T->getPressure().getArchVGPRNum();

    }

  }

  LLVM_DEBUG(dbgs() << "Available Q:\n");

  ReadyQueue &AQ = Zone.Available;

  for (SUnit *SU : AQ) {


    SchedCandidate TryCand(ZonePolicy);

    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,

                  VGPRPressure, IsBottomUp);

    // Pass SchedBoundary only when comparing nodes from the same boundary.

    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;

    tryCandidate(Cand, TryCand, ZoneArg);

    if (TryCand.Reason != NoCand) {

      // Initialize resource delta if needed in case future heuristics query it.

      if (TryCand.ResDelta == SchedResourceDelta())

        TryCand.initResourceDelta(Zone.DAG, SchedModel);

      LLVM_DEBUG(printCandidateDecision(Cand, TryCand));

      Cand.setBest(TryCand);

    } else {

      printCandidateDecision(TryCand, Cand);

    }

  }


  if (!shouldCheckPending(Zone, SchedModel))

    return;


  LLVM_DEBUG(dbgs() << "Pending Q:\n");

  ReadyQueue &PQ = Zone.Pending;

  for (SUnit *SU : PQ) {


    SchedCandidate TryCand(ZonePolicy);

    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,

                  VGPRPressure, IsBottomUp);

    // Pass SchedBoundary only when comparing nodes from the same boundary.

    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;

    tryPendingCandidate(Cand, TryCand, ZoneArg);

    if (TryCand.Reason != NoCand) {

      // Initialize resource delta if needed in case future heuristics query it.

      if (TryCand.ResDelta == SchedResourceDelta())

        TryCand.initResourceDelta(Zone.DAG, SchedModel);

      LLVM_DEBUG(printCandidateDecision(Cand, TryCand));

      IsPending = true;

      Cand.setBest(TryCand);

    } else {

      printCandidateDecision(TryCand, Cand);

    }

  }

}


// This function is mostly cut and pasted from

// GenericScheduler::pickNodeBidirectional()


SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,

                                               bool &PickedPending) {

  // Schedule as far as possible in the direction of no choice. This is most

  // efficient, but also provides the best heuristics for CriticalPSets.

  if (SUnit *SU = pickOnlyChoice(Bot, SchedModel)) {

    IsTopNode = false;

    return SU;

  }

  if (SUnit *SU = pickOnlyChoice(Top, SchedModel)) {

    IsTopNode = true;

    return SU;

  }

  // Set the bottom-up policy based on the state of the current bottom zone

  // and the instructions outside the zone, including the top zone.

  CandPolicy BotPolicy;

  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);

  // Set the top-down policy based on the state of the current top zone and

  // the instructions outside the zone, including the bottom zone.

  CandPolicy TopPolicy;

  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);


  bool BotPending = false;

  // See if BotCand is still valid (because we previously scheduled from Top).

  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||

      BotCand.Policy != BotPolicy) {

    BotCand.reset(CandPolicy());

    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,

                      BotPending,

                      /*IsBottomUp=*/true);

    assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(BotCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,

                        BotPending,

                        /*IsBottomUp=*/true);

      assert(TCand.SU == BotCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  bool TopPending = false;

  // Check if the top Q has a better candidate.

  LLVM_DEBUG(dbgs() << "Picking from Top:\n");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||

      TopCand.Policy != TopPolicy) {

    TopCand.reset(CandPolicy());

    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,

                      TopPending,

                      /*IsBottomUp=*/false);

    assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  } else {

    LLVM_DEBUG(traceCandidate(TopCand));

#ifndef NDEBUG

    if (VerifyScheduling) {

      SchedCandidate TCand;

      TCand.reset(CandPolicy());

      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,

                        TopPending,

                        /*IsBottomUp=*/false);

      assert(TCand.SU == TopCand.SU &&

             "Last pick result should correspond to re-picking right now");

    }

#endif

  }


  // Pick best from BotCand and TopCand.

  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);

             dbgs() << "Bot Cand: "; traceCandidate(BotCand););

  SchedCandidate Cand = BotPending ? TopCand : BotCand;

  SchedCandidate TryCand = BotPending ? BotCand : TopCand;

  PickedPending = BotPending && TopPending;


  TryCand.Reason = NoCand;

  if (BotPending || TopPending) {

    PickedPending |= tryPendingCandidate(Cand, TopCand, nullptr);

  } else {

    tryCandidate(Cand, TryCand, nullptr);

  }


  if (TryCand.Reason != NoCand) {

    Cand.setBest(TryCand);

  }


  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););


  IsTopNode = Cand.AtTop;

  return Cand.SU;

}


// This function is mostly cut and pasted from

// GenericScheduler::pickNode()


SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {

  if (DAG->top() == DAG->bottom()) {

    assert(Top.Available.empty() && Top.Pending.empty() &&

           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    return nullptr;

  }

  bool PickedPending;

  SUnit *SU;

  do {

    PickedPending = false;

    if (RegionPolicy.OnlyTopDown) {

      SU = pickOnlyChoice(Top, SchedModel);

      if (!SU) {

        CandPolicy NoPolicy;

        TopCand.reset(NoPolicy);

        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,

                          PickedPending,

                          /*IsBottomUp=*/false);

        assert(TopCand.Reason != NoCand && "failed to find a candidate");

        SU = TopCand.SU;

      }

      IsTopNode = true;

    } else if (RegionPolicy.OnlyBottomUp) {

      SU = pickOnlyChoice(Bot, SchedModel);

      if (!SU) {

        CandPolicy NoPolicy;

        BotCand.reset(NoPolicy);

        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,

                          PickedPending,

                          /*IsBottomUp=*/true);

        assert(BotCand.Reason != NoCand && "failed to find a candidate");

        SU = BotCand.SU;

      }

      IsTopNode = false;

    } else {

      SU = pickNodeBidirectional(IsTopNode, PickedPending);

    }

  } while (SU->isScheduled);


  if (PickedPending) {

    unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;

    SchedBoundary &Zone = IsTopNode ? Top : Bot;

    unsigned CurrentCycle = Zone.getCurrCycle();

    if (ReadyCycle > CurrentCycle)

      Zone.bumpCycle(ReadyCycle);


    // FIXME: checkHazard() doesn't give information about which cycle the

    // hazard will resolve so just keep bumping the cycle by 1. This could be

    // made more efficient if checkHazard() returned more details.

    while (Zone.checkHazard(SU))

      Zone.bumpCycle(Zone.getCurrCycle() + 1);


    Zone.releasePending();

  }


  if (SU->isTopReady())

    Top.removeReady(SU);

  if (SU->isBottomReady())

    Bot.removeReady(SU);


  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "

                    << *SU->getInstr());

  return SU;

}


void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {

  if (useGCNTrackers()) {

    MachineInstr *MI = SU->getInstr();

    IsTopNode ? (void)DownwardTracker.advance(MI, false)

              : UpwardTracker.recede(*MI);

  }


  return GenericScheduler::schedNode(SU, IsTopNode);

}


GCNSchedStageID GCNSchedStrategy::getCurrentStage() {

  assert(CurrentStage && CurrentStage != SchedStages.end());

  return *CurrentStage;

}


bool GCNSchedStrategy::advanceStage() {

  assert(CurrentStage != SchedStages.end());

  if (!CurrentStage)

    CurrentStage = SchedStages.begin();

  else

    CurrentStage++;


  return CurrentStage != SchedStages.end();

}


bool GCNSchedStrategy::hasNextStage() const {

  assert(CurrentStage);

  return std::next(CurrentStage) != SchedStages.end();

}


GCNSchedStageID GCNSchedStrategy::getNextStage() const {

  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());

  return *std::next(CurrentStage);

}


bool GCNSchedStrategy::tryPendingCandidate(SchedCandidate &Cand,

                                           SchedCandidate &TryCand,

                                           SchedBoundary *Zone) const {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Bias PhysReg Defs and copies to their uses and defined respectively.

  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

    return TryCand.Reason != NoCand;


  // Avoid exceeding the target's limit.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,

                  RegExcess, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  // Avoid increasing the max critical pressure in the scheduled region.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,

                  TryCand, Cand, RegCritical, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  bool SameBoundary = Zone != nullptr;

  if (SameBoundary) {

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

                TryCand, Cand, ResourceReduce))

      return TryCand.Reason != NoCand;

    if (tryGreater(TryCand.ResDelta.DemandedResources,

                   Cand.ResDelta.DemandedResources, TryCand, Cand,

                   ResourceDemand))

      return TryCand.Reason != NoCand;

  }


  return false;

}


GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(

    const MachineSchedContext *C, bool IsLegacyScheduler)

    : GCNSchedStrategy(C) {

  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);

  if (!DisableRewriteMFMAFormSchedStage)

    SchedStages.push_back(GCNSchedStageID::RewriteMFMAForm);

  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);

  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);

  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);

  if (IsLegacyScheduler)

    GCNTrackersOverride = std::nullopt;

}


GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)

    : GCNSchedStrategy(C) {

  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);

}


bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,

                                          SchedCandidate &TryCand,

                                          SchedBoundary *Zone) const {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Avoid spilling by exceeding the register limit.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,

                  RegExcess, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  // Bias PhysReg Defs and copies to their uses and defined respectively.

  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

    return TryCand.Reason != NoCand;


  bool SameBoundary = Zone != nullptr;

  if (SameBoundary) {

    // Prioritize instructions that read unbuffered resources by stall cycles.

    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),

                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

      return TryCand.Reason != NoCand;


    // Avoid critical resource consumption and balance the schedule.

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

                TryCand, Cand, ResourceReduce))

      return TryCand.Reason != NoCand;

    if (tryGreater(TryCand.ResDelta.DemandedResources,

                   Cand.ResDelta.DemandedResources, TryCand, Cand,

                   ResourceDemand))

      return TryCand.Reason != NoCand;


    // Unconditionally try to reduce latency.

    if (tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Weak edges are for clustering and other constraints.

    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),

                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))

      return TryCand.Reason != NoCand;

  }


  // Keep clustered nodes together to encourage downstream peephole

  // optimizations which may reduce resource requirements.

  //

  // This is a best effort to set things up for a post-RA pass. Optimizations

  // like generating loads of multiple registers should ideally be done within

  // the scheduler pass by combining the loads during DAG postprocessing.

  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;

  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;

  bool CandIsClusterSucc =

      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);

  bool TryCandIsClusterSucc =

      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,

                 Cluster))

    return TryCand.Reason != NoCand;


  // Avoid increasing the max critical pressure in the scheduled region.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,

                  TryCand, Cand, RegCritical, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  // Avoid increasing the max pressure of the entire region.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,

                  Cand, RegMax, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Fall through to original instruction order.

    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||

        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {

      TryCand.Reason = NodeOrder;

      return true;

    }

  }

  return false;

}


GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(

    const MachineSchedContext *C)

    : GCNSchedStrategy(C) {

  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);

}


/// GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as

/// much as possible. This is achieved by:

//  1. Prioritize clustered operations before stall latency heuristic.

//  2. Prioritize long-latency-load before stall latency heuristic.

///

/// \param Cand provides the policy and current best candidate.

/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.

/// \param Zone describes the scheduled zone that we are extending, or nullptr

///             if Cand is from a different zone than TryCand.

/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)


bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,

                                                   SchedCandidate &TryCand,

                                                   SchedBoundary *Zone) const {

  // Initialize the candidate if needed.

  if (!Cand.isValid()) {

    TryCand.Reason = NodeOrder;

    return true;

  }


  // Bias PhysReg Defs and copies to their uses and defined respectively.

  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),

                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))

    return TryCand.Reason != NoCand;


  if (DAG->isTrackingPressure()) {

    // Avoid exceeding the target's limit.

    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,

                    RegExcess, TRI, DAG->MF))

      return TryCand.Reason != NoCand;


    // Avoid increasing the max critical pressure in the scheduled region.

    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,

                    TryCand, Cand, RegCritical, TRI, DAG->MF))

      return TryCand.Reason != NoCand;

  }


  // MaxMemoryClause-specific: We prioritize clustered instructions as we would

  // get more benefit from clausing these memory instructions.

  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;

  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;

  bool CandIsClusterSucc =

      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);

  bool TryCandIsClusterSucc =

      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);

  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,

                 Cluster))

    return TryCand.Reason != NoCand;


  // We only compare a subset of features when comparing nodes between

  // Top and Bottom boundary. Some properties are simply incomparable, in many

  // other instances we should only override the other boundary if something

  // is a clear good pick on one boundary. Skip heuristics that are more

  // "tie-breaking" in nature.

  bool SameBoundary = Zone != nullptr;

  if (SameBoundary) {

    // For loops that are acyclic path limited, aggressively schedule for

    // latency. Within an single cycle, whenever CurrMOps > 0, allow normal

    // heuristics to take precedence.

    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&

        tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // MaxMemoryClause-specific: Prioritize long latency memory load

    // instructions in top-bottom order to hide more latency. The mayLoad check

    // is used to exclude store-like instructions, which we do not want to

    // scheduler them too early.

    bool TryMayLoad =

        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();

    bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();


    if (TryMayLoad || CandMayLoad) {

      bool TryLongLatency =

          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;

      bool CandLongLatency =

          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;


      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,

                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,

                     Cand, Stall))

        return TryCand.Reason != NoCand;

    }

    // Prioritize instructions that read unbuffered resources by stall cycles.

    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),

                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

      return TryCand.Reason != NoCand;

  }


  if (SameBoundary) {

    // Weak edges are for clustering and other constraints.

    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),

                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))

      return TryCand.Reason != NoCand;

  }


  // Avoid increasing the max pressure of the entire region.

  if (DAG->isTrackingPressure() &&

      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,

                  Cand, RegMax, TRI, DAG->MF))

    return TryCand.Reason != NoCand;


  if (SameBoundary) {

    // Avoid critical resource consumption and balance the schedule.

    TryCand.initResourceDelta(DAG, SchedModel);

    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,

                TryCand, Cand, ResourceReduce))

      return TryCand.Reason != NoCand;

    if (tryGreater(TryCand.ResDelta.DemandedResources,

                   Cand.ResDelta.DemandedResources, TryCand, Cand,

                   ResourceDemand))

      return TryCand.Reason != NoCand;


    // Avoid serializing long latency dependence chains.

    // For acyclic path limited loops, latency was already checked above.

    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&

        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))

      return TryCand.Reason != NoCand;


    // Fall through to original instruction order.

    if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {

      assert(TryCand.SU->NodeNum != Cand.SU->NodeNum);

      TryCand.Reason = NodeOrder;

      return true;

    }

  }


  return false;

}


GCNScheduleDAGMILive::GCNScheduleDAGMILive(

    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)

    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),

      MFI(*MF.getInfo<SIMachineFunctionInfo>()),

      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),

      RegionLiveOuts(this, /*IsLiveOut=*/true) {


  // We want regions with a single MI to be scheduled so that we can reason

  // about them correctly during scheduling stages that move MIs between regions

  // (e.g., rematerialization).

  ScheduleSingleMIRegions = true;

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");

  if (RelaxedOcc) {

    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);

    if (MinOccupancy != StartingOccupancy)

      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy

                        << ".\n");

  }

}


std::unique_ptr<GCNSchedStage>

GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {

  switch (SchedStageID) {

  case GCNSchedStageID::OccInitialSchedule:

    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);

  case GCNSchedStageID::RewriteMFMAForm:

    return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *this);

  case GCNSchedStageID::UnclusteredHighRPReschedule:

    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);

  case GCNSchedStageID::ClusteredLowOccupancyReschedule:

    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);

  case GCNSchedStageID::PreRARematerialize:

    return std::make_unique<PreRARematStage>(SchedStageID, *this);

  case GCNSchedStageID::ILPInitialSchedule:

    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);

  case GCNSchedStageID::MemoryClauseInitialSchedule:

    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,

                                                              *this);

  }


  llvm_unreachable("Unknown SchedStageID.");

}


void GCNScheduleDAGMILive::schedule() {

  // Collect all scheduling regions. The actual scheduling is performed in

  // GCNScheduleDAGMILive::finalizeSchedule.

  Regions.push_back(std::pair(RegionBegin, RegionEnd));

}


GCNRegPressure

GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {

  if (Regions[RegionIdx].first == Regions[RegionIdx].second)

    return llvm::getRegPressure(MRI, LiveIns[RegionIdx]);

  GCNDownwardRPTracker RPTracker(*LIS);

  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,

                    &LiveIns[RegionIdx]);

  return RPTracker.moveMaxPressure();

}


static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,

                                        MachineBasicBlock::iterator RegionEnd) {

  assert(RegionBegin != RegionEnd && "Region must not be empty");

  return &*skipDebugInstructionsBackward(std::prev(RegionEnd), RegionBegin);

}


void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,

                                                const MachineBasicBlock *MBB) {

  GCNDownwardRPTracker RPTracker(*LIS);


  // If the block has the only successor then live-ins of that successor are

  // live-outs of the current block. We can reuse calculated live set if the

  // successor will be sent to scheduling past current block.


  // However, due to the bug in LiveInterval analysis it may happen that two

  // predecessors of the same successor block have different lane bitmasks for

  // a live-out register. Workaround that by sticking to one-to-one relationship

  // i.e. one predecessor with one successor block.

  const MachineBasicBlock *OnlySucc = nullptr;

  if (MBB->succ_size() == 1) {

    auto *Candidate = *MBB->succ_begin();

    if (!Candidate->empty() && Candidate->pred_size() == 1) {

      SlotIndexes *Ind = LIS->getSlotIndexes();

      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))

        OnlySucc = Candidate;

    }

  }


  // Scheduler sends regions from the end of the block upwards.

  size_t CurRegion = RegionIdx;

  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)

    if (Regions[CurRegion].first->getParent() != MBB)

      break;

  --CurRegion;


  auto I = MBB->begin();

  auto LiveInIt = MBBLiveIns.find(MBB);

  auto &Rgn = Regions[CurRegion];

  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);

  if (LiveInIt != MBBLiveIns.end()) {

    auto LiveIn = std::move(LiveInIt->second);

    RPTracker.reset(*MBB->begin(), MBB->end(), &LiveIn);

    MBBLiveIns.erase(LiveInIt);

  } else {

    I = Rgn.first;

    auto LRS = BBLiveInMap.lookup(NonDbgMI);

#ifdef EXPENSIVE_CHECKS

    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));

#endif

    RPTracker.reset(*I, I->getParent()->end(), &LRS);

  }


  for (;;) {

    I = RPTracker.getNext();


    if (Regions[CurRegion].first == I || NonDbgMI == I) {

      LiveIns[CurRegion] = RPTracker.getLiveRegs();

      RPTracker.clearMaxPressure();

    }


    if (Regions[CurRegion].second == I) {

      Pressure[CurRegion] = RPTracker.moveMaxPressure();

      if (CurRegion-- == RegionIdx)

        break;

      auto &Rgn = Regions[CurRegion];

      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);

    }

    RPTracker.advanceBeforeNext();

    RPTracker.advanceToNext();

  }


  if (OnlySucc) {

    if (I != MBB->end()) {

      RPTracker.advanceBeforeNext();

      RPTracker.advanceToNext();

      RPTracker.advance(MBB->end());

    }

    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();

  }

}


DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>

GCNScheduleDAGMILive::getRegionLiveInMap() const {

  assert(!Regions.empty());

  std::vector<MachineInstr *> RegionFirstMIs;

  RegionFirstMIs.reserve(Regions.size());

  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))

    RegionFirstMIs.push_back(

        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));


  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);

}


DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>

GCNScheduleDAGMILive::getRegionLiveOutMap() const {

  assert(!Regions.empty());

  std::vector<MachineInstr *> RegionLastMIs;

  RegionLastMIs.reserve(Regions.size());

  for (auto &[RegionBegin, RegionEnd] : reverse(Regions)) {

    // Skip empty regions.

    if (RegionBegin == RegionEnd)

      continue;

    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));

  }

  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);

}


void RegionPressureMap::buildLiveRegMap() {

  IdxToInstruction.clear();


  RegionLiveRegMap =

      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();

  for (unsigned I = 0; I < DAG->Regions.size(); I++) {

    auto &[RegionBegin, RegionEnd] = DAG->Regions[I];

    // Skip empty regions.

    if (RegionBegin == RegionEnd)

      continue;

    MachineInstr *RegionKey =

        IsLiveOut ? getLastMIForRegion(RegionBegin, RegionEnd) : &*RegionBegin;

    IdxToInstruction[I] = RegionKey;

  }

}


void GCNScheduleDAGMILive::finalizeSchedule() {

  // Start actual scheduling here. This function is called by the base

  // MachineScheduler after all regions have been recorded by

  // GCNScheduleDAGMILive::schedule().

  LiveIns.resize(Regions.size());

  Pressure.resize(Regions.size());

  RegionsWithHighRP.resize(Regions.size());

  RegionsWithExcessRP.resize(Regions.size());

  RegionsWithIGLPInstrs.resize(Regions.size());

  RegionsWithHighRP.reset();

  RegionsWithExcessRP.reset();

  RegionsWithIGLPInstrs.reset();


  runSchedStages();

}


void GCNScheduleDAGMILive::runSchedStages() {

  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");


  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);

  if (!Regions.empty()) {

    BBLiveInMap = getRegionLiveInMap();

    if (S.useGCNTrackers())

      RegionLiveOuts.buildLiveRegMap();

  }


#ifdef DUMP_MAX_REG_PRESSURE

  if (PrintMaxRPRegUsageBeforeScheduler) {

    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);

    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);

    LIS->dump();

  }

#endif


  while (S.advanceStage()) {

    auto Stage = createSchedStage(S.getCurrentStage());

    if (!Stage->initGCNSchedStage())

      continue;


    for (auto Region : Regions) {

      RegionBegin = Region.first;

      RegionEnd = Region.second;

      // Setup for scheduling the region and check whether it should be skipped.

      if (!Stage->initGCNRegion()) {

        Stage->advanceRegion();

        exitRegion();

        continue;

      }


      if (S.useGCNTrackers()) {

        const unsigned RegionIdx = Stage->getRegionIdx();

        S.getDownwardTracker()->reset(MRI, LiveIns[RegionIdx]);

        S.getUpwardTracker()->reset(

            MRI, RegionLiveOuts.getLiveRegsForRegionIdx(RegionIdx));

      }


      ScheduleDAGMILive::schedule();

      Stage->finalizeGCNRegion();

      Stage->advanceRegion();

      exitRegion();

    }


    Stage->finalizeGCNSchedStage();

  }


#ifdef DUMP_MAX_REG_PRESSURE

  if (PrintMaxRPRegUsageAfterScheduler) {

    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);

    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);

    LIS->dump();

  }

#endif

}


#ifndef NDEBUG


raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {

  switch (StageID) {

  case GCNSchedStageID::OccInitialSchedule:

    OS << "Max Occupancy Initial Schedule";

    break;

  case GCNSchedStageID::RewriteMFMAForm:

    OS << "Instruction Rewriting Reschedule";

    break;

  case GCNSchedStageID::UnclusteredHighRPReschedule:

    OS << "Unclustered High Register Pressure Reschedule";

    break;

  case GCNSchedStageID::ClusteredLowOccupancyReschedule:

    OS << "Clustered Low Occupancy Reschedule";

    break;

  case GCNSchedStageID::PreRARematerialize:

    OS << "Pre-RA Rematerialize";

    break;

  case GCNSchedStageID::ILPInitialSchedule:

    OS << "Max ILP Initial Schedule";

    break;

  case GCNSchedStageID::MemoryClauseInitialSchedule:

    OS << "Max memory clause Initial Schedule";

    break;

  }


  return OS;

}


#endif


GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)

    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),

      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}


bool GCNSchedStage::initGCNSchedStage() {

  if (!DAG.LIS)

    return false;


  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");

  return true;

}


void RewriteMFMAFormStage::findReachingDefs(

    MachineOperand &UseMO, LiveIntervals *LIS,

    SmallVectorImpl<SlotIndex> &DefIdxs) {

  MachineInstr *UseMI = UseMO.getParent();

  LiveInterval &UseLI = LIS->getInterval(UseMO.getReg());

  VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI));


  // If the def is not a PHI, then it must be the only reaching def.

  if (!VNI->isPHIDef()) {

    DefIdxs.push_back(VNI->def);

    return;

  }


  SmallPtrSet<MachineBasicBlock *, 8> Visited = {UseMI->getParent()};

  SmallVector<MachineBasicBlock *, 8> Worklist;


  // Mark the predecessor blocks for traversal

  for (MachineBasicBlock *PredMBB : UseMI->getParent()->predecessors()) {

    Worklist.push_back(PredMBB);

    Visited.insert(PredMBB);

  }


  while (!Worklist.empty()) {

    MachineBasicBlock *CurrMBB = Worklist.pop_back_val();


    SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB);

    VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot());


    MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def);


    // If there is a def in this block, then add it to the list. This is the

    // reaching def of this path.

    if (!VNI->isPHIDef()) {

      DefIdxs.push_back(VNI->def);

      continue;

    }


    for (MachineBasicBlock *PredMBB : DefMBB->predecessors()) {

      if (Visited.insert(PredMBB).second)

        Worklist.push_back(PredMBB);

    }

  }

}


void RewriteMFMAFormStage::findReachingUses(

    MachineInstr *DefMI, LiveIntervals *LIS,

    SmallVectorImpl<MachineOperand *> &ReachingUses) {

  SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI);

  for (MachineOperand &UseMO :

       DAG.MRI.use_nodbg_operands(DefMI->getOperand(0).getReg())) {

    SmallVector<SlotIndex, 8> ReachingDefIndexes;

    findReachingDefs(UseMO, LIS, ReachingDefIndexes);


    // If we find a use that contains this DefMI in its reachingDefs, then it is

    // a reaching use.

    if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {

          return SlotIndex::isSameInstr(RDIdx, DefIdx);

        }))

      ReachingUses.push_back(&UseMO);

  }

}


bool RewriteMFMAFormStage::initGCNSchedStage() {

  // We only need to run this pass if the architecture supports AGPRs.

  // Additionally, we don't use AGPRs at occupancy levels above 1 so there

  // is no need for this pass in that case, either.

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1)

    return false;


  RegionsWithExcessArchVGPR.resize(DAG.Regions.size());

  RegionsWithExcessArchVGPR.reset();

  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {

    GCNRegPressure PressureBefore = DAG.Pressure[Region];

    if (PressureBefore.getArchVGPRNum() > ST.getAddressableNumArchVGPRs())

      RegionsWithExcessArchVGPR[Region] = true;

  }


  if (RegionsWithExcessArchVGPR.none())

    return false;


  TII = ST.getInstrInfo();

  SRI = ST.getRegisterInfo();


  std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;

  DenseMap<MachineBasicBlock *, std::set<Register>> CopyForUse;

  SmallPtrSet<MachineInstr *, 8> CopyForDef;


  if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))

    return false;


  int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);


  // If we haven't found the beneficial conditions, prefer the VGPR form which

  // may result in less cross RC copies.

  if (Cost > 0)

    return false;


  return rewrite(RewriteCands);

}


bool UnclusteredHighRPStage::initGCNSchedStage() {

  if (DisableUnclusterHighRP)

    return false;


  if (!GCNSchedStage::initGCNSchedStage())

    return false;


  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())

    return false;


  SavedMutations.swap(DAG.Mutations);

  DAG.addMutation(

      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));


  InitialOccupancy = DAG.MinOccupancy;

  // Aggressively try to reduce register pressure in the unclustered high RP

  // stage. Temporarily increase occupancy target in the region.

  TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy

                            ? InitialOccupancy + 1

                            : InitialOccupancy;

  IsAnyRegionScheduled = false;

  S.SGPRLimitBias = S.HighRPSGPRBias;

  S.VGPRLimitBias = S.HighRPVGPRBias;


  LLVM_DEBUG(

      dbgs()

      << "Retrying function scheduling without clustering. "

         "Aggressively try to reduce register pressure to achieve occupancy "

      << TempTargetOccupancy << ".\n");


  return true;

}


bool ClusteredLowOccStage::initGCNSchedStage() {

  if (DisableClusteredLowOccupancy)

    return false;


  if (!GCNSchedStage::initGCNSchedStage())

    return false;


  // Don't bother trying to improve ILP in lower RP regions if occupancy has not

  // been dropped. All regions will have already been scheduled with the ideal

  // occupancy targets.

  if (DAG.StartingOccupancy <= DAG.MinOccupancy)

    return false;


  LLVM_DEBUG(

      dbgs() << "Retrying function scheduling with lowest recorded occupancy "

             << DAG.MinOccupancy << ".\n");

  return true;

}


/// Allows to easily filter for this stage's debug output.

#define REMAT_PREFIX "[PreRARemat] "

#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

Printable PreRARematStage::ScoredRemat::print() const {

  return Printable([&](raw_ostream &OS) {

    OS << '(' << MaxFreq << ", " << FreqDiff << ", " << RegionImpact << ')';

  });

}

#endif


bool PreRARematStage::initGCNSchedStage() {

  // FIXME: This pass will invalidate cached BBLiveInMap and MBBLiveIns for

  // regions inbetween the defs and region we sinked the def to. Will need to be

  // fixed if there is another pass after this pass.

  assert(!S.hasNextStage());


  if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)

    return false;


#ifndef NDEBUG

  auto PrintTargetRegions = [&]() -> void {

    if (TargetRegions.none()) {

      dbgs() << REMAT_PREFIX << "No target regions\n";

      return;

    }

    dbgs() << REMAT_PREFIX << "Target regions:\n";

    for (unsigned I : TargetRegions.set_bits())

      dbgs() << REMAT_PREFIX << "  [" << I << "] " << RPTargets[I] << '\n';

  };

#endif


  // Set an objective for the stage based on current RP in each region.

  REMAT_DEBUG({

    dbgs() << "Analyzing ";

    MF.getFunction().printAsOperand(dbgs(), false);

    dbgs() << ": ";

  });

  if (!setObjective()) {

    LLVM_DEBUG(dbgs() << "no objective to achieve, occupancy is maximal at "

                      << MFI.getMaxWavesPerEU() << '\n');

    return false;

  }

  LLVM_DEBUG({

    if (TargetOcc) {

      dbgs() << "increase occupancy from " << *TargetOcc - 1 << '\n';

    } else {

      dbgs() << "reduce spilling (minimum target occupancy is "

             << MFI.getMinWavesPerEU() << ")\n";

    }

    PrintTargetRegions();

  });


  // We need up-to-date live-out info. to query live-out register masks in

  // regions containing rematerializable instructions.

  DAG.RegionLiveOuts.buildLiveRegMap();


  if (!Remater.analyze()) {

    REMAT_DEBUG(dbgs() << "No rematerializable registers\n");

    return false;

  }

  const ScoredRemat::FreqInfo FreqInfo(MF, DAG);


  // Set of registers already marked for potential remterialization; used to

  // avoid rematerialization chains.

  SmallSet<Register, 4> MarkedRegs;


  // Collect candidates. We have more restrictions on what we can track here

  // compared to the rematerializer.

  SmallVector<ScoredRemat, 8> Candidates;

  SmallVector<unsigned> CandidateOrder;

  for (unsigned RegIdx = 0, E = Remater.getNumRegs(); RegIdx < E; ++RegIdx) {

    const Rematerializer::Reg &CandReg = Remater.getReg(RegIdx);


    // Single user only.

    unsigned NumUsers = 0;

    for (const auto &[_, RegionUses] : CandReg.Uses)

      NumUsers += RegionUses.size();

    if (NumUsers != 1)

      continue;


    // We further filter the registers that we can rematerialize based on our

    // current tracking capabilities in the stage. The user cannot itself be

    // marked rematerializable, and no register operand of the defining MI can

    // be marked rematerializable.

    MachineInstr *UseMI = *CandReg.Uses.begin()->getSecond().begin();

    const MachineOperand &UseMO = UseMI->getOperand(0);

    if (UseMO.isReg() && MarkedRegs.contains(UseMO.getReg()))

      continue;

    if (llvm::any_of(CandReg.DefMI->all_uses(),

                     [&MarkedRegs](const MachineOperand &MO) {

                       return MarkedRegs.contains(MO.getReg());

                     }))

      continue;


    // Do not rematerialize an instruction if it uses registers that aren't

    // available at its use. This ensures that we are not extending any live

    // range while rematerializing.

    SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);

    if (!VirtRegAuxInfo::allUsesAvailableAt(CandReg.DefMI, UseIdx, *DAG.LIS,

                                            DAG.MRI, *DAG.TII))

      continue;


    MarkedRegs.insert(CandReg.getDefReg());

    ScoredRemat &Cand = Candidates.emplace_back();

    Cand.init(RegIdx, FreqInfo, Remater, DAG);

    Cand.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);

    if (!Cand.hasNullScore())

      CandidateOrder.push_back(Candidates.size() - 1);

  }


  if (TargetOcc) {

    // Every rematerialization we do here is likely to move the instruction

    // into a higher frequency region, increasing the total sum latency of the

    // instruction itself. This is acceptable if we are eliminating a spill in

    // the process, but when the goal is increasing occupancy we get nothing

    // out of rematerialization if occupancy is not increased in the end; in

    // such cases we want to roll back the rematerialization.

    Rollback = std::make_unique<RollbackSupport>(Remater);

  }


  // Rematerialize registers in successive rounds until all RP targets are

  // satisifed or until we run out of rematerialization candidates.

  BitVector RecomputeRP(DAG.Regions.size());

  for (;;) {

    RecomputeRP.reset();


    // Sort candidates in increasing score order.

    sort(CandidateOrder, [&](unsigned LHSIndex, unsigned RHSIndex) {

      return Candidates[LHSIndex] < Candidates[RHSIndex];

    });


    REMAT_DEBUG({

      dbgs() << "==== NEW REMAT ROUND ====\n"

             << REMAT_PREFIX

             << "Candidates with non-null score, in rematerialization order:\n";

      for (const ScoredRemat &Cand : reverse(Candidates)) {

        dbgs() << REMAT_PREFIX << "  " << Cand.print() << " | "

               << Remater.printRematReg(Cand.RegIdx) << '\n';

      }

      PrintTargetRegions();

    });


    // Rematerialize registers in decreasing score order until we estimate

    // that all RP targets are satisfied or until rematerialization candidates

    // are no longer useful to decrease RP.

    while (!CandidateOrder.empty()) {

      const ScoredRemat &Cand = Candidates[CandidateOrder.back()];

      const Rematerializer::Reg &Reg = Remater.getReg(Cand.RegIdx);


      // When previous rematerializations in this round have already satisfied

      // RP targets in all regions this rematerialization can impact, we have a

      // good indication that our scores have diverged significantly from

      // reality, in which case we interrupt this round and re-score. This also

      // ensures that every rematerialization we perform is possibly impactful

      // in at least one target region.

      if (!Cand.maybeBeneficial(TargetRegions, RPTargets)) {

        REMAT_DEBUG(dbgs() << "Interrupt round on stale score for "

                           << Cand.print() << " | "

                           << Remater.printRematReg(Cand.RegIdx));

        break;

      }

      CandidateOrder.pop_back();


#ifdef EXPENSIVE_CHECKS

      // All uses are known to be available / live at the remat point. Thus,

      // the uses should already be live in to the using region.

      for (MachineOperand &MO : Reg.DefMI->operands()) {

        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())

          continue;


        Register UseReg = MO.getReg();

        if (!UseReg.isVirtual())

          continue;


        LiveInterval &LI = DAG.LIS->getInterval(UseReg);

        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());

        if (LI.hasSubRanges() && MO.getSubReg())

          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());


        const unsigned UseRegion = Reg.Uses.begin()->first;

        LaneBitmask LiveInMask = DAG.LiveIns[UseRegion].at(UseReg);

        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);

        // If this register has lanes not covered by the LiveIns, be sure they

        // do not map to any subrange. ref:

        // machine-scheduler-sink-trivial-remats.mir::omitted_subrange

        if (UncoveredLanes.any()) {

          assert(LI.hasSubRanges());

          for (LiveInterval::SubRange &SR : LI.subranges())

            assert((SR.LaneMask & UncoveredLanes).none());

        }

      }

#endif


      // Remove the register from all regions where it is a live-in or live-out,

      // then rematerialize the register.

      REMAT_DEBUG(dbgs() << "** REMAT " << Remater.printRematReg(Cand.RegIdx)

                         << '\n');

      removeFromLiveMaps(Reg.getDefReg(), Cand.LiveIn, Cand.LiveOut);

      if (Rollback) {

        Rollback->LiveMapUpdates.emplace_back(Cand.RegIdx, Cand.LiveIn,

                                              Cand.LiveOut);

      }

      Cand.rematerialize(Remater);


      // Adjust RP targets. The save is guaranteed in regions in which the

      // register is live-through and unused but optimistic in all other regions

      // where the register is live.

      updateRPTargets(Cand.Live, Cand.RPSave);

      RecomputeRP |= Cand.UnpredictableRPSave;

      RescheduleRegions |= Cand.Live;

      if (!TargetRegions.any()) {

        REMAT_DEBUG(dbgs() << "All targets cleared, verifying...\n");

        break;

      }

    }


    if (!updateAndVerifyRPTargets(RecomputeRP) && !TargetRegions.any()) {

      REMAT_DEBUG(dbgs() << "Objectives achieved!\n");

      break;

    }


    // Update the score of remaining candidates and filter out those that have

    // become useless from the vector. Candidates never become useful after

    // having been useless for a round, so we can freely drop them without

    // losing any future rematerialization opportunity.

    unsigned NumUsefulCandidates = 0;

    for (unsigned CandIdx : CandidateOrder) {

      ScoredRemat &Candidate = Candidates[CandIdx];

      Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);

      if (!Candidate.hasNullScore())

        CandidateOrder[NumUsefulCandidates++] = CandIdx;

    }

    if (NumUsefulCandidates == 0) {

      REMAT_DEBUG(dbgs() << "Stop on exhausted rematerialization candidates\n");

      break;

    }

    CandidateOrder.truncate(NumUsefulCandidates);

  }


  if (RescheduleRegions.none())

    return false;


  // Commit all pressure changes to the DAG and compute minimum achieved

  // occupancy in impacted regions.

  REMAT_DEBUG(dbgs() << "==== REMAT RESULTS ====\n");

  unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();

  for (unsigned I : RescheduleRegions.set_bits()) {

    DAG.Pressure[I] = RPTargets[I].getCurrentRP();

    REMAT_DEBUG(dbgs() << '[' << I << "] Achieved occupancy "

                       << DAG.Pressure[I].getOccupancy(ST, DynamicVGPRBlockSize)

                       << " (" << RPTargets[I] << ")\n");

  }

  AchievedOcc = MFI.getMaxWavesPerEU();

  for (const GCNRegPressure &RP : DAG.Pressure) {

    AchievedOcc =

        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));

  }


  REMAT_DEBUG({

    dbgs() << "Retrying function scheduling with new min. occupancy of "

           << AchievedOcc << " from rematerializing (original was "

           << DAG.MinOccupancy;

    if (TargetOcc)

      dbgs() << ", target was " << *TargetOcc;

    dbgs() << ")\n";

  });


  DAG.setTargetOccupancy(getStageTargetOccupancy());

  return true;

}


void GCNSchedStage::finalizeGCNSchedStage() {

  DAG.finishBlock();

  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");

}


void UnclusteredHighRPStage::finalizeGCNSchedStage() {

  SavedMutations.swap(DAG.Mutations);

  S.SGPRLimitBias = S.VGPRLimitBias = 0;

  if (DAG.MinOccupancy > InitialOccupancy) {

    assert(IsAnyRegionScheduled);

    LLVM_DEBUG(dbgs() << StageID

                      << " stage successfully increased occupancy to "

                      << DAG.MinOccupancy << '\n');

  } else if (!IsAnyRegionScheduled) {

    assert(DAG.MinOccupancy == InitialOccupancy);

    LLVM_DEBUG(dbgs() << StageID

                      << ": No regions scheduled, min occupancy stays at "

                      << DAG.MinOccupancy << ", MFI occupancy stays at "

                      << MFI.getOccupancy() << ".\n");

  }


  GCNSchedStage::finalizeGCNSchedStage();

}


bool GCNSchedStage::initGCNRegion() {

  // Skip empty scheduling region.

  if (DAG.begin() == DAG.end())

    return false;


  // Check whether this new region is also a new block.

  if (DAG.RegionBegin->getParent() != CurrentMBB)

    setupNewBlock();


  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());

  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);


  // Skip regions with 1 schedulable instruction.

  if (DAG.begin() == std::prev(DAG.end()))

    return false;


  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");

  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)

                    << " " << CurrentMBB->getName()

                    << "\n  From: " << *DAG.begin() << "    To: ";

             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;

             else dbgs() << "End";

             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');


  // Save original instruction order before scheduling for possible revert.

  Unsched.clear();

  Unsched.reserve(DAG.NumRegionInstrs);

  if (StageID == GCNSchedStageID::OccInitialSchedule ||

      StageID == GCNSchedStageID::ILPInitialSchedule) {

    const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG.TII);

    for (auto &I : DAG) {

      Unsched.push_back(&I);

      if (SII->isIGLPMutationOnly(I.getOpcode()))

        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;

    }

  } else {

    for (auto &I : DAG)

      Unsched.push_back(&I);

  }


  PressureBefore = DAG.Pressure[RegionIdx];


  LLVM_DEBUG(

      dbgs() << "Pressure before scheduling:\nRegion live-ins:"

             << print(DAG.LiveIns[RegionIdx], DAG.MRI)

             << "Region live-in pressure:  "

             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))

             << "Region register pressure: " << print(PressureBefore));


  S.HasHighPressure = false;

  S.KnownExcessRP = isRegionWithExcessRP();


  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&

      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {

    SavedMutations.clear();

    SavedMutations.swap(DAG.Mutations);

    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||

                          StageID == GCNSchedStageID::ILPInitialSchedule;

    DAG.addMutation(createIGroupLPDAGMutation(

        IsInitialStage ? AMDGPU::SchedulingPhase::Initial

                       : AMDGPU::SchedulingPhase::PreRAReentry));

  }


  return true;

}


bool UnclusteredHighRPStage::initGCNRegion() {

  // Only reschedule regions that have excess register pressure (i.e. spilling)

  // or had minimum occupancy at the beginning of the stage (as long as

  // rescheduling of previous regions did not make occupancy drop back down to

  // the initial minimum).

  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();

  // If no region has been scheduled yet, the DAG has not yet been updated with

  // the occupancy target. So retrieve it from the temporary.

  unsigned CurrentTargetOccupancy =

      IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy;

  if (!DAG.RegionsWithExcessRP[RegionIdx] &&

      (CurrentTargetOccupancy <= InitialOccupancy ||

       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=

           InitialOccupancy))

    return false;


  bool IsSchedulingThisRegion = GCNSchedStage::initGCNRegion();

  // If this is the first region scheduled during this stage, make the target

  // occupancy changes in the DAG and MFI.

  if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {

    IsAnyRegionScheduled = true;

    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)

      DAG.setTargetOccupancy(TempTargetOccupancy);

  }

  return IsSchedulingThisRegion;

}


bool ClusteredLowOccStage::initGCNRegion() {

  // We may need to reschedule this region if it wasn't rescheduled in the last

  // stage, or if we found it was testing critical register pressure limits in

  // the unclustered reschedule stage. The later is because we may not have been

  // able to raise the min occupancy in the previous stage so the region may be

  // overly constrained even if it was already rescheduled.

  if (!DAG.RegionsWithHighRP[RegionIdx])

    return false;


  return GCNSchedStage::initGCNRegion();

}


bool PreRARematStage::initGCNRegion() {

  return !RevertAllRegions && RescheduleRegions[RegionIdx] &&

         GCNSchedStage::initGCNRegion();

}


void GCNSchedStage::setupNewBlock() {

  if (CurrentMBB)

    DAG.finishBlock();


  CurrentMBB = DAG.RegionBegin->getParent();

  DAG.startBlock(CurrentMBB);

  // Get real RP for the region if it hasn't be calculated before. After the

  // initial schedule stage real RP will be collected after scheduling.

  if (StageID == GCNSchedStageID::OccInitialSchedule ||

      StageID == GCNSchedStageID::ILPInitialSchedule ||

      StageID == GCNSchedStageID::MemoryClauseInitialSchedule)

    DAG.computeBlockPressure(RegionIdx, CurrentMBB);

}


void GCNSchedStage::finalizeGCNRegion() {

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);

  if (S.HasHighPressure)

    DAG.RegionsWithHighRP[RegionIdx] = true;


  // Revert scheduling if we have dropped occupancy or there is some other

  // reason that the original schedule is better.

  checkScheduling();


  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&

      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)

    SavedMutations.swap(DAG.Mutations);

}


void PreRARematStage::finalizeGCNRegion() {

  GCNSchedStage::finalizeGCNRegion();

  // When the goal is to increase occupancy, all regions must reach the target

  // occupancy for rematerializations to be possibly useful, otherwise we will

  // just hurt latency for no benefit. If minimum occupancy drops below the

  // target there is no point in trying to re-schedule further regions.

  if (!TargetOcc)

    return;

  RegionReverts.emplace_back(RegionIdx, Unsched, PressureBefore);

  if (DAG.MinOccupancy < *TargetOcc) {

    REMAT_DEBUG(dbgs() << "Region " << RegionIdx

                       << " cannot meet occupancy target, interrupting "

                          "re-scheduling in all regions\n");

    RevertAllRegions = true;

  }

}


void GCNSchedStage::checkScheduling() {

  // Check the results of scheduling.

  PressureAfter = DAG.getRealRegPressure(RegionIdx);


  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));

  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");


  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();


  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&

      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {

    DAG.Pressure[RegionIdx] = PressureAfter;


    // Early out if we have achieved the occupancy target.

    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");

    return;

  }


  unsigned TargetOccupancy = std::min(

      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);

  unsigned WavesAfter = std::min(

      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));

  unsigned WavesBefore = std::min(

      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));

  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore

                    << ", after " << WavesAfter << ".\n");


  // We may not be able to keep the current target occupancy because of the just

  // scheduled region. We might still be able to revert scheduling if the

  // occupancy before was higher, or if the current schedule has register

  // pressure higher than the excess limits which could lead to more spilling.

  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);


  // Allow memory bound functions to drop to 4 waves if not limited by an

  // attribute.

  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&

      WavesAfter >= MFI.getMinAllowedOccupancy()) {

    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "

                      << MFI.getMinAllowedOccupancy() << " waves\n");

    NewOccupancy = WavesAfter;

  }


  if (NewOccupancy < DAG.MinOccupancy) {

    DAG.MinOccupancy = NewOccupancy;

    MFI.limitOccupancy(DAG.MinOccupancy);

    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "

                      << DAG.MinOccupancy << ".\n");

  }

  // The maximum number of arch VGPR on non-unified register file, or the

  // maximum VGPR + AGPR in the unified register file case.

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);

  // The maximum number of arch VGPR for both unified and non-unified register

  // file.

  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());

  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);


  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||

      PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||

      PressureAfter.getAGPRNum() > MaxArchVGPRs ||

      PressureAfter.getSGPRNum() > MaxSGPRs) {

    DAG.RegionsWithHighRP[RegionIdx] = true;

    DAG.RegionsWithExcessRP[RegionIdx] = true;

  }


  // Revert if this region's schedule would cause a drop in occupancy or

  // spilling.

  if (shouldRevertScheduling(WavesAfter)) {

    modifyRegionSchedule(RegionIdx, Unsched);

    std::tie(DAG.RegionBegin, DAG.RegionEnd) = DAG.Regions[RegionIdx];

  } else {

    DAG.Pressure[RegionIdx] = PressureAfter;

  }

}


unsigned


GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,

                                      DenseMap<unsigned, unsigned> &ReadyCycles,

                                      const TargetSchedModel &SM) {

  unsigned ReadyCycle = CurrCycle;

  for (auto &D : SU.Preds) {

    if (D.isAssignedRegDep()) {

      MachineInstr *DefMI = D.getSUnit()->getInstr();

      unsigned Latency = SM.computeInstrLatency(DefMI);

      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];

      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);

    }

  }

  ReadyCycles[SU.NodeNum] = ReadyCycle;

  return ReadyCycle;

}


#ifndef NDEBUG


struct EarlierIssuingCycle {


  bool operator()(std::pair<MachineInstr *, unsigned> A,

                  std::pair<MachineInstr *, unsigned> B) const {

    return A.second < B.second;

  }


};


static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,

                                        EarlierIssuingCycle> &ReadyCycles) {

  if (ReadyCycles.empty())

    return;

  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();

  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum

         << " ##################\n# Cycle #\t\t\tInstruction          "

            "             "

            "                            \n";

  unsigned IPrev = 1;

  for (auto &I : ReadyCycles) {

    if (I.second > IPrev + 1)

      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev

             << " CYCLES DETECTED ******************************\n\n";

    dbgs() << "[ " << I.second << " ]  :  " << *I.first << "\n";

    IPrev = I.second;

  }

}


#endif


ScheduleMetrics


GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {

#ifndef NDEBUG

  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>

      ReadyCyclesSorted;

#endif

  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();

  unsigned SumBubbles = 0;

  DenseMap<unsigned, unsigned> ReadyCycles;

  unsigned CurrCycle = 0;

  for (auto &SU : InputSchedule) {

    unsigned ReadyCycle =

        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);

    SumBubbles += ReadyCycle - CurrCycle;

#ifndef NDEBUG

    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));

#endif

    CurrCycle = ++ReadyCycle;

  }

#ifndef NDEBUG

  LLVM_DEBUG(

      printScheduleModel(ReadyCyclesSorted);

      dbgs() << "\n\t"

             << "Metric: "

             << (SumBubbles

                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle

                     : 1)

             << "\n\n");

#endif


  return ScheduleMetrics(CurrCycle, SumBubbles);

}


ScheduleMetrics


GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {

#ifndef NDEBUG

  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>

      ReadyCyclesSorted;

#endif

  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();

  unsigned SumBubbles = 0;

  DenseMap<unsigned, unsigned> ReadyCycles;

  unsigned CurrCycle = 0;

  for (auto &MI : DAG) {

    SUnit *SU = DAG.getSUnit(&MI);

    if (!SU)

      continue;

    unsigned ReadyCycle =

        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);

    SumBubbles += ReadyCycle - CurrCycle;

#ifndef NDEBUG

    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));

#endif

    CurrCycle = ++ReadyCycle;

  }

#ifndef NDEBUG

  LLVM_DEBUG(

      printScheduleModel(ReadyCyclesSorted);

      dbgs() << "\n\t"

             << "Metric: "

             << (SumBubbles

                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle

                     : 1)

             << "\n\n");

#endif


  return ScheduleMetrics(CurrCycle, SumBubbles);

}


bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {

  if (WavesAfter < DAG.MinOccupancy)

    return true;


  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.

  if (DAG.MFI.isDynamicVGPREnabled()) {

    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(

        ST, DAG.MFI.getDynamicVGPRBlockSize(),

        PressureBefore.getVGPRNum(false));

    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(

        ST, DAG.MFI.getDynamicVGPRBlockSize(), PressureAfter.getVGPRNum(false));

    if (BlocksAfter > BlocksBefore)

      return true;

  }


  return false;

}


bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {

  if (PressureAfter == PressureBefore)

    return false;


  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))

    return true;


  if (mayCauseSpilling(WavesAfter))

    return true;


  return false;

}


bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {

  // If RP is not reduced in the unclustered reschedule stage, revert to the

  // old schedule.

  if ((WavesAfter <=

           PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) &&

       mayCauseSpilling(WavesAfter)) ||

      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {

    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");

    return true;

  }


  // Do not attempt to relax schedule even more if we are already spilling.

  if (isRegionWithExcessRP())

    return false;


  LLVM_DEBUG(

      dbgs()

      << "\n\t      *** In shouldRevertScheduling ***\n"

      << "      *********** BEFORE UnclusteredHighRPStage ***********\n");

  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);

  LLVM_DEBUG(

      dbgs()

      << "\n      *********** AFTER UnclusteredHighRPStage ***********\n");

  ScheduleMetrics MAfter = getScheduleMetrics(DAG);

  unsigned OldMetric = MBefore.getMetric();

  unsigned NewMetric = MAfter.getMetric();

  unsigned WavesBefore = std::min(

      S.getTargetOccupancy(),

      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));

  unsigned Profit =

      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *

       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /

       NewMetric) /

      ScheduleMetrics::ScaleFactor;

  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "

                    << MAfter << "Profit: " << Profit << "\n");

  return Profit < ScheduleMetrics::ScaleFactor;

}


bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {

  if (PressureAfter == PressureBefore)

    return false;


  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))

    return true;


  if (mayCauseSpilling(WavesAfter))

    return true;


  return false;

}


bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {

  // When trying to increase occupancy (TargetOcc == true) the stage manages

  // region reverts globally (all or none), so we always return false here.

  return !TargetOcc && mayCauseSpilling(WavesAfter);

}


bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {

  if (mayCauseSpilling(WavesAfter))

    return true;


  return false;

}


bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(

    unsigned WavesAfter) {

  return mayCauseSpilling(WavesAfter);

}


bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {

  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&

      !PressureAfter.less(MF, PressureBefore)) {

    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");

    return true;

  }


  return false;

}


void GCNSchedStage::modifyRegionSchedule(unsigned RegionIdx,

                                         ArrayRef<MachineInstr *> MIOrder) {

  assert(static_cast<size_t>(std::distance(DAG.Regions[RegionIdx].first,

                                           DAG.Regions[RegionIdx].second)) ==

             MIOrder.size() &&

         "instruction number mismatch");

  if (MIOrder.empty())

    return;


  LLVM_DEBUG(dbgs() << "Reverting scheduling for region " << RegionIdx << '\n');


  // Reconstruct MI sequence by moving instructions in desired order before

  // the current region's start.

  MachineBasicBlock::iterator RegionEnd = DAG.Regions[RegionIdx].first;

  MachineBasicBlock *MBB = MIOrder.front()->getParent();

  for (MachineInstr *MI : MIOrder) {

    // Either move the next MI in order before the end of the region or move the

    // region end past the MI if it is at the correct position.

    MachineBasicBlock::iterator MII = MI->getIterator();

    if (MII != RegionEnd) {

      // Will subsequent splice move MI up past a non-debug instruction?

      bool NonDebugReordered =

          !MI->isDebugInstr() &&

          skipDebugInstructionsForward(RegionEnd, MII) != MII;

      MBB->splice(RegionEnd, MBB, MI);

      // Only update LiveIntervals information if non-debug instructions are

      // reordered. Otherwise debug instructions could cause code generation to

      // change.

      if (NonDebugReordered)

        DAG.LIS->handleMove(*MI, true);

    } else {

      // MI is already at the expected position. However, earlier splices in

      // this loop may have changed neighboring slot indices, so this MI's

      // slot index can become non-monotonic w.r.t. the physical MBB order.

      // Only re-seat when monotonicity is actually violated to avoid

      // unnecessary LiveInterval changes that could perturb scheduling.

      if (!MI->isDebugInstr()) {

        SlotIndex MIIdx = DAG.LIS->getInstructionIndex(*MI);

        SlotIndex PrevIdx = DAG.LIS->getSlotIndexes()->getIndexBefore(*MI);

        if (PrevIdx >= MIIdx)

          DAG.LIS->handleMove(*MI, true);

      }

      ++RegionEnd;

    }

    if (MI->isDebugInstr()) {

      LLVM_DEBUG(dbgs() << "Scheduling " << *MI);

      continue;

    }


    // Reset read-undef flags and update them later.

    for (MachineOperand &Op : MI->all_defs())

      Op.setIsUndef(false);

    RegisterOperands RegOpers;

    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);

    if (DAG.ShouldTrackLaneMasks) {

      // Adjust liveness and add missing dead+read-undef flags.

      SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();

      RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);

    } else {

      // Adjust for missing dead-def flags.

      RegOpers.detectDeadDefs(*MI, *DAG.LIS);

    }

    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);

  }


  // The region end doesn't change throughout scheduling since it itself is

  // outside the region (whether that is a MBB end or a terminator MI).

  assert(RegionEnd == DAG.Regions[RegionIdx].second && "region end mismatch");

  DAG.Regions[RegionIdx].first = MIOrder.front();

}


/// Returns true when \p RD will already be in AGPR-form after the rewrite, so

/// no bridge copy is needed at this reaching definition.


static bool isReachingDefAGPRForm(MachineInstr *RD,

                                  const DenseSet<Register> &CandSrc2Regs,

                                  const SIInstrInfo &TII) {

  if (TII.isMAI(*RD))

    return true;

  if (RD->getOpcode() == AMDGPU::AV_MOV_B32_IMM_PSEUDO ||

      RD->getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO)

    return true;

  if (RD->isCopy() && CandSrc2Regs.contains(RD->getOperand(1).getReg()))

    return true;

  return false;

}


bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *MI) const {

  if (!static_cast<const SIInstrInfo *>(DAG.TII)->isMAI(*MI))

    return false;

  if (AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) == -1)

    return false;

  // Reject candidates whose users force an unavoidable bridge copy.

  Register DstReg = MI->getOperand(0).getReg();

  for (const MachineOperand &Use : DAG.MRI.use_nodbg_operands(DstReg)) {

    if (!TII->isMAI(*Use.getParent()) && !Use.getParent()->isCopy())

      return false;

  }

  return true;

}


bool RewriteMFMAFormStage::initHeuristics(

    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,

    DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,

    SmallPtrSetImpl<MachineInstr *> &CopyForDef) {

  bool Changed = false;


  // Collect the candidate group, its members share AGPR-form operands

  // post-rewrite, so reaching defs feeding any member don't need bridge copy.

  SmallPtrSet<MachineInstr *, 16> RewriteSet;

  DenseSet<Register> CandSrc2Regs;

  for (MachineBasicBlock &MBB : MF) {

    for (MachineInstr &MI : MBB) {

      if (!isRewriteCandidate(&MI))

        continue;

      RewriteSet.insert(&MI);

      MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);

      if (Src2 && Src2->isReg())

        CandSrc2Regs.insert(Src2->getReg());

    }

  }


  // Prepare for the heuristics

  for (MachineBasicBlock &MBB : MF) {

    for (MachineInstr &MI : MBB) {

      if (!isRewriteCandidate(&MI))

        continue;


      int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());

      assert(ReplacementOp != -1);


      RewriteCands.push_back({&MI, MI.getOpcode()});

      MI.setDesc(TII->get(ReplacementOp));


      MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);

      if (Src2->isReg()) {

        SmallVector<SlotIndex, 8> Src2ReachingDefs;

        findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);


        for (SlotIndex RDIdx : Src2ReachingDefs) {

          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);

          if (isReachingDefAGPRForm(RD, CandSrc2Regs, *TII))

            continue;

          CopyForDef.insert(RD);

        }

      }


      MachineOperand &Dst = MI.getOperand(0);

      SmallVector<MachineOperand *, 8> DstReachingUses;


      findReachingUses(&MI, DAG.LIS, DstReachingUses);


      for (MachineOperand *RUOp : DstReachingUses) {

        MachineInstr *UserMI = RUOp->getParent();

        // Group members read the AGPR result directly.

        if (TII->isMAI(*UserMI) && RewriteSet.contains(UserMI))

          continue;


        // For any user of the result of the MFMA which is not an MFMA, we

        // insert a copy. For a given register, we will only insert one copy

        // per user block.

        CopyForUse[UserMI->getParent()].insert(RUOp->getReg());


        if (TII->isMAI(*UserMI))

          continue;


        SmallVector<SlotIndex, 8> DstUsesReachingDefs;

        findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);


        for (SlotIndex RDIndex : DstUsesReachingDefs) {

          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);

          if (TII->isMAI(*RD))

            continue;


          // For any definition of the user of the MFMA which is not an MFMA,

          // we insert a copy. We do this to transform all the reaching defs

          // of this use to AGPR. By doing this, we can insert a copy from

          // AGPR to VGPR at the user rather than after the MFMA.

          CopyForDef.insert(RD);

        }

      }


      // Do the rewrite to allow for updated RP calculation.

      const TargetRegisterClass *VDefRC = DAG.MRI.getRegClass(Dst.getReg());

      const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);

      DAG.MRI.setRegClass(Dst.getReg(), ADefRC);

      if (Src2->isReg()) {

        // Have to get src types separately since subregs may cause C and D

        // registers to be different types even though the actual operand is

        // the same size.

        const TargetRegisterClass *VUseRC = DAG.MRI.getRegClass(Src2->getReg());

        const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);

        DAG.MRI.setRegClass(Src2->getReg(), AUseRC);

      }

      Changed = true;

    }

  }


  return Changed;

}


int64_t RewriteMFMAFormStage::getRewriteCost(

    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,

    const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,

    const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {

  MachineBlockFrequencyInfo *MBFI = DAG.MBFI;


  int64_t BestSpillCost = 0;

  int64_t Cost = 0;

  uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency();


  std::pair<unsigned, unsigned> MaxVectorRegs =

      ST.getMaxNumVectorRegs(MF.getFunction());

  unsigned ArchVGPRThreshold = MaxVectorRegs.first;

  unsigned AGPRThreshold = MaxVectorRegs.second;

  unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF);


  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {

    if (!RegionsWithExcessArchVGPR[Region])

      continue;


    GCNRegPressure &PressureBefore = DAG.Pressure[Region];

    unsigned SpillCostBefore = PressureBefore.getVGPRSpills(

        MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);


    // For the cases we care about (i.e. ArchVGPR usage is greater than the

    // addressable limit), rewriting alone should bring pressure to manageable

    // level. If we find any such region, then the rewrite is potentially

    // beneficial.

    GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region);

    unsigned SpillCostAfter = PressureAfter.getVGPRSpills(

        MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);


    uint64_t BlockFreq =

        MBFI->getBlockFreq(DAG.Regions[Region].first->getParent())

            .getFrequency();


    bool RelativeFreqIsDenom = EntryFreq > BlockFreq;

    uint64_t RelativeFreq = EntryFreq && BlockFreq

                                ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq

                                                       : BlockFreq / EntryFreq)

                                : 1;


    // This assumes perfect spilling / splitting -- using one spill / copy

    // instruction and one restoreFrom / copy for each excess register,

    int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;


    // Also account for the block frequency.

    if (RelativeFreqIsDenom)

      SpillCost /= (int64_t)RelativeFreq;

    else

      SpillCost *= (int64_t)RelativeFreq;


    // If we have increased spilling in any block, just bail.

    if (SpillCost > 0)

      return SpillCost;


    if (SpillCost < BestSpillCost)

      BestSpillCost = SpillCost;

  }


  // Set the cost to the largest decrease in spill cost in order to not double

  // count spill reductions.

  Cost = BestSpillCost;

  assert(Cost <= 0);


  unsigned CopyCost = 0;


  // For each CopyForDef, increase the cost by the register size while

  // accounting for block frequency.

  for (MachineInstr *DefMI : CopyForDef) {

    Register DefReg = DefMI->getOperand(0).getReg();

    uint64_t DefFreq =

        EntryFreq

            ? MBFI->getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq

            : 1;


    const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg);

    CopyCost += RC->getCopyCost() * DefFreq;

  }


  // Account for CopyForUse copies in each block that the register is used.

  for (auto &[UseBlock, UseRegs] : CopyForUse) {

    uint64_t UseFreq =

        EntryFreq ? MBFI->getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1;


    for (Register UseReg : UseRegs) {

      const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg);

      CopyCost += RC->getCopyCost() * UseFreq;

    }

  }


  // Reset the classes that were changed to AGPR for better RB analysis.

  // We must do rewriting after copy-insertion, as some defs of the register

  // may require VGPR.  Additionally, if we bail out and don't perform the

  // rewrite then these need to be restored anyway.

  for (auto &[MI, OriginalOpcode] : RewriteCands) {

    assert(TII->isMAI(*MI));

    const TargetRegisterClass *ADefRC =

        DAG.MRI.getRegClass(MI->getOperand(0).getReg());

    const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);

    DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VDefRC);

    MI->setDesc(TII->get(OriginalOpcode));


    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);

    assert(Src2);

    if (!Src2->isReg())

      continue;


    // Have to get src types separately since subregs may cause C and D

    // registers to be different types even though the actual operand is

    // the same size.

    const TargetRegisterClass *AUseRC = DAG.MRI.getRegClass(Src2->getReg());

    const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);

    DAG.MRI.setRegClass(Src2->getReg(), VUseRC);

  }


  return Cost + CopyCost;

}


bool RewriteMFMAFormStage::rewrite(

    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {

  DenseMap<MachineInstr *, unsigned> FirstMIToRegion;

  DenseMap<MachineInstr *, unsigned> LastMIToRegion;


  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) {

    RegionBoundaries Entry = DAG.Regions[Region];

    if (Entry.first == Entry.second)

      continue;


    FirstMIToRegion[&*Entry.first] = Region;

    if (Entry.second != Entry.first->getParent()->end())

      LastMIToRegion[&*Entry.second] = Region;

  }


  // Rewrite the MFMAs to AGPR, and insert any copies as needed.

  // The general assumption of the algorithm (and the previous cost calculation)

  // is that it is better to insert the copies in the MBB of the def of the src2

  // operands, and in the MBB of the user of the dest operands. This is based on

  // the assumption that the MFMAs are likely to appear in loop bodies, while

  // the src2 and dest operands are live-in / live-out of the loop. Due to this

  // design, the algorithm for finding copy insertion points is more

  // complicated.

  //

  // There are three main cases to handle: 1. the reaching defs of the src2

  // operands, 2. the reaching uses of the dst operands, and 3. the reaching

  // defs of the reaching uses of the dst operand.

  //

  // In the first case, we simply insert copies after each of the reaching

  // definitions. In the second case, we collect all the uses of a given dest

  // and organize them by MBB. Then, we insert 1 copy for each MBB before the

  // earliest use. Since the use may have multiple reaching defs, and since we

  // want to replace the register it is using with the result of the copy, we

  // must handle case 3. In the third case, we simply insert a copy after each

  // of the reaching defs to connect to the copy of the reaching uses of the dst

  // reg. This allows us to avoid inserting copies next to the MFMAs.

  //

  // While inserting the copies, we maintain a map of operands which will use

  // different regs (i.e. the result of the copies). For example, a case 1 src2

  // operand will use the register result of the copies after the reaching defs,

  // as opposed to the original register. Now that we have completed our copy

  // analysis and placement, we can bulk update the registers. We do this

  // separately as to avoid complicating the reachingDef and reachingUse

  // queries.

  //

  // While inserting the copies, we also maintain a list or registers which we

  // will want to reclassify as AGPR. After doing the copy insertion and the

  // register replacement, we can finally do the reclassification. This uses the

  // redef map, as the registers we are interested in reclassifying may be

  // replaced by the result of a copy. We must do this after the copy analysis

  // and placement as we must have an accurate redef map -- otherwise we may end

  // up creating illegal instructions.


  // The original registers of the MFMA that need to be reclassified as AGPR.

  DenseSet<Register> RewriteRegs;

  // The map of an original register in the MFMA to a new register (result of a

  // copy) that it should be replaced with.

  DenseMap<Register, Register> RedefMap;

  // The map of the original MFMA registers to the relevant MFMA operands.

  DenseMap<Register, DenseSet<MachineOperand *>> ReplaceMap;

  // The map of reaching defs for a given register -- to avoid duplicate copies.

  DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;

  // The map of reaching uses for a given register by basic block -- to avoid

  // duplicate copies and to calculate per MBB insert pts.

  DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>

      ReachingUseTracker;


  // Collect the candidate group; its members share AGPR-form operands

  // post-rewrite, so reaching defs feeding any member need no bridge copy.

  SmallPtrSet<MachineInstr *, 16> RewriteCandsSet;

  DenseSet<Register> RewriteSrc2Regs;

  for (auto &[MI, OriginalOpcode] : RewriteCands) {

    RewriteCandsSet.insert(MI);

    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);

    if (Src2 && Src2->isReg())

      RewriteSrc2Regs.insert(Src2->getReg());

  }


  for (auto &[MI, OriginalOpcode] : RewriteCands) {

    int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());

    if (ReplacementOp == -1)

      continue;

    MI->setDesc(TII->get(ReplacementOp));


    // Case 1: insert copies for the reaching defs of the Src2Reg.

    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);

    if (Src2->isReg()) {

      Register Src2Reg = Src2->getReg();

      if (!Src2Reg.isVirtual())

        return false;


      Register MappedReg = Src2->getReg();

      SmallVector<SlotIndex, 8> Src2ReachingDefs;

      findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);

      SmallSetVector<MachineInstr *, 8> Src2DefsReplace;


      for (SlotIndex RDIndex : Src2ReachingDefs) {

        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);

        if (isReachingDefAGPRForm(RD, RewriteSrc2Regs, *TII))

          continue;


        Src2DefsReplace.insert(RD);

      }


      if (!Src2DefsReplace.empty()) {

        auto RI = RedefMap.find(Src2Reg);

        if (RI != RedefMap.end()) {

          MappedReg = RI->second;

        } else {

          assert(!ReachingDefCopyMap.contains(Src2Reg));

          const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg);

          const TargetRegisterClass *VGPRRC =

              SRI->getEquivalentVGPRClass(Src2RC);


          // Track the mapping of the original register to the new register.

          MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);

          RedefMap[Src2Reg] = MappedReg;

        }


        // If none exists, create a copy from this reaching def.

        // We may have inserted a copy already in an earlier iteration.

        for (MachineInstr *RD : Src2DefsReplace) {

          // Do not create redundant copies.

          if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {

            MachineInstrBuilder VGPRCopy =

                BuildMI(*RD->getParent(), std::next(RD->getIterator()),

                        RD->getDebugLoc(), TII->get(TargetOpcode::COPY))

                    .addDef(MappedReg, {}, 0)

                    .addUse(Src2Reg, {}, 0);

            DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);


            // If this reaching def was the last MI in the region, update the

            // region boundaries.

            if (LastMIToRegion.contains(RD)) {

              unsigned UpdateRegion = LastMIToRegion[RD];

              DAG.Regions[UpdateRegion].second = VGPRCopy;

              LastMIToRegion.erase(RD);

            }

          }

        }

      }


      // Track the register for reclassification

      RewriteRegs.insert(Src2Reg);


      // Always insert the operand for replacement. If this corresponds with a

      // chain of tied-def we may not see the VGPR requirement until later.

      ReplaceMap[Src2Reg].insert(Src2);

    }


    // Case 2 and Case 3: insert copies before the reaching uses of the dsts,

    // and after the reaching defs of the reaching uses of the dsts.


    MachineOperand *Dst = &MI->getOperand(0);

    Register DstReg = Dst->getReg();

    if (!DstReg.isVirtual())

      return false;


    Register MappedReg = DstReg;

    SmallVector<MachineOperand *, 8> DstReachingUses;


    SmallVector<MachineOperand *, 8> DstReachingUseCopies;

    SmallVector<MachineInstr *, 8> DstUseDefsReplace;


    findReachingUses(MI, DAG.LIS, DstReachingUses);


    for (MachineOperand *RUOp : DstReachingUses) {

      MachineInstr *UserMI = RUOp->getParent();

      // Group members read the AGPR result directly.

      if (TII->isMAI(*UserMI) && RewriteCandsSet.contains(UserMI))

        continue;


      // If there is a non mai reaching use, then we need a copy.

      if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end())

        DstReachingUseCopies.push_back(RUOp);


      // Non-rewritten MAI: its defs aren't being reclassified.

      if (TII->isMAI(*UserMI))

        continue;


      SmallVector<SlotIndex, 8> DstUsesReachingDefs;

      findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);


      for (SlotIndex RDIndex : DstUsesReachingDefs) {

        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);

        if (TII->isMAI(*RD))

          continue;


        // If there is a non mai reaching def of this reaching use, then we will

        // need a copy.

        if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end())

          DstUseDefsReplace.push_back(RD);

      }

    }


    if (!DstUseDefsReplace.empty()) {

      auto RI = RedefMap.find(DstReg);

      if (RI != RedefMap.end()) {

        MappedReg = RI->second;

      } else {

        assert(!ReachingDefCopyMap.contains(DstReg));

        const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);

        const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);


        // Track the mapping of the original register to the new register.

        MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);

        RedefMap[DstReg] = MappedReg;

      }


      // If none exists, create a copy from this reaching def.

      // We may have inserted a copy already in an earlier iteration.

      for (MachineInstr *RD : DstUseDefsReplace) {

        // Do not create reundant copies.

        if (ReachingDefCopyMap[DstReg].insert(RD).second) {

          MachineInstrBuilder VGPRCopy =

              BuildMI(*RD->getParent(), std::next(RD->getIterator()),

                      RD->getDebugLoc(), TII->get(TargetOpcode::COPY))

                  .addDef(MappedReg, {}, 0)

                  .addUse(DstReg, {}, 0);

          DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);


          // If this reaching def was the last MI in the region, update the

          // region boundaries.

          auto LMI = LastMIToRegion.find(RD);

          if (LMI != LastMIToRegion.end()) {

            unsigned UpdateRegion = LMI->second;

            DAG.Regions[UpdateRegion].second = VGPRCopy;

            LastMIToRegion.erase(RD);

          }

        }

      }

    }


    DenseSet<MachineOperand *> &DstRegSet = ReplaceMap[DstReg];

    for (MachineOperand *RU : DstReachingUseCopies) {

      MachineBasicBlock *RUBlock = RU->getParent()->getParent();

      // Just keep track of the reaching use of this register by block. After we

      // have scanned all the MFMAs we can find optimal insert pts.

      if (RUBlock != MI->getParent()) {

        ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU);

        continue;

      }


      // Special case, the use is in the same block as the MFMA. Insert the copy

      // just before the use.

      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);

      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);

      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);

      MachineInstr *UseInst = RU->getParent();

      MachineInstrBuilder VGPRCopy =

          BuildMI(*UseInst->getParent(), UseInst->getIterator(),

                  UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY))

              .addDef(NewUseReg, {}, 0)

              .addUse(DstReg, {}, 0);

      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);

      // Since we know this use has only one reaching def, we can replace the

      // use reg.

      RU->setReg(NewUseReg);

      // Track the copy source operand for r eplacement.

      DstRegSet.insert(&VGPRCopy->getOperand(1));

    }


    // Track the register for reclassification

    RewriteRegs.insert(DstReg);


    // Insert the dst operand for replacement. If this dst is in a chain of

    // tied-def MFMAs, and the first src2 needs to be replaced with a new reg,

    // all the correspond operands need to be replaced.

    DstRegSet.insert(Dst);

  }


  // Handle the copies for dst uses.

  using RUBType =

      std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;

  for (RUBType RUBlockEntry : ReachingUseTracker) {

    using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;

    for (RUDType RUDst : RUBlockEntry.second) {

      MachineOperand *OpBegin = *RUDst.second.begin();

      SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent());


      // Find the earliest use in this block.

      for (MachineOperand *User : RUDst.second) {

        SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent());

        if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))

          InstPt = NewInstPt;

      }


      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first);

      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);

      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);

      MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt);


      MachineInstrBuilder VGPRCopy =

          BuildMI(*UseInst->getParent(), UseInst->getIterator(),

                  UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY))

              .addDef(NewUseReg, {}, 0)

              .addUse(RUDst.first, {}, 0);

      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);


      // If this UseInst was the first MI in the region, update the region

      // boundaries.

      auto FI = FirstMIToRegion.find(UseInst);

      if (FI != FirstMIToRegion.end()) {

        unsigned UpdateRegion = FI->second;

        DAG.Regions[UpdateRegion].first = VGPRCopy;

        FirstMIToRegion.erase(UseInst);

      }


      // Replace the operand for all users.

      for (MachineOperand *User : RUDst.second) {

        User->setReg(NewUseReg);

      }


      // Track the copy source operand for replacement.

      ReplaceMap[RUDst.first].insert(&VGPRCopy->getOperand(1));

    }

  }


  // We may have needed to insert copies after the reaching defs of the MFMAs.

  // Replace the original register with the result of the copy for all relevant

  // operands.

  for (std::pair<Register, Register> NewDef : RedefMap) {

    Register OldReg = NewDef.first;

    Register NewReg = NewDef.second;


    // Replace the register for any associated operand in the MFMA chain.

    for (MachineOperand *ReplaceOp : ReplaceMap[OldReg])

      ReplaceOp->setReg(NewReg);

  }


  // Finally, do the reclassification of the MFMA registers.

  for (Register RewriteReg : RewriteRegs) {

    Register RegToRewrite = RewriteReg;


    // Be sure to update the replacement register and not the original.

    auto RI = RedefMap.find(RewriteReg);

    if (RI != RedefMap.end())

      RegToRewrite = RI->second;


    const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite);

    const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);


    DAG.MRI.setRegClass(RegToRewrite, AGPRRC);

  }


  // Bulk update the LIS.

  DAG.LIS->reanalyze(DAG.MF);

  // Liveins may have been modified for cross RC copies

  RegionPressureMap LiveInUpdater(&DAG, false);

  LiveInUpdater.buildLiveRegMap();


  for (unsigned Region = 0; Region < DAG.Regions.size(); Region++)

    DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);


  DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx);


  return true;

}


unsigned PreRARematStage::getStageTargetOccupancy() const {

  return TargetOcc ? *TargetOcc : MFI.getMinWavesPerEU();

}


bool PreRARematStage::setObjective() {

  const Function &F = MF.getFunction();


  // Set up "spilling targets" for all regions.

  unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);

  bool HasVectorRegisterExcess = false;

  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {

    const GCNRegPressure &RP = DAG.Pressure[I];

    GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP);

    if (!Target.satisfied())

      TargetRegions.set(I);

    HasVectorRegisterExcess |= Target.hasVectorRegisterExcess();

  }


  if (HasVectorRegisterExcess || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {

    // In addition to register usage being above addressable limits, occupancy

    // below the minimum is considered like "spilling" as well.

    TargetOcc = std::nullopt;

  } else {

    // There is no spilling and room to improve occupancy; set up "increased

    // occupancy targets" for all regions.

    TargetOcc = DAG.MinOccupancy + 1;

    const unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();

    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);

    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);

    for (auto [I, Target] : enumerate(RPTargets)) {

      Target.setTarget(MaxSGPRs, MaxVGPRs);

      if (!Target.satisfied())

        TargetRegions.set(I);

    }

  }


  return TargetRegions.any();

}


bool PreRARematStage::ScoredRemat::maybeBeneficial(

    const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets) const {

  for (unsigned I : TargetRegions.set_bits()) {

    if (Live[I] && RPTargets[I].isSaveBeneficial(RPSave))

      return true;

  }

  return false;

}


PreRARematStage::ScoredRemat::FreqInfo::FreqInfo(

    MachineFunction &MF, const GCNScheduleDAGMILive &DAG) {

  assert(DAG.MLI && "MLI not defined in DAG");

  MachineBranchProbabilityInfo MBPI;

  MachineBlockFrequencyInfo MBFI(MF, MBPI, *DAG.MLI);


  const unsigned NumRegions = DAG.Regions.size();

  MinFreq = MBFI.getEntryFreq().getFrequency();

  MaxFreq = 0;

  Regions.reserve(NumRegions);

  for (unsigned I = 0; I < NumRegions; ++I) {

    MachineBasicBlock *MBB = DAG.Regions[I].first->getParent();

    uint64_t BlockFreq = MBFI.getBlockFreq(MBB).getFrequency();

    Regions.push_back(BlockFreq);

    if (BlockFreq && BlockFreq < MinFreq)

      MinFreq = BlockFreq;

    else if (BlockFreq > MaxFreq)

      MaxFreq = BlockFreq;

  }

  if (!MinFreq)

    return;


  // Scale everything down if frequencies are high.

  if (MinFreq >= ScaleFactor * ScaleFactor) {

    for (uint64_t &Freq : Regions)

      Freq /= ScaleFactor;

    MinFreq /= ScaleFactor;

    MaxFreq /= ScaleFactor;

  }

}


void PreRARematStage::ScoredRemat::init(RegisterIdx RegIdx,

                                        const FreqInfo &Freq,

                                        const Rematerializer &Remater,

                                        GCNScheduleDAGMILive &DAG) {

  this->RegIdx = RegIdx;

  const unsigned NumRegions = DAG.Regions.size();

  LiveIn.resize(NumRegions);

  LiveOut.resize(NumRegions);

  Live.resize(NumRegions);

  UnpredictableRPSave.resize(NumRegions);


  const Rematerializer::Reg &Reg = Remater.getReg(RegIdx);

  Register DefReg = Reg.getDefReg();

  assert(Reg.Uses.size() == 1 && "expected users in single region");

  const unsigned UseRegion = Reg.Uses.begin()->first;


  // Mark regions in which the rematerializable register is live.

  for (unsigned I = 0, E = NumRegions; I != E; ++I) {

    if (DAG.LiveIns[I].contains(DefReg))

      LiveIn.set(I);

    if (DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).contains(DefReg))

      LiveOut.set(I);


    // If the register is both unused and live-through in the region, the

    // latter's RP is guaranteed to decrease.

    if (!LiveIn[I] || !LiveOut[I] || I == UseRegion)

      UnpredictableRPSave.set(I);

  }

  Live |= LiveIn;

  Live |= LiveOut;

  RPSave.inc(DefReg, LaneBitmask::getNone(), Reg.Mask, DAG.MRI);


  // Get frequencies of defining and using regions. A rematerialization from the

  // least frequent region to the most frequent region will yield the greatest

  // in order to penalize rematerializations from or into regions whose

  int64_t DefOrMin = std::max(Freq.Regions[Reg.DefRegion], Freq.MinFreq);

  int64_t UseOrMax = Freq.Regions[UseRegion];

  if (!UseOrMax)

    UseOrMax = Freq.MaxFreq;

  FreqDiff = DefOrMin - UseOrMax;

}


void PreRARematStage::ScoredRemat::update(const BitVector &TargetRegions,

                                          ArrayRef<GCNRPTarget> RPTargets,

                                          const FreqInfo &FreqInfo,

                                          bool ReduceSpill) {

  MaxFreq = 0;

  RegionImpact = 0;

  for (unsigned I : TargetRegions.set_bits()) {

    if (!Live[I])

      continue;


    // The rematerialization must contribute positively in at least one

    // register class with usage above the RP target for this region to

    // contribute to the score.

    const GCNRPTarget &RegionTarget = RPTargets[I];

    const unsigned NumRegsBenefit = RegionTarget.getNumRegsBenefit(RPSave);

    if (!NumRegsBenefit)

      continue;


    // Regions in which RP is guaranteed to decrease have more weight.

    RegionImpact += (UnpredictableRPSave[I] ? 1 : 2) * NumRegsBenefit;


    if (ReduceSpill) {

      uint64_t Freq = FreqInfo.Regions[I];

      if (UnpredictableRPSave[I]) {

        // Apply a frequency penalty in regions in which we are not sure that RP

        // will decrease.

        Freq /= 2;

      }

      MaxFreq = std::max(MaxFreq, Freq);

    }

  }

}


void PreRARematStage::ScoredRemat::rematerialize(

    Rematerializer &Remater) const {

  const Rematerializer::Reg &Reg = Remater.getReg(RegIdx);

  Rematerializer::DependencyReuseInfo DRI;

  for (const Rematerializer::Reg::Dependency &Dep : Reg.Dependencies)

    DRI.reuse(Dep.RegIdx);

  unsigned UseRegion = Reg.Uses.begin()->first;

  Remater.rematerializeToRegion(RegIdx, UseRegion, DRI);

}


void PreRARematStage::updateRPTargets(const BitVector &Regions,

                                      const GCNRegPressure &RPSave) {

  for (unsigned I : Regions.set_bits()) {

    RPTargets[I].saveRP(RPSave);

    if (TargetRegions[I] && RPTargets[I].satisfied()) {

      REMAT_DEBUG(dbgs() << "  [" << I << "] Target reached!\n");

      TargetRegions.reset(I);

    }

  }

}


bool PreRARematStage::updateAndVerifyRPTargets(const BitVector &Regions) {

  bool TooOptimistic = false;

  for (unsigned I : Regions.set_bits()) {

    GCNRPTarget &Target = RPTargets[I];

    Target.setRP(DAG.getRealRegPressure(I));


    // Since we were optimistic in assessing RP decreases in these regions, we

    // may need to remark the target as a target region if RP didn't decrease

    // as expected.

    if (!TargetRegions[I] && !Target.satisfied()) {

      REMAT_DEBUG(dbgs() << "  [" << I << "] Incorrect RP estimation\n");

      TooOptimistic = true;

      TargetRegions.set(I);

    }

  }

  return TooOptimistic;

}


void PreRARematStage::removeFromLiveMaps(Register Reg, const BitVector &LiveIn,

                                         const BitVector &LiveOut) {

  assert(LiveIn.size() == DAG.Regions.size() &&

         LiveOut.size() == DAG.Regions.size() && "region num mismatch");

  for (unsigned I : LiveIn.set_bits())

    DAG.LiveIns[I].erase(Reg);

  for (unsigned I : LiveOut.set_bits())

    DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).erase(Reg);

}


void PreRARematStage::addToLiveMaps(Register Reg, LaneBitmask Mask,

                                    const BitVector &LiveIn,

                                    const BitVector &LiveOut) {

  assert(LiveIn.size() == DAG.Regions.size() &&

         LiveOut.size() == DAG.Regions.size() && "region num mismatch");

  std::pair<Register, LaneBitmask> LiveReg(Reg, Mask);

  for (unsigned I : LiveIn.set_bits())

    DAG.LiveIns[I].insert(LiveReg);

  for (unsigned I : LiveOut.set_bits())

    DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I).insert(LiveReg);

}


void PreRARematStage::finalizeGCNSchedStage() {

  // We consider that reducing spilling is always beneficial so we never

  // rollback rematerializations or revert scheduling in such cases.

  if (!TargetOcc)

    return;


  // When increasing occupancy, it is possible that re-scheduling is not able to

  // achieve the target occupancy in all regions, in which case re-scheduling in

  // all regions should be reverted.

  if (DAG.MinOccupancy >= *TargetOcc)

    return;


  // Revert re-scheduling in all affected regions.

  for (const auto &[RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {

    REMAT_DEBUG(dbgs() << "Reverting re-scheduling in region " << RegionIdx

                       << '\n');

    DAG.Pressure[RegionIdx] = MaxPressure;

    modifyRegionSchedule(RegionIdx, OrigMIOrder);

  }


  // It is possible that re-scheduling lowers occupancy over the one achieved

  // just through rematerializations, in which case we revert re-scheduling in

  // all regions but do not roll back rematerializations.

  if (AchievedOcc >= *TargetOcc) {

    DAG.setTargetOccupancy(AchievedOcc);

    return;

  }


  // Reset the target occupancy to what it was pre-rematerialization.

  DAG.setTargetOccupancy(*TargetOcc - 1);


  // Roll back changes made by the stage, then recompute pressure in all

  // affected regions.

  REMAT_DEBUG(dbgs() << "==== ROLLBACK ====\n");

  assert(Rollback && "rollbacker should be defined");

  Rollback->Listener.rollback(Remater);

  for (const auto &[RegIdx, LiveIn, LiveOut] : Rollback->LiveMapUpdates) {

    const Rematerializer::Reg &Reg = Remater.getReg(RegIdx);

    addToLiveMaps(Reg.getDefReg(), Reg.Mask, LiveIn, LiveOut);

  }


#ifdef EXPENSIVE_CHECKS

  // In particular, we want to check for coherent MI/slot order in regions in

  // which reverts and/or rollbacks may have happened.

  MF.verify();

#endif

  for (unsigned I : RescheduleRegions.set_bits())

    DAG.Pressure[I] = DAG.getRealRegPressure(I);


  GCNSchedStage::finalizeGCNSchedStage();

}


void GCNScheduleDAGMILive::setTargetOccupancy(unsigned TargetOccupancy) {

  MinOccupancy = TargetOccupancy;

  if (MFI.getOccupancy() < TargetOccupancy)

    MFI.increaseOccupancy(MF, MinOccupancy);

  else

    MFI.limitOccupancy(MinOccupancy);

}


static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {

  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);

  return any_of(*DAG, [SII](MachineBasicBlock::iterator MI) {

    return SII->isIGLPMutationOnly(MI->getOpcode());

  });

}


GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(

    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,

    bool RemoveKillFlags)

    : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}


void GCNPostScheduleDAGMILive::schedule() {

  HasIGLPInstrs = hasIGLPInstrs(this);

  if (HasIGLPInstrs) {

    SavedMutations.clear();

    SavedMutations.swap(Mutations);

    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));

  }


  ScheduleDAGMI::schedule();

}


void GCNPostScheduleDAGMILive::finalizeSchedule() {

  if (HasIGLPInstrs)

    SavedMutations.swap(Mutations);


  ScheduleDAGMI::finalizeSchedule();

}


UseMI
MachineInstrBuilder & UseMI
Definition AArch64ExpandPseudoInsts.cpp:127

DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition AArch64ExpandPseudoInsts.cpp:128

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUBaseInfo.h

pickOnlyChoice
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
Definition AMDGPUCoExecSchedStrategy.cpp:36

AMDGPUIGroupLP.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

BitVector.h
This file implements the BitVector class.

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CalcSpillWeights.h

GCNHazardRecognizer.h

GCNRegPressure.h
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...

GCNTrackers
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))

DisableClusteredLowOccupancy
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))

REMAT_PREFIX
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
Definition GCNSchedStrategy.cpp:1430

getLastMIForRegion
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
Definition GCNSchedStrategy.cpp:1037

shouldCheckPending
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
Definition GCNSchedStrategy.cpp:395

RelaxedOcc
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))

REMAT_DEBUG
#define REMAT_DEBUG(X)
Definition GCNSchedStrategy.cpp:1431

DisableUnclusterHighRP
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))

printScheduleModel
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
Definition GCNSchedStrategy.cpp:1980

PrintMaxRPRegUsageAfterScheduler
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))

hasIGLPInstrs
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
Definition GCNSchedStrategy.cpp:3134

isReachingDefAGPRForm
static bool isReachingDefAGPRForm(MachineInstr *RD, const DenseSet< Register > &CandSrc2Regs, const SIInstrInfo &TII)
Returns true when RD will already be in AGPR-form after the rewrite, so no bridge copy is needed at t...
Definition GCNSchedStrategy.cpp:2253

DisableRewriteMFMAFormSchedStage
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))

canUsePressureDiffs
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
Definition GCNSchedStrategy.cpp:185

PendingQueueLimit
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))

PrintMaxRPRegUsageBeforeScheduler
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))

ScheduleMetricBias
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))

GCNSchedStrategy.h

UseReg
static Register UseReg(const MachineOperand &MO)
Definition HexagonCopyToCombine.cpp:245

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

_
#define _
Definition HexagonMCCodeEmitter.cpp:46

ReplaceMap
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
Definition HipStdPar.cpp:442

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

LaneBitmask.h
A common definition of LaneBitmask for use in TableGen and CodeGen.

MCSchedule.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MachineBasicBlock.h

MachineBlockFrequencyInfo.h

MachineBranchProbabilityInfo.h

MachineOperand.h

Reg
Register Reg
Definition MachineSink.cpp:2126

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

T
#define T
Definition Mips16ISelLowering.cpp:282

SM
static constexpr unsigned SM(unsigned Version)
Definition NVPTXSubtarget.cpp:40

if
if(PassOpts->AAPipeline)
Definition PassBuilderBindings.cpp:64

RegisterClassInfo.h

Rematerializer.h
MIR-level target-independent rematerialization helpers.

SIMachineFunctionInfo.h

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

TargetRegistry.h

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::front
const T & front() const
Get the first element.
Definition ArrayRef.h:144

llvm::ArrayRef::size
size_t size() const
Get the array size.
Definition ArrayRef.h:141

llvm::ArrayRef::empty
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136

llvm::BitVector
Definition BitVector.h:101

llvm::BitVector::reset
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409

llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159

llvm::BitVector::size
size_type size() const
Returns the number of bits in this bitvector.
Definition BitVector.h:178

llvm::BlockFrequency::getFrequency
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
Definition BlockFrequency.h:39

llvm::ClusteredLowOccStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1410

llvm::ClusteredLowOccStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2139

llvm::ClusteredLowOccStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1819

llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178

llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition DenseMap.h:328

llvm::DenseMapBase::end
iterator end()
Definition DenseMap.h:81

llvm::DenseMapBase::contains
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169

llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239

llvm::DenseMap
Definition DenseMap.h:743

llvm::DenseSet
Implements a dense probed hash-table based set.
Definition DenseSet.h:279

llvm::GCNDownwardRPTracker
Definition GCNRegPressure.h:407

llvm::GCNDownwardRPTracker::reset
bool reset(const MachineInstr &MI, MachineBasicBlock::const_iterator End, const LiveRegSet *LiveRegs=nullptr)
Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.
Definition GCNRegPressure.cpp:650

llvm::GCNDownwardRPTracker::bumpDownwardPressure
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
Definition GCNRegPressure.cpp:817

llvm::GCNHazardRecognizer
Definition GCNHazardRecognizer.h:33

llvm::GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:753

llvm::GCNMaxILPSchedStrategy::tryCandidate
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
Definition GCNSchedStrategy.cpp:758

llvm::GCNMaxMemoryClauseSchedStrategy::tryCandidate
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
Definition GCNSchedStrategy.cpp:860

llvm::GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:844

llvm::GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
Definition GCNSchedStrategy.cpp:740

llvm::GCNPostScheduleDAGMILive::finalizeSchedule
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition GCNSchedStrategy.cpp:3157

llvm::GCNPostScheduleDAGMILive::schedule
void schedule() override
Orders nodes according to selected style.
Definition GCNSchedStrategy.cpp:3146

llvm::GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Definition GCNSchedStrategy.cpp:3141

llvm::GCNRPTarget
Models a register pressure target, allowing to evaluate and track register savings against that targe...
Definition GCNRegPressure.h:223

llvm::GCNRPTarget::getNumRegsBenefit
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
Definition GCNRegPressure.cpp:441

llvm::GCNRPTracker
Definition GCNRegPressure.h:321

llvm::GCNRPTracker::getPressure
GCNRegPressure getPressure() const
Definition GCNRegPressure.h:359

llvm::GCNSchedStage::initGCNRegion
virtual bool initGCNRegion()
Definition GCNSchedStrategy.cpp:1726

llvm::GCNSchedStage::S
GCNSchedStrategy & S
Definition GCNSchedStrategy.h:346

llvm::GCNSchedStage::PressureBefore
GCNRegPressure PressureBefore
Definition GCNSchedStrategy.h:366

llvm::GCNSchedStage::isRegionWithExcessRP
bool isRegionWithExcessRP() const
Definition GCNSchedStrategy.h:407

llvm::GCNSchedStage::modifyRegionSchedule
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
Definition GCNSchedStrategy.cpp:2180

llvm::GCNSchedStage::mayCauseSpilling
bool mayCauseSpilling(unsigned WavesAfter)
Definition GCNSchedStrategy.cpp:2170

llvm::GCNSchedStage::getScheduleMetrics
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
Definition GCNSchedStrategy.cpp:2001

llvm::GCNSchedStage::DAG
GCNScheduleDAGMILive & DAG
Definition GCNSchedStrategy.h:344

llvm::GCNSchedStage::StageID
const GCNSchedStageID StageID
Definition GCNSchedStrategy.h:354

llvm::GCNSchedStage::Unsched
std::vector< MachineInstr * > Unsched
Definition GCNSchedStrategy.h:363

llvm::GCNSchedStage::PressureAfter
GCNRegPressure PressureAfter
Definition GCNSchedStrategy.h:369

llvm::GCNSchedStage::MF
MachineFunction & MF
Definition GCNSchedStrategy.h:348

llvm::GCNSchedStage::finalizeGCNRegion
virtual void finalizeGCNRegion()
Definition GCNSchedStrategy.cpp:1850

llvm::GCNSchedStage::MFI
SIMachineFunctionInfo & MFI
Definition GCNSchedStrategy.h:350

llvm::GCNSchedStage::checkScheduling
void checkScheduling()
Definition GCNSchedStrategy.cpp:1881

llvm::GCNSchedStage::RegionIdx
unsigned RegionIdx
Definition GCNSchedStrategy.h:360

llvm::GCNSchedStage::computeSUnitReadyCycle
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
Definition GCNSchedStrategy.cpp:1956

llvm::GCNSchedStage::finalizeGCNSchedStage
virtual void finalizeGCNSchedStage()
Definition GCNSchedStrategy.cpp:1702

llvm::GCNSchedStage::initGCNSchedStage
virtual bool initGCNSchedStage()
Definition GCNSchedStrategy.cpp:1268

llvm::GCNSchedStage::shouldRevertScheduling
virtual bool shouldRevertScheduling(unsigned WavesAfter)
Definition GCNSchedStrategy.cpp:2069

llvm::GCNSchedStage::SavedMutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
Definition GCNSchedStrategy.h:371

llvm::GCNSchedStage::GCNSchedStage
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Definition GCNSchedStrategy.cpp:1264

llvm::GCNSchedStage::setupNewBlock
void setupNewBlock()
Definition GCNSchedStrategy.cpp:1836

llvm::GCNSchedStage::CurrentMBB
MachineBasicBlock * CurrentMBB
Definition GCNSchedStrategy.h:357

llvm::GCNSchedStage::ST
const GCNSubtarget & ST
Definition GCNSchedStrategy.h:352

llvm::GCNSchedStrategy
This is a minimal scheduler strategy.
Definition GCNSchedStrategy.h:48

llvm::GCNSchedStrategy::DownwardTracker
GCNDownwardRPTracker DownwardTracker
Definition GCNSchedStrategy.h:106

llvm::GCNSchedStrategy::useGCNTrackers
bool useGCNTrackers() const
Definition GCNSchedStrategy.h:162

llvm::GCNSchedStrategy::getRegisterPressures
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
Definition GCNSchedStrategy.cpp:201

llvm::GCNSchedStrategy::GCNSchedStrategy
GCNSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:108

llvm::GCNSchedStrategy::SchedStages
SmallVector< GCNSchedStageID, 4 > SchedStages
Definition GCNSchedStrategy.h:100

llvm::GCNSchedStrategy::HasHighPressure
bool HasHighPressure
Definition GCNSchedStrategy.h:118

llvm::GCNSchedStrategy::SGPRCriticalLimit
unsigned SGPRCriticalLimit
Definition GCNSchedStrategy.h:135

llvm::GCNSchedStrategy::MaxPressure
std::vector< unsigned > MaxPressure
Definition GCNSchedStrategy.h:89

llvm::GCNSchedStrategy::hasNextStage
bool hasNextStage() const
Definition GCNSchedStrategy.cpp:689

llvm::GCNSchedStrategy::TargetOccupancy
unsigned TargetOccupancy
Definition GCNSchedStrategy.h:95

llvm::GCNSchedStrategy::pickNodeBidirectional
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
Definition GCNSchedStrategy.cpp:502

llvm::GCNSchedStrategy::KnownExcessRP
bool KnownExcessRP
Definition GCNSchedStrategy.h:122

llvm::GCNSchedStrategy::getCurrentStage
GCNSchedStageID getCurrentStage()
Definition GCNSchedStrategy.cpp:674

llvm::GCNSchedStrategy::VGPRExcessLimit
unsigned VGPRExcessLimit
Definition GCNSchedStrategy.h:93

llvm::GCNSchedStrategy::MF
MachineFunction * MF
Definition GCNSchedStrategy.h:97

llvm::GCNSchedStrategy::tryPendingCandidate
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
Definition GCNSchedStrategy.cpp:699

llvm::GCNSchedStrategy::advanceStage
bool advanceStage()
Definition GCNSchedStrategy.cpp:679

llvm::GCNSchedStrategy::CurrentStage
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
Definition GCNSchedStrategy.h:103

llvm::GCNSchedStrategy::VGPRCriticalLimit
unsigned VGPRCriticalLimit
Definition GCNSchedStrategy.h:137

llvm::GCNSchedStrategy::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition GCNSchedStrategy.cpp:664

llvm::GCNSchedStrategy::GCNTrackersOverride
std::optional< bool > GCNTrackersOverride
Definition GCNSchedStrategy.h:113

llvm::GCNSchedStrategy::getDownwardTracker
GCNDownwardRPTracker * getDownwardTracker()
Definition GCNSchedStrategy.h:168

llvm::GCNSchedStrategy::SGPRLimitBias
unsigned SGPRLimitBias
Definition GCNSchedStrategy.h:139

llvm::GCNSchedStrategy::SGPRExcessLimit
unsigned SGPRExcessLimit
Definition GCNSchedStrategy.h:91

llvm::GCNSchedStrategy::Pressure
std::vector< unsigned > Pressure
Definition GCNSchedStrategy.h:87

llvm::GCNSchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition GCNSchedStrategy.cpp:115

llvm::GCNSchedStrategy::UpwardTracker
GCNUpwardRPTracker UpwardTracker
Definition GCNSchedStrategy.h:109

llvm::GCNSchedStrategy::printCandidateDecision
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
Definition GCNSchedStrategy.cpp:413

llvm::GCNSchedStrategy::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
Definition GCNSchedStrategy.cpp:431

llvm::GCNSchedStrategy::getStructuralStallCycles
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
Definition GCNSchedStrategy.cpp:235

llvm::GCNSchedStrategy::ErrorMargin
unsigned ErrorMargin
Definition GCNSchedStrategy.h:127

llvm::GCNSchedStrategy::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
Definition GCNSchedStrategy.cpp:272

llvm::GCNSchedStrategy::VGPRLimitBias
unsigned VGPRLimitBias
Definition GCNSchedStrategy.h:141

llvm::GCNSchedStrategy::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
Definition GCNSchedStrategy.cpp:599

llvm::GCNSchedStrategy::getUpwardTracker
GCNUpwardRPTracker * getUpwardTracker()
Definition GCNSchedStrategy.h:170

llvm::GCNSchedStrategy::getNextStage
GCNSchedStageID getNextStage() const
Definition GCNSchedStrategy.cpp:694

llvm::GCNScheduleDAGMILive
Definition GCNSchedStrategy.h:260

llvm::GCNScheduleDAGMILive::finalizeSchedule
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition GCNSchedStrategy.cpp:1160

llvm::GCNScheduleDAGMILive::schedule
void schedule() override
Orders nodes according to selected style.
Definition GCNSchedStrategy.cpp:1021

llvm::GCNScheduleDAGMILive::GCNScheduleDAGMILive
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
Definition GCNSchedStrategy.cpp:978

llvm::GCNSubtarget
Definition GCNSubtarget.h:35

llvm::GCNUpwardRPTracker
Definition GCNRegPressure.h:374

llvm::GCNUpwardRPTracker::recede
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
Definition GCNRegPressure.cpp:587

llvm::GCNUpwardRPTracker::reset
void reset(const MachineInstr &MI)
Resets tracker to the point just after MI (in program order), which can be a debug instruction.
Definition GCNRegPressure.h:382

llvm::GenericSchedulerBase::traceCandidate
void traceCandidate(const SchedCandidate &Cand)
Definition MachineScheduler.cpp:3362

llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition MachineScheduler.h:1220

llvm::GenericSchedulerBase::setPolicy
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition MachineScheduler.cpp:3281

llvm::GenericSchedulerBase::RegionPolicy
MachineSchedPolicy RegionPolicy
Definition MachineScheduler.h:1218

llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition MachineScheduler.h:1212

llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition MachineScheduler.h:1211

llvm::GenericSchedulerBase::RegExcess
@ RegExcess
Definition MachineScheduler.h:1105

llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition MachineScheduler.h:1110

llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition MachineScheduler.h:1112

llvm::GenericSchedulerBase::ResourceReduce
@ ResourceReduce
Definition MachineScheduler.h:1111

llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition MachineScheduler.h:1108

llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition MachineScheduler.h:1102

llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition MachineScheduler.h:1106

llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition MachineScheduler.h:1117

llvm::GenericSchedulerBase::PhysReg
@ PhysReg
Definition MachineScheduler.h:1104

llvm::GenericSchedulerBase::Stall
@ Stall
Definition MachineScheduler.h:1107

llvm::GenericSchedulerBase::Weak
@ Weak
Definition MachineScheduler.h:1109

llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition MachineScheduler.h:1213

llvm::GenericScheduler::BotCand
SchedCandidate BotCand
Candidate last picked from Bot boundary.
Definition MachineScheduler.h:1324

llvm::GenericScheduler::Top
SchedBoundary Top
Definition MachineScheduler.h:1315

llvm::GenericScheduler::TopCand
SchedCandidate TopCand
Candidate last picked from Top boundary.
Definition MachineScheduler.h:1322

llvm::GenericScheduler::TopClusterID
unsigned TopClusterID
Definition MachineScheduler.h:1318

llvm::GenericScheduler::Bot
SchedBoundary Bot
Definition MachineScheduler.h:1316

llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition MachineScheduler.cpp:3951

llvm::GenericScheduler::DAG
ScheduleDAGMILive * DAG
Definition MachineScheduler.h:1312

llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition MachineScheduler.cpp:3641

llvm::GenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
Definition MachineScheduler.cpp:4267

llvm::GenericScheduler::GenericScheduler
GenericScheduler(const MachineSchedContext *C)
Definition MachineScheduler.h:1269

llvm::GenericScheduler::BotClusterID
unsigned BotClusterID
Definition MachineScheduler.h:1319

llvm::ILPInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2158

llvm::LiveInterval::SubRange
A live range for subregisters.
Definition LiveInterval.h:705

llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition LiveInterval.h:698

llvm::LiveInterval::hasSubRanges
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition LiveInterval.h:821

llvm::LiveInterval::subranges
iterator_range< subrange_iterator > subranges()
Definition LiveInterval.h:793

llvm::LiveIntervals
Definition LiveIntervals.h:55

llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition LiveIntervals.h:253

llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition LiveIntervals.h:268

llvm::LiveIntervals::getInterval
LiveInterval & getInterval(Register Reg)
Definition LiveIntervals.h:133

llvm::LiveIntervals::dump
LLVM_ABI void dump() const
Definition LiveIntervals.cpp:219

llvm::LiveIntervals::getMBBFromIndex
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
Definition LiveIntervals.h:280

llvm::LiveRange::getVNInfoAt
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition LiveInterval.h:431

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition MachineBasicBlock.h:1274

llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition MachineBasicBlock.h:450

llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition MachineBasicBlock.h:462

llvm::MachineBasicBlock::begin
iterator begin()
Definition MachineBasicBlock.h:384

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:386

llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition MachineBasicBlock.h:467

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:348

llvm::MachineBlockFrequencyInfo
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Definition MachineBlockFrequencyInfo.h:35

llvm::MachineBlockFrequencyInfo::getBlockFreq
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
Definition MachineBlockFrequencyInfo.cpp:261

llvm::MachineBlockFrequencyInfo::getEntryFreq
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Definition MachineBlockFrequencyInfo.cpp:308

llvm::MachineBranchProbabilityInfo
Definition MachineBranchProbabilityInfo.h:23

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition MachineInstrBuilder.h:218

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition MachineInstr.h:601

llvm::MachineInstr::isCopy
bool isCopy() const
Definition MachineInstr.h:1456

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition MachineInstr.h:373

llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition MachineInstr.h:1158

llvm::MachineInstr::operands
mop_range operands()
Definition MachineInstr.h:707

llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition MachineInstr.h:525

llvm::MachineInstr::all_uses
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
Definition MachineInstr.h:778

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:609

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:331

llvm::MachineOperand::getParent
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Definition MachineOperand.h:246

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::MemoryClauseInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2165

llvm::OccInitialScheduleStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2087

llvm::PreRARematStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2152

llvm::PreRARematStage::finalizeGCNRegion
void finalizeGCNRegion() override
Definition GCNSchedStrategy.cpp:1864

llvm::PreRARematStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1831

llvm::PreRARematStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1441

llvm::PressureChange
Capture a change in pressure for a single pressure set.
Definition RegisterPressure.h:103

llvm::PressureChange::setUnitInc
void setUnitInc(int Inc)
Definition RegisterPressure.h:127

llvm::Printable
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38

llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition MachineScheduler.h:571

llvm::ReadyQueue::empty
bool empty() const
Definition MachineScheduler.h:586

llvm::ReadyQueue::size
unsigned size() const
Definition MachineScheduler.h:590

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition RegisterPressure.h:361

llvm::RegPressureTracker::advance
LLVM_ABI void advance()
Advance across the current instruction.
Definition RegisterPressure.cpp:936

llvm::RegPressureTracker::getDownwardPressure
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
Definition RegisterPressure.cpp:1375

llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition RegisterPressure.h:467

llvm::RegPressureTracker::getUpwardPressure
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
Definition RegisterPressure.cpp:1359

llvm::RegionPressureMap::buildLiveRegMap
void buildLiveRegMap()
Definition GCNSchedStrategy.cpp:1144

llvm::Region
Definition RegionInfo.h:887

llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition RegisterPressure.h:167

llvm::RegisterOperands::collect
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition RegisterPressure.cpp:568

llvm::RegisterOperands::adjustLaneLiveness
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
Definition RegisterPressure.cpp:598

llvm::RegisterOperands::detectDeadDefs
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition RegisterPressure.cpp:579

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::Register::isVirtual
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79

llvm::Rematerializer
MIR-level target-independent rematerializer.
Definition Rematerializer.h:97

llvm::RewriteMFMAFormStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1338

llvm::SIInstrInfo
Definition SIInstrInfo.h:101

llvm::SIInstrInfo::isIGLPMutationOnly
bool isIGLPMutationOnly(unsigned Opcode) const
Definition SIInstrInfo.h:1181

llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition SIMachineFunctionInfo.h:415

llvm::SIMachineFunctionInfo::getOccupancy
unsigned getOccupancy() const
Definition SIMachineFunctionInfo.h:1183

llvm::SIMachineFunctionInfo::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize() const
Definition SIMachineFunctionInfo.h:851

llvm::SIMachineFunctionInfo::getMinAllowedOccupancy
unsigned getMinAllowedOccupancy() const
Definition SIMachineFunctionInfo.h:1187

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:249

llvm::SUnit::isInstr
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition ScheduleDAG.h:387

llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition ScheduleDAG.h:285

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition ScheduleDAG.h:277

llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition ScheduleDAG.h:312

llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition ScheduleDAG.h:305

llvm::SUnit::ParentClusterIdx
unsigned ParentClusterIdx
The parent cluster id.
Definition ScheduleDAG.h:288

llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition ScheduleDAG.h:286

llvm::SUnit::hasReservedResource
bool hasReservedResource
Uses a reserved resource.
Definition ScheduleDAG.h:310

llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition ScheduleDAG.h:476

llvm::SUnit::isTopReady
bool isTopReady() const
Definition ScheduleDAG.h:473

llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition ScheduleDAG.h:269

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition ScheduleDAG.h:399

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition MachineScheduler.h:863

llvm::SchedBoundary::releasePending
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
Definition MachineScheduler.cpp:3110

llvm::SchedBoundary::getLatencyStallCycles
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition MachineScheduler.cpp:2584

llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition MachineScheduler.h:872

llvm::SchedBoundary::isTop
bool isTop() const
Definition MachineScheduler.h:987

llvm::SchedBoundary::pickOnlyChoice
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
Definition MachineScheduler.cpp:3151

llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition MachineScheduler.h:879

llvm::SchedBoundary::Available
ReadyQueue Available
Definition MachineScheduler.h:876

llvm::SchedBoundary::bumpCycle
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
Definition MachineScheduler.cpp:2853

llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition MachineScheduler.h:995

llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition MachineScheduler.h:992

llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition MachineScheduler.h:877

llvm::SchedBoundary::checkHazard
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
Definition MachineScheduler.cpp:2703

llvm::SchedBoundary::getNextResourceCycle
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
Definition MachineScheduler.cpp:2622

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition ScheduleDAGInstrs.h:118

llvm::ScheduleDAGInstrs::ScheduleSingleMIRegions
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
Definition ScheduleDAGInstrs.h:131

llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition ScheduleDAGInstrs.h:153

llvm::ScheduleDAGInstrs::finalizeSchedule
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
Definition ScheduleDAGInstrs.h:366

llvm::ScheduleDAGInstrs::exitRegion
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Definition ScheduleDAGInstrs.cpp:208

llvm::ScheduleDAGInstrs::MLI
const MachineLoopInfo * MLI
Definition ScheduleDAGInstrs.h:120

llvm::ScheduleDAGInstrs::RemoveKillFlags
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
Definition ScheduleDAGInstrs.h:128

llvm::ScheduleDAGInstrs::RegionBegin
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
Definition ScheduleDAGInstrs.h:150

llvm::ScheduleDAGMILive::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition MachineScheduler.cpp:1692

llvm::ScheduleDAGMILive::ScheduleDAGMILive
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
Definition MachineScheduler.h:468

llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition MachineScheduler.h:452

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition MachineScheduler.h:314

llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition MachineScheduler.h:363

llvm::ScheduleDAGMI::schedule
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
Definition MachineScheduler.cpp:1058

llvm::ScheduleDAGMI::ScheduleDAGMI
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Definition MachineScheduler.h:337

llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition MachineScheduler.h:317

llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition MachineScheduler.h:322

llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition ScheduleDAG.h:591

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition ScheduleDAG.h:588

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition ScheduleDAG.h:590

llvm::ScheduleHazardRecognizer::isEnabled
bool isEnabled() const
Definition ScheduleHazardRecognizer.h:45

llvm::ScheduleMetrics
Definition GCNSchedStrategy.h:203

llvm::ScheduleMetrics::ScaleFactor
static const unsigned ScaleFactor
Definition GCNSchedStrategy.h:219

llvm::ScheduleMetrics::getMetric
unsigned getMetric() const
Definition GCNSchedStrategy.h:213

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151

llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66

llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition SlotIndexes.h:174

llvm::SlotIndex::isEarlierInstr
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition SlotIndexes.h:180

llvm::SlotIndex::getPrevSlot
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
Definition SlotIndexes.h:270

llvm::SlotIndexes::getMBBStartIdx
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition SlotIndexes.h:458

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:387

llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition SmallPtrSet.h:467

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition SmallPtrSet.h:533

llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134

llvm::SmallSet::contains
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition SmallSet.h:229

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:581

llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition SmallVector.h:681

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:966

llvm::SmallVectorTemplateBase::pop_back
void pop_back()
Definition SmallVector.h:435

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::end
iterator end()
Definition SmallVector.h:278

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:83

llvm::SmallVectorTemplateCommon::back
reference back()
Definition SmallVector.h:317

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::TargetRegisterClass::getCopyCost
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
Definition TargetRegisterInfo.h:114

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::TargetSchedModel::hasInstrSchedModel
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
Definition TargetSchedule.cpp:35

llvm::TargetSchedModel::getMicroOpBufferSize
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
Definition TargetSchedule.h:173

llvm::UnclusteredHighRPStage::initGCNSchedStage
bool initGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1377

llvm::UnclusteredHighRPStage::initGCNRegion
bool initGCNRegion() override
Definition GCNSchedStrategy.cpp:1792

llvm::UnclusteredHighRPStage::finalizeGCNSchedStage
void finalizeGCNSchedStage() override
Definition GCNSchedStrategy.cpp:1707

llvm::UnclusteredHighRPStage::shouldRevertScheduling
bool shouldRevertScheduling(unsigned WavesAfter) override
Definition GCNSchedStrategy.cpp:2100

llvm::VNInfo
VNInfo - Value Number Information.
Definition LiveInterval.h:54

llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition LiveInterval.h:62

llvm::VNInfo::isPHIDef
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition LiveInterval.h:79

llvm::VirtRegAuxInfo::allUsesAvailableAt
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
Definition CalcSpillWeights.cpp:149

llvm::cl::opt
Definition CommandLine.h:1454

llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202

llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

uint64_t

Changed
Changed
Definition ObjCARCOpts.cpp:2366

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

false
Definition MachinePipeliner.cpp:245

llvm::AArch64::RP
@ RP
Definition AArch64ISelLowering.h:33

llvm::AMDGPU::Exp::Target
Target
Definition SIDefines.h:1023

llvm::AMDGPU::HSAMD::AddressSpaceQualifier::Region
@ Region
Definition AMDGPUMetadata.h:75

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1456

llvm::AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1570

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1399

llvm::AMDGPU::SchedulingPhase::PostRA
@ PostRA
Definition AMDGPUIGroupLP.h:19

llvm::AMDGPU::SchedulingPhase::PreRAReentry
@ PreRAReentry
Definition AMDGPUIGroupLP.h:19

llvm::AMDGPU::SchedulingPhase::Initial
@ Initial
Definition AMDGPUIGroupLP.h:19

llvm::AMDGPU::getMFMASrcCVDstAGPROp
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)

llvm::COFF::Entry
@ Entry
Definition COFF.h:862

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::objcarc::ARCInstKind::User
@ User
could "use" a pointer
Definition ObjCARCInstKind.h:52

llvm::rdf::Use
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::biasPhysReg
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop, bool BiasPRegsExtra=false)
Minimize physical register live ranges.
Definition MachineScheduler.cpp:3840

llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764

llvm::isEqual
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Definition GCNRegPressure.cpp:25

llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
Definition GCNRegPressure.cpp:246

llvm::getWeakLeft
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
Definition MachineScheduler.cpp:3829

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:449

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::Latency
@ Latency
Definition SIMachineScheduler.h:34

llvm::NodeOrder
@ NodeOrder
Definition SIMachineScheduler.h:37

llvm::NoCand
@ NoCand
Definition SIMachineScheduler.h:32

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553

llvm::getRegPressure
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
Definition GCNRegPressure.h:554

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::createIGroupLPDAGMutation
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
Definition AMDGPUIGroupLP.cpp:2728

llvm::alignDown
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546

llvm::RegionBoundaries
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
Definition GCNSchedStrategy.h:257

llvm::skipDebugInstructionsForward
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
Definition MachineBasicBlock.h:1488

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745

llvm::tryPressure
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition MachineScheduler.cpp:3791

llvm::GCNSchedStageID
GCNSchedStageID
Definition GCNSchedStrategy.h:31

llvm::GCNSchedStageID::UnclusteredHighRPReschedule
@ UnclusteredHighRPReschedule
Definition GCNSchedStrategy.h:34

llvm::GCNSchedStageID::MemoryClauseInitialSchedule
@ MemoryClauseInitialSchedule
Definition GCNSchedStrategy.h:38

llvm::GCNSchedStageID::ILPInitialSchedule
@ ILPInitialSchedule
Definition GCNSchedStrategy.h:37

llvm::GCNSchedStageID::PreRARematerialize
@ PreRARematerialize
Definition GCNSchedStrategy.h:36

llvm::GCNSchedStageID::RewriteMFMAForm
@ RewriteMFMAForm
Definition GCNSchedStrategy.h:33

llvm::GCNSchedStageID::OccInitialSchedule
@ OccInitialSchedule
Definition GCNSchedStrategy.h:32

llvm::GCNSchedStageID::ClusteredLowOccupancyReschedule
@ ClusteredLowOccupancyReschedule
Definition GCNSchedStrategy.h:35

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163

llvm::VerifyScheduling
LLVM_ABI cl::opt< bool > VerifyScheduling

llvm::tryLatency
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition MachineScheduler.cpp:3475

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1151

llvm::skipDebugInstructionsBackward
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
Definition MachineBasicBlock.h:1501

llvm::errs
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition raw_ostream.cpp:904

llvm::isTheSameCluster
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster ID's are the same and valid.
Definition ScheduleDAG.h:244

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:25

llvm::tryGreater
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition MachineScheduler.cpp:3459

llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition APFixedPoint.h:312

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916

llvm::getLiveRegMap
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
creates a map MachineInstr -> LiveRegSet R - range of iterators on instructions After - upon entry or...
Definition GCNRegPressure.h:504

llvm::getLiveRegsBefore
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
Definition GCNRegPressure.h:547

llvm::tryLess
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition MachineScheduler.cpp:3443

llvm::dumpMaxRegPressure
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
Definition GCNRegPressure.cpp:1075

llvm::printMBBReference
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition MachineBasicBlock.cpp:120

std
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874

true
Definition SPIRVConvergenceRegionAnalysis.cpp:41

EarlierIssuingCycle
Definition GCNSchedStrategy.cpp:1973

EarlierIssuingCycle::operator()
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
Definition GCNSchedStrategy.cpp:1974

llvm::GCNRegPressure
Definition GCNRegPressure.h:32

llvm::GCNRegPressure::SGPR
@ SGPR
Definition GCNRegPressure.h:33

llvm::GCNRegPressure::VGPR
@ VGPR
Definition GCNRegPressure.h:33

llvm::GCNRegPressure::getArchVGPRNum
unsigned getArchVGPRNum() const
Definition GCNRegPressure.h:86

llvm::GCNRegPressure::getAGPRNum
unsigned getAGPRNum() const
Definition GCNRegPressure.h:88

llvm::GCNRegPressure::getSGPRNum
unsigned getSGPRNum() const
Definition GCNRegPressure.h:57

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition MachineScheduler.h:1126

llvm::GenericSchedulerBase::CandPolicy::ReduceLatency
bool ReduceLatency
Definition MachineScheduler.h:1127

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition MachineScheduler.h:1164

llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition MachineScheduler.h:1197

llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition MachineScheduler.h:1168

llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition MachineScheduler.h:1185

llvm::GenericSchedulerBase::SchedCandidate::initResourceDelta
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition MachineScheduler.cpp:3242

llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition MachineScheduler.h:1177

llvm::GenericSchedulerBase::SchedCandidate::AtTop
bool AtTop
Definition MachineScheduler.h:1174

llvm::GenericSchedulerBase::SchedCandidate::ResDelta
SchedResourceDelta ResDelta
Definition MachineScheduler.h:1180

llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition MachineScheduler.h:1194

llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition MachineScheduler.h:1171

llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition MachineScheduler.h:1165

llvm::GenericSchedulerBase::SchedResourceDelta
Status of an instruction's critical resource consumption.
Definition MachineScheduler.h:1144

llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition MachineScheduler.h:1146

llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition MachineScheduler.h:1149

llvm::LaneBitmask
Definition LaneBitmask.h:40

llvm::LaneBitmask::any
constexpr bool any() const
Definition LaneBitmask.h:53

llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition LaneBitmask.h:81

llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129

llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition MCSchedule.h:74

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition MachineScheduler.h:145

llvm::PreRARematStage::ScoredRemat::FreqInfo
Execution frequency information required by scoring heuristics.
Definition GCNSchedStrategy.h:568

llvm::PreRARematStage::ScoredRemat::FreqInfo::MaxFreq
uint64_t MaxFreq
Definition GCNSchedStrategy.h:572

llvm::PreRARematStage::ScoredRemat::FreqInfo::Regions
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
Definition GCNSchedStrategy.h:570

llvm::PreRARematStage::ScoredRemat::FreqInfo::MinFreq
uint64_t MinFreq
Minimum and maximum observed frequencies.
Definition GCNSchedStrategy.h:572

llvm::PreRARematStage::ScoredRemat::FreqInfo::FreqInfo
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
Definition GCNSchedStrategy.cpp:2907

llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition RegisterPressure.h:245

llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition RegisterPressure.h:246

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition RegisterPressure.h:244

llvm::Rematerializer::DependencyReuseInfo::reuse
DependencyReuseInfo & reuse(RegisterIdx DepIdx)
Definition Rematerializer.h:319

llvm::Rematerializer::Reg::Dependency::RegIdx
RegisterIdx RegIdx
The corresponding register's index in the rematerializer.
Definition Rematerializer.h:133

llvm::Rematerializer::Reg
A rematerializable register defined by a single machine instruction.
Definition Rematerializer.h:115

llvm::Rematerializer::Reg::DefMI
MachineInstr * DefMI
Single MI defining the rematerializable register.
Definition Rematerializer.h:117

llvm::Rematerializer::Reg::Uses
SmallDenseMap< unsigned, RegionUsers, 2 > Uses
Uses of the register, mapped by region.
Definition Rematerializer.h:125

llvm::Rematerializer::Reg::getDefReg
Register getDefReg() const
Returns the rematerializable register from its defining instruction.
Definition Rematerializer.h:143

llvm::cl::desc
Definition CommandLine.h:410