docs/doxygen/AMDGPUCoExecSchedStrategy_8cpp_source.html

//===- AMDGPUCoExecSchedStrategy.cpp - CoExec Scheduling Strategy ---------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Coexecution-focused scheduling strategy for AMDGPU.

//

//===----------------------------------------------------------------------===//


#include "AMDGPUCoExecSchedStrategy.h"

#include "llvm/Support/Debug.h"


using namespace llvm;

using namespace llvm::AMDGPU;


#define DEBUG_TYPE "machine-scheduler"


namespace {


// Used to disable post-RA scheduling with function level granularity.

// The DAG is constructed like any other ScheduleDAGInstrs, but schedule() has

// an empty body, so running it leaves the instruction order untouched.

class GCNNoopPostScheduleDAG final : public ScheduleDAGInstrs {

public:

  // Forwards the function and loop info from the scheduling context to the

  // base class; RemoveKillFlags is always requested.

  explicit GCNNoopPostScheduleDAG(MachineSchedContext *C)

      : ScheduleDAGInstrs(*C->MF, C->MLI, /*RemoveKillFlags=*/true) {}


  // Do nothing.

  void schedule() override {}

};


} // namespace


/// Return the boundary's only choice for \p Zone, or nullptr when the pending
/// queue is still non-empty after the boundary query has run.
static SUnit *pickOnlyChoice(SchedBoundary &Zone) {
  // The boundary query has to run first: it releases pending instructions and
  // checks for new hazards, so the state of Pending is only meaningful
  // afterwards.
  SUnit *Sole = Zone.pickOnlyChoice();
  return Zone.Pending.empty() ? Sole : nullptr;
}


/// Map \p MI onto the InstructionFlavor used to group instructions by
/// hardware unit.
///
/// The checks are ordered: the first one that matches decides the flavor, so
/// an instruction satisfying several SIInstrInfo predicates is classified by
/// the earliest of them. Anything that matches none is InstructionFlavor::Other.
InstructionFlavor llvm::AMDGPU::classifyFlavor(const MachineInstr &MI,
                                               const SIInstrInfo &SII) {
  // Debug instructions are never attributed to a hardware unit.
  if (MI.isDebugInstr())
    return InstructionFlavor::Other;

  // Opcodes that are classified by identity rather than by predicate.
  switch (MI.getOpcode()) {
  case AMDGPU::ATOMIC_FENCE:
  case AMDGPU::S_WAIT_ASYNCCNT:
  case AMDGPU::S_WAIT_TENSORCNT:
  case AMDGPU::S_BARRIER_WAIT:
  case AMDGPU::S_BARRIER_SIGNAL_IMM:
    return InstructionFlavor::Fence;
  default:
    break;
  }

  // Predicate-based classification; keep this order.
  if (SII.isLDSDMA(MI))
    return InstructionFlavor::DMA;
  if (SII.isMFMAorWMMA(MI))
    return InstructionFlavor::WMMA;
  if (SII.isTRANS(MI))
    return InstructionFlavor::TRANS;
  if (SII.isVALU(MI))
    return InstructionFlavor::SingleCycleVALU;
  if (SII.isDS(MI))
    return InstructionFlavor::DS;

  const bool IsAnyFlat =
      SII.isFLAT(MI) || SII.isFLATGlobal(MI) || SII.isFLATScratch(MI);
  if (IsAnyFlat)
    return InstructionFlavor::VMEM;

  return SII.isSALU(MI) ? InstructionFlavor::SALU : InstructionFlavor::Other;
}


/// Find the next SU of this hardware unit that is not yet top-ready.
///
/// The priority set is searched first and the first member that is not
/// top-ready is returned. If there is none and \p LookDeep is set, all SUs of
/// the unit are scanned and the unscheduled, not-top-ready SU with the
/// smallest depth is returned (the first one encountered wins a tie).
/// Returns nullptr when nothing qualifies.
SUnit *HardwareUnitInfo::getNextTargetSU(bool LookDeep) const {
  for (auto *Prioritized : PrioritySUs)
    if (!Prioritized->isTopReady())
      return Prioritized;

  if (!LookDeep)
    return nullptr;

  SUnit *Shallowest = nullptr;
  unsigned ShallowestDepth = std::numeric_limits<unsigned int>::max();
  for (auto *Node : AllSUs) {
    // Only SUs that are neither scheduled nor already ready are targets.
    if (Node->isScheduled || Node->isTopReady())
      continue;

    const unsigned Depth = Node->getDepth();
    if (Depth >= ShallowestDepth)
      continue;

    ShallowestDepth = Depth;
    Shallowest = Node;
  }
  return Shallowest;
}


/// Register \p SU with this hardware unit and account \p BlockingCycles
/// towards the unit's total.
///
/// The priority set is kept equal to the SUs of minimal depth seen so far:
/// a deeper SU is ignored, an equally deep SU joins the set, and a shallower
/// SU replaces it.
void HardwareUnitInfo::insert(SUnit *SU, unsigned BlockingCycles) {
  // Each SU is expected to be inserted only once; this is checked in
  // asserts builds.
#ifndef NDEBUG
  bool Inserted = AllSUs.insert(SU);
  assert(Inserted);
#else
  AllSUs.insert(SU);
#endif

  TotalCycles += BlockingCycles;

  if (!PrioritySUs.empty()) {
    const unsigned NewDepth = SU->getDepth();
    const unsigned BestDepth = (*PrioritySUs.begin())->getDepth();

    // Deeper than the current priority set: not a priority SU.
    if (NewDepth > BestDepth)
      return;

    // Strictly shallower: the existing priority SUs are superseded.
    if (NewDepth < BestDepth)
      PrioritySUs.clear();
  }

  PrioritySUs.insert(SU);
}


/// Remove \p SU from this hardware unit's bookkeeping and subtract
/// \p BlockingCycles from the unit's total.
///
/// If removing \p SU empties the priority set while other SUs remain, the
/// priority set is rebuilt from the remaining SUs as the set of SUs with
/// minimal depth.
void HardwareUnitInfo::markScheduled(SUnit *SU, unsigned BlockingCycles) {
  // We may want to ignore some HWUIs (e.g. InstructionFlavor::Other). To do so,
  // we just clear the HWUI. However, we still have instructions which map to
  // this HWUI. Don't bother managing the state for these HWUI.
  if (TotalCycles == 0)
    return;

  AllSUs.remove(SU);
  PrioritySUs.remove(SU);
  TotalCycles -= BlockingCycles;

  // Nothing to rebuild if no SUs are left or priority SUs still exist.
  if (AllSUs.empty() || !PrioritySUs.empty())
    return;

  for (auto Remaining : AllSUs) {
    if (!PrioritySUs.empty()) {
      const unsigned Depth = Remaining->getDepth();
      const unsigned BestDepth = (*PrioritySUs.begin())->getDepth();

      // Deeper than the best depth found so far: skip.
      if (Depth > BestDepth)
        continue;

      // Strictly shallower: restart the priority set from this SU.
      if (Depth < BestDepth)
        PrioritySUs.clear();
    }

    PrioritySUs.insert(Remaining);
  }
}


/// Return the first entry of HWUInfo whose type equals \p Flavor, or nullptr
/// if no entry matches. A search is used instead of indexing because
/// sortHWUIResources() reorders HWUInfo.
HardwareUnitInfo *
CandidateHeuristics::getHWUIFromFlavor(InstructionFlavor Flavor) {
  auto Match = llvm::find_if(HWUInfo, [Flavor](const HardwareUnitInfo &Unit) {
    return Unit.getType() == Flavor;
  });
  if (Match == HWUInfo.end())
    return nullptr;
  return &*Match;
}


/// Return the largest ReleaseAtCycle over the write processor-resource
/// entries of \p SU's scheduling class, or 0 if the class has no entries.
/// Requires an instruction scheduling model.
unsigned CandidateHeuristics::getHWUICyclesForInst(SUnit *SU) {
  assert(SchedModel && SchedModel->hasInstrSchedModel());

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  TargetSchedModel::ProcResIter Entry = SchedModel->getWriteProcResBegin(SC);
  const TargetSchedModel::ProcResIter EntriesEnd =
      SchedModel->getWriteProcResEnd(SC);

  unsigned LongestRelease = 0;
  while (Entry != EntriesEnd) {
    const unsigned Release = static_cast<unsigned>(Entry->ReleaseAtCycle);
    if (Release > LongestRelease)
      LongestRelease = Release;
    ++Entry;
  }
  return LongestRelease;
}


/// Tell the hardware unit that owns \p SU's flavor that \p SU is being
/// scheduled, passing along the SU's blocking cycles.
void CandidateHeuristics::updateForScheduling(SUnit *SU) {
  const InstructionFlavor Flavor = classifyFlavor(*SU->getInstr(), *SII);
  HardwareUnitInfo *Unit = getHWUIFromFlavor(Flavor);
  assert(Unit);
  Unit->markScheduled(SU, getHWUICyclesForInst(SU));
}


/// Bind the heuristics to a freshly built DAG and rebuild the per-flavor
/// hardware unit table.
///
/// HWUInfo is resized to one entry per flavor; every entry is reset and given
/// the type matching its index. WMMA, MultiCycleVALU and TRANS are flagged as
/// coexec-window producers, and finally the region's SUs are collected into
/// the table. Requires an instruction scheduling model.
void CandidateHeuristics::initialize(ScheduleDAGMI *SchedDAG,
                                     const TargetSchedModel *TargetSchedModel,
                                     const TargetRegisterInfo *TRI) {
  DAG = SchedDAG;
  SchedModel = TargetSchedModel;
  assert(SchedModel && SchedModel->hasInstrSchedModel());

  SII = static_cast<const SIInstrInfo *>(DAG->TII);
  SRI = static_cast<const SIRegisterInfo *>(TRI);

  HWUInfo.resize(static_cast<int>(InstructionFlavor::NUM_FLAVORS));

  // Entry N describes flavor N until the table is sorted.
  unsigned FlavorIdx = 0;
  for (HardwareUnitInfo &Unit : HWUInfo) {
    Unit.reset();
    Unit.setType(FlavorIdx);
    ++FlavorIdx;
  }

  for (InstructionFlavor Producer :
       {InstructionFlavor::WMMA, InstructionFlavor::MultiCycleVALU,
        InstructionFlavor::TRANS})
    HWUInfo[static_cast<int>(Producer)].setProducesCoexecWindow(true);

  collectHWUIPressure();
}


void CandidateHeuristics::collectHWUIPressure() {

  if (!SchedModel || !SchedModel->hasInstrSchedModel())

    return;


  for (auto &SU : DAG->SUnits) {

    const InstructionFlavor Flavor = classifyFlavor(*SU.getInstr(), *SII);

    HWUInfo[(int)(Flavor)].insert(&SU, getHWUICyclesForInst(&SU));

  }


  LLVM_DEBUG(dumpRegionSummary());

}


void CandidateHeuristics::dumpRegionSummary() {

  MachineBasicBlock *BB = DAG->begin()->getParent();

  dbgs() << "\n=== Region: " << DAG->MF.getName() << " BB" << BB->getNumber()

         << " (" << DAG->SUnits.size() << " SUs) ===\n";


  dbgs() << "\nHWUI Resource Pressure:\n";

  for (auto &HWUI : HWUInfo) {

    if (HWUI.getTotalCycles() == 0)

      continue;


    StringRef Name = getFlavorName(HWUI.getType());

    dbgs() << "  " << Name << ": " << HWUI.getTotalCycles() << " cycles, "

           << HWUI.size() << " instrs\n";

  }

  dbgs() << "\n";

}


void CandidateHeuristics::sortHWUIResources() {

  // Highest priority should be first.

  llvm::sort(HWUInfo, [](HardwareUnitInfo &A, HardwareUnitInfo &B) {

    // Prefer CoexecWindow producers

    if (A.producesCoexecWindow() != B.producesCoexecWindow())

      return A.producesCoexecWindow();


    // Prefer more demanded resources

    if (A.getTotalCycles() != B.getTotalCycles())

      return A.getTotalCycles() > B.getTotalCycles();


    // In ties -- prefer the resource with more instructions

    if (A.size() != B.size())

      return A.size() < B.size();


    // Default to Flavor order

    return (unsigned)A.getType() < (unsigned)B.getType();

  });

}


/// Compare \p Cand and \p TryCand by whether they lead to a not-yet-ready
/// instruction ("target SU") of a hardware unit.
///
/// Hardware units are visited in the current order of HWUInfo. For the first
/// unit that has a target SU and for which at least one candidate is related
/// to that target according to DAG->IsReachable, the preferred candidate gets
/// its Reason set to RegCritical and the function returns true:
///   - if only one candidate is related to the target, that candidate wins;
///   - if both are, the one with the greater height wins;
///   - on equal height the incumbent \p Cand is kept.
/// When \p Cand is preferred its Reason is only lowered to RegCritical, never
/// raised. Returns false if no unit yields a decision.
///
/// \p Zone is not used by this function.
bool CandidateHeuristics::tryCriticalResourceDependency(
    GenericSchedulerBase::SchedCandidate &TryCand,
    GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const {

  // Does the unit at ResourceIdx currently have a target SU? The deep lookup
  // is only requested for the WMMA unit when either candidate is a DS
  // instruction.
  auto HasPrioritySU = [this, &Cand, &TryCand](unsigned ResourceIdx) {
    const HardwareUnitInfo &HWUI = HWUInfo[ResourceIdx];

    auto CandFlavor = classifyFlavor(*Cand.SU->getInstr(), *SII);
    auto TryCandFlavor = classifyFlavor(*TryCand.SU->getInstr(), *SII);
    bool LookDeep = (CandFlavor == InstructionFlavor::DS ||
                     TryCandFlavor == InstructionFlavor::DS) &&
                    HWUI.getType() == InstructionFlavor::WMMA;

    // If we do not have a TargetSU for this resource, then it is not critical.
    return HWUI.getNextTargetSU(LookDeep) != nullptr;
  };

  // Decide between the candidates for the unit at ResourceIdx. Returns true
  // (after stamping the winner's Reason) if a decision was made.
  auto TryEnablesResource = [&Cand, &TryCand, this](unsigned ResourceIdx) {
    const HardwareUnitInfo &HWUI = HWUInfo[ResourceIdx];
    auto CandFlavor = classifyFlavor(*Cand.SU->getInstr(), *SII);

    // We want to ensure our DS order matches WMMA order.
    bool LookDeep = CandFlavor == InstructionFlavor::DS &&
                    HWUI.getType() == InstructionFlavor::WMMA;
    auto *TargetSU = HWUI.getNextTargetSU(LookDeep);

    // LookDeep here depends on Cand's flavor only, whereas HasPrioritySU also
    // considers TryCand's flavor. The two lookups can therefore disagree:
    // HasPrioritySU may have found a target through the deep lookup while
    // this shallow lookup finds none. A null target must not reach
    // IsReachable, so treat it as "neither candidate enables this resource".
    if (!TargetSU)
      return false;

    bool CandEnables =
        TargetSU != Cand.SU && DAG->IsReachable(TargetSU, Cand.SU);
    bool TryCandEnables =
        TargetSU != TryCand.SU && DAG->IsReachable(TargetSU, TryCand.SU);

    if (!CandEnables && !TryCandEnables)
      return false;

    if (CandEnables && !TryCandEnables) {
      if (Cand.Reason > GenericSchedulerBase::RegCritical)
        Cand.Reason = GenericSchedulerBase::RegCritical;

      return true;
    }

    if (!CandEnables && TryCandEnables) {
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }

    // Both enable, prefer the critical path.
    unsigned CandHeight = Cand.SU->getHeight();
    unsigned TryCandHeight = TryCand.SU->getHeight();

    if (CandHeight > TryCandHeight) {
      if (Cand.Reason > GenericSchedulerBase::RegCritical)
        Cand.Reason = GenericSchedulerBase::RegCritical;

      return true;
    }

    if (CandHeight < TryCandHeight) {
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }

    // Same critical path, just prefer original candidate.
    if (Cand.Reason > GenericSchedulerBase::RegCritical)
      Cand.Reason = GenericSchedulerBase::RegCritical;

    return true;
  };

  for (unsigned I = 0; I < HWUInfo.size(); I++) {
    // A resource without a target SU is not critical; move on to the next
    // resource.
    if (!HasPrioritySU(I))
      continue;

    // Stop at the first resource for which a decision was made; if neither
    // candidate enables this resource, keep looking.
    if (TryEnablesResource(I))
      return true;
  }
  return false;
}


/// Compare \p Cand and \p TryCand by the hardware unit they belong to.
///
/// Units are visited in the current order of HWUInfo. At the first unit that
/// contains exactly one of the candidates, that candidate is preferred. If a
/// unit contains both, a DS unit first defers to
/// tryCriticalResourceDependency(), and otherwise the unit's own
/// getHigherPriority() decides when it names a winner. The preferred
/// candidate's Reason is set to RegCritical (for \p Cand only if that lowers
/// it). Returns true if a decision was made, false otherwise.
bool CandidateHeuristics::tryCriticalResource(
    GenericSchedulerBase::SchedCandidate &TryCand,
    GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const {
  // Record a decision in favour of the incumbent candidate.
  const auto PreferCand = [&Cand] {
    if (Cand.Reason > GenericSchedulerBase::RegCritical)
      Cand.Reason = GenericSchedulerBase::RegCritical;
    return true;
  };
  // Record a decision in favour of the challenger.
  const auto PreferTryCand = [&TryCand] {
    TryCand.Reason = GenericSchedulerBase::RegCritical;
    return true;
  };

  for (const HardwareUnitInfo &HWUI : HWUInfo) {
    const bool CandUsesCrit = HWUI.contains(Cand.SU);
    const bool TryCandUsesCrit = HWUI.contains(TryCand.SU);

    // Neither candidate uses this unit: it cannot separate them.
    if (!CandUsesCrit && !TryCandUsesCrit)
      continue;

    // Exactly one candidate uses this unit: that one wins.
    if (CandUsesCrit != TryCandUsesCrit)
      return CandUsesCrit ? PreferCand() : PreferTryCand();

    // Otherwise, both use the critical resource
    // For longer latency InstructionFlavors, we should prioritize first by
    // their enablement of critical resources
    if (HWUI.getType() == InstructionFlavor::DS &&
        tryCriticalResourceDependency(TryCand, Cand, Zone))
      return true;

    // Prioritize based on HWUI priorities.
    if (SUnit *Match = HWUI.getHigherPriority(Cand.SU, TryCand.SU))
      return Match == Cand.SU ? PreferCand() : PreferTryCand();
  }

  return false;
}


/// Set up the strategy's scheduling stages (ILP initial schedule, MFMA form
/// rewrite, pre-RA rematerialization, in that order) and enable the GCN
/// pressure trackers.
AMDGPUCoExecSchedStrategy::AMDGPUCoExecSchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.append({GCNSchedStageID::ILPInitialSchedule,
                      GCNSchedStageID::RewriteMFMAForm,
                      GCNSchedStageID::PreRARematerialize});

  // Use more accurate GCN pressure trackers.
  UseGCNTrackers = true;
}


/// Run the base-class policy setup, then force the region policy to top-down
/// only with lane-mask tracking. Asserts that the pre-RA direction is either
/// unspecified or top-down.
void AMDGPUCoExecSchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                           MachineBasicBlock::iterator End,
                                           unsigned NumRegionInstrs) {
  GCNSchedStrategy::initPolicy(Begin, End, NumRegionInstrs);

  assert((PreRADirection == MISched::Unspecified ||
          PreRADirection == MISched::TopDown) &&
         "coexec scheduler only supports top-down scheduling");

  // Override whatever direction the base class selected.
  RegionPolicy.OnlyBottomUp = false;
  RegionPolicy.OnlyTopDown = true;
  RegionPolicy.ShouldTrackLaneMasks = true;
}


/// Prepare the strategy and its candidate heuristics for a new region.
void AMDGPUCoExecSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  // Coexecution scheduling strategy is only done top-down to support new
  // resource balancing heuristics. The direction is fixed before the base
  // class is initialized.
  RegionPolicy.OnlyBottomUp = false;
  RegionPolicy.OnlyTopDown = true;

  GCNSchedStrategy::initialize(DAG);

  // The heuristics use the scheduling model and register info held by this
  // strategy.
  Heurs.initialize(DAG, SchedModel, TRI);
}


// Update the hardware-unit bookkeeping for SU, then hand the notification on

// to the base class.

void AMDGPUCoExecSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {

  // The heuristics are updated before the base class handles the node.

  Heurs.updateForScheduling(SU);

  GCNSchedStrategy::schedNode(SU, IsTopNode);

}


/// Pick the next node to schedule from the top boundary.
///
/// Returns nullptr once the top and bottom of the region have met. Otherwise
/// the boundary's only choice is taken if there is one, and if not the
/// Available and Pending queues are evaluated. Picking is repeated while the
/// chosen node is already scheduled. If the node came from the Pending queue,
/// the boundary is advanced until the node is ready and hazard free.
/// \p IsTopNode is always set to true.
SUnit *AMDGPUCoExecSchedStrategy::pickNode(bool &IsTopNode) {
  assert(RegionPolicy.OnlyTopDown && !RegionPolicy.OnlyBottomUp &&
         "coexec scheduler only supports top-down scheduling");

  // Region exhausted: both queues of both boundaries must be drained.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }

  SUnit *SU = nullptr;
  bool PickedPending = false;
#ifndef NDEBUG
  // Only used for debug output.
  SchedCandidate *PickedCand = nullptr;
#endif

  for (;;) {
    PickedPending = false;
    SU = pickOnlyChoice(Top);
    if (!SU) {
      // No forced choice: compare everything in the queues.
      CandPolicy NoPolicy;
      TopCand.reset(NoPolicy);
      pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                        PickedPending, /*IsBottomUp=*/false);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      SU = TopCand.SU;
#ifndef NDEBUG
      PickedCand = &TopCand;
#endif
    }
    IsTopNode = true;

    // Retry while the pick is a node that has already been scheduled.
    if (!SU->isScheduled)
      break;
  }

  LLVM_DEBUG(if (PickedCand) dumpPickSummary(SU, IsTopNode, *PickedCand));

  if (PickedPending) {
    // Move the boundary forward to the cycle at which the node is ready.
    if (SU->TopReadyCycle > Top.getCurrCycle())
      Top.bumpCycle(SU->TopReadyCycle);

    // checkHazard() does not expose the exact cycle where the hazard clears.
    while (Top.checkHazard(SU))
      Top.bumpCycle(Top.getCurrCycle() + 1);

    Top.releasePending();
  }

  // Drop the node from whichever ready queues it is in.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());

  assert(IsTopNode && "coexec scheduler must only schedule from top boundary");
  return SU;
}


/// Evaluate every SU in \p Zone's Available queue and then its Pending queue
/// with tryCandidateCoexec(), leaving the best one in \p Cand.
///
/// \p PickedPending is set to whether the last accepted candidate came from
/// the Pending queue. Only the top boundary and top-down scheduling are
/// supported (asserted).
void AMDGPUCoExecSchedStrategy::pickNodeFromQueue(
    SchedBoundary &Zone, const CandPolicy &ZonePolicy,
    const RegPressureTracker &RPTracker, SchedCandidate &Cand,
    bool &PickedPending, bool IsBottomUp) {
  assert(Zone.isTop() && "coexec scheduler only supports top boundary");
  assert(!IsBottomUp && "coexec scheduler only supports top-down scheduling");

  PickedPending = false;
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);

  // Current SGPR/VGPR pressure; both stay 0 when pressure is not tracked.
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  if (DAG->isTrackingPressure()) {
    if (useGCNTrackers()) {
      SGPRPressure = DownwardTracker.getPressure().getSGPRNum();
      VGPRPressure = DownwardTracker.getPressure().getArchVGPRNum();
    } else {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    }
  }

  // Challenge the running best candidate with each SU of one queue.
  auto EvaluateQueue = [&](ReadyQueue &Q, bool FromPending) {
    for (SUnit *SU : Q) {
      SchedCandidate TryCand(ZonePolicy);
      initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                    VGPRPressure, IsBottomUp);

      // The zone is only handed over when both candidates sit on the same
      // boundary.
      SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
      tryCandidateCoexec(Cand, TryCand, ZoneArg);

      // No reason recorded on the challenger means the incumbent stays.
      if (TryCand.Reason == NoCand) {
        LLVM_DEBUG(printCandidateDecision(TryCand, Cand));
        continue;
      }

      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
      PickedPending = FromPending;
      Cand.setBest(TryCand);
    }
  };

  LLVM_DEBUG(dbgs() << "Available Q:\n");
  EvaluateQueue(Zone.Available, /*FromPending=*/false);

  LLVM_DEBUG(dbgs() << "Pending Q:\n");
  EvaluateQueue(Zone.Pending, /*FromPending=*/true);
}


#ifndef NDEBUG

/// Print the cycle, the picked SU with its flavor, and the reason for the
/// pick. The AMDGPU-specific reason is printed if one was recorded, otherwise
/// the generic candidate reason, otherwise "Unknown". The recorded AMDGPU
/// reason is cleared afterwards.
void AMDGPUCoExecSchedStrategy::dumpPickSummary(SUnit *SU, bool IsTopNode,
                                                SchedCandidate &Cand) {
  raw_ostream &OS = dbgs();
  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);

  SchedBoundary &Boundary = IsTopNode ? Top : Bot;
  OS << "=== Pick @ Cycle " << Boundary.getCurrCycle() << " ===\n";

  const InstructionFlavor Flavor = classifyFlavor(*SU->getInstr(), *SII);
  OS << "Picked: SU(" << SU->NodeNum << ") ";
  SU->getInstr()->print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false,
                        /*SkipDebugLoc=*/true);
  OS << " [" << getFlavorName(Flavor) << "]\n";

  OS << "  Reason: ";
  if (LastAMDGPUReason != AMDGPUSchedReason::None) {
    OS << getReasonName(LastAMDGPUReason);
  } else if (Cand.Reason != NoCand) {
    OS << GenericSchedulerBase::getReasonStr(Cand.Reason);
  } else {
    OS << "Unknown";
  }
  OS << "\n\n";

  // Start the next pick without a recorded AMDGPU reason.
  LastAMDGPUReason = AMDGPUSchedReason::None;
}

#endif


/// Decide whether \p TryCand should replace \p Cand as the best candidate.
///
/// The heuristics run in a fixed order and the first one that separates the
/// two candidates ends the comparison: physical register bias, excess
/// register pressure, effective stall, critical resource balance, critical
/// resource dependency, clustering, weak edges, region max pressure, latency
/// and finally original node order. Heuristics that need \p Zone are skipped
/// when it is null.
///
/// \returns true if \p TryCand was given a Reason (i.e. it is preferred),
/// false otherwise.
bool AMDGPUCoExecSchedStrategy::tryCandidateCoexec(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = FirstValid;
    return true;
  }

  // Once a heuristic has decided, the challenger won exactly when a reason
  // was recorded on it.
  const auto TryCandWon = [&TryCand] { return TryCand.Reason != NoCand; };

  // Bias PhysReg Defs and copies to their uses and defined respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCandWon();

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCandWon();

  // We only compare a subset of features when comparing nodes between
  // Top and Bottom boundary. Some properties are simply incomparable, in many
  // other instances we should only override the other boundary if something
  // is a clear good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  const bool HaveZone = Zone != nullptr;

  if (HaveZone) {
    // Compare candidates by the stall they would introduce if
    // scheduled in the current cycle.
    if (tryEffectiveStall(Cand, TryCand, *Zone))
      return TryCandWon();

    // The resource heuristics walk the hardware units in priority order.
    Heurs.sortHWUIResources();

    if (Heurs.tryCriticalResource(TryCand, Cand, Zone)) {
      LastAMDGPUReason = AMDGPUSchedReason::CritResourceBalance;
      return TryCandWon();
    }

    if (Heurs.tryCriticalResourceDependency(TryCand, Cand, Zone)) {
      LastAMDGPUReason = AMDGPUSchedReason::CritResourceDep;
      return TryCandWon();
    }
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const unsigned TryCandZoneCluster =
      TryCand.AtTop ? TopClusterID : BotClusterID;
  const unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  const bool TryCandInCluster =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  const bool CandInCluster =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);

  if (tryGreater(TryCandInCluster, CandInCluster, TryCand, Cand, Cluster))
    return TryCandWon();

  // Weak edges are for clustering and other constraints.
  if (HaveZone && tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                          getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand,
                          Weak))
    return TryCandWon();

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCandWon();

  // Everything below is only meaningful on a shared boundary.
  if (!HaveZone)
    return false;

  // Avoid serializing long latency dependence chains.
  // For acyclic path limited loops, latency was already checked above.
  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
      !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
    return TryCandWon();

  // Fall through to original instruction order: at the top boundary the
  // smaller node number wins, at the bottom boundary the larger one.
  const bool TryCandIsEarlierInOrder =
      Zone->isTop() ? TryCand.SU->NodeNum < Cand.SU->NodeNum
                    : TryCand.SU->NodeNum > Cand.SU->NodeNum;
  if (TryCandIsEarlierInOrder) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  return false;
}


/// Compare the two candidates by their effective stall in the current cycle
/// of \p Zone; the smaller stall is preferred (via tryLess with reason Stall).
///
/// The effective stall of a candidate is the maximum of three costs: cycles
/// until its ready cycle, its structural stall cycles, and its latency stall
/// cycles.
bool AMDGPUCoExecSchedStrategy::tryEffectiveStall(SchedCandidate &Cand,
                                                  SchedCandidate &TryCand,
                                                  SchedBoundary &Zone) const {
  // Treat structural and latency stalls as a single scheduling cost for the
  // current cycle.
  struct StallCosts {
    unsigned Ready = 0;
    unsigned Structural = 0;
    unsigned Latency = 0;
    unsigned Effective = 0;
  };

  const unsigned Now = Zone.getCurrCycle();
  auto CostsFor = [&](SUnit *SU) {
    StallCosts Out;

    // Cycles still to wait until the node's ready cycle; 0 if already past.
    const unsigned ReadyAt =
        Zone.isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    if (ReadyAt > Now)
      Out.Ready = ReadyAt - Now;

    Out.Structural = getStructuralStallCycles(Zone, SU);
    Out.Latency = Zone.getLatencyStallCycles(SU);
    Out.Effective =
        std::max(std::max(Out.Ready, Out.Structural), Out.Latency);
    return Out;
  };

  const StallCosts TryCosts = CostsFor(TryCand.SU);
  const StallCosts CandCosts = CostsFor(Cand.SU);

  // Only report when at least one candidate would actually stall.
  LLVM_DEBUG(if (TryCosts.Effective || CandCosts.Effective) {
    dbgs() << "Effective stalls: try=" << TryCosts.Effective
           << " (ready=" << TryCosts.Ready << ", struct=" << TryCosts.Structural
           << ", lat=" << TryCosts.Latency << ") cand=" << CandCosts.Effective
           << " (ready=" << CandCosts.Ready
           << ", struct=" << CandCosts.Structural
           << ", lat=" << CandCosts.Latency << ")\n";
  });

  return tryLess(TryCosts.Effective, CandCosts.Effective, TryCand, Cand, Stall);
}


/// Create a GCNScheduleDAGMILive driven by an AMDGPUCoExecSchedStrategy.
/// The returned DAG is heap allocated; the caller receives the raw pointer.
ScheduleDAGInstrs *
llvm::createGCNCoExecMachineScheduler(MachineSchedContext *C) {
  LLVM_DEBUG(dbgs() << "AMDGPU coexec preRA scheduler selected for "
                    << C->MF->getName() << '\n');

  auto Strategy = std::make_unique<AMDGPUCoExecSchedStrategy>(C);
  return new GCNScheduleDAGMILive(C, std::move(Strategy));
}


// Create the no-op post-RA scheduler DAG for the function in C. The returned

// object is heap allocated and handed to the caller as a raw pointer.

ScheduleDAGInstrs *


llvm::createGCNNoopPostMachineScheduler(MachineSchedContext *C) {

  // Debug-only trace naming the function the scheduler was selected for.

  LLVM_DEBUG(dbgs() << "AMDGPU nop postRA scheduler selected for "

                    << C->MF->getName() << '\n');

  return new GCNNoopPostScheduleDAG(C);

}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

pickOnlyChoice
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
Definition AMDGPUCoExecSchedStrategy.cpp:36

AMDGPUCoExecSchedStrategy.h
Coexecution-focused scheduling strategy for AMDGPU.

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

I
#define I(x, y, z)
Definition MD5.cpp:57

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2127

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:77

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

llvm::AMDGPUCoExecSchedStrategy::tryEffectiveStall
bool tryEffectiveStall(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone) const
Definition AMDGPUCoExecSchedStrategy.cpp:672

llvm::AMDGPUCoExecSchedStrategy::LastAMDGPUReason
AMDGPU::AMDGPUSchedReason LastAMDGPUReason
Definition AMDGPUCoExecSchedStrategy.h:304

llvm::AMDGPUCoExecSchedStrategy::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
Definition AMDGPUCoExecSchedStrategy.cpp:419

llvm::AMDGPUCoExecSchedStrategy::Heurs
CandidateHeuristics Heurs
Definition AMDGPUCoExecSchedStrategy.h:305

llvm::AMDGPUCoExecSchedStrategy::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
Definition AMDGPUCoExecSchedStrategy.cpp:446

llvm::AMDGPUCoExecSchedStrategy::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &PickedPending, bool IsBottomUp)
Definition AMDGPUCoExecSchedStrategy.cpp:505

llvm::AMDGPUCoExecSchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition AMDGPUCoExecSchedStrategy.cpp:431

llvm::AMDGPUCoExecSchedStrategy::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition AMDGPUCoExecSchedStrategy.cpp:441

llvm::AMDGPUCoExecSchedStrategy::AMDGPUCoExecSchedStrategy
AMDGPUCoExecSchedStrategy(const MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:409

llvm::AMDGPUCoExecSchedStrategy::dumpPickSummary
void dumpPickSummary(SUnit *SU, bool IsTopNode, SchedCandidate &Cand)
Definition AMDGPUCoExecSchedStrategy.cpp:554

llvm::AMDGPUCoExecSchedStrategy::tryCandidateCoexec
bool tryCandidateCoexec(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone)
Definition AMDGPUCoExecSchedStrategy.cpp:580

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::CandidateHeuristics::updateForScheduling
void updateForScheduling(SUnit *SU)
Update the state to reflect that SU is going to be scheduled.
Definition AMDGPUCoExecSchedStrategy.cpp:196

llvm::CandidateHeuristics::getHWUIFromFlavor
HardwareUnitInfo * getHWUIFromFlavor(AMDGPU::InstructionFlavor Flavor)
Given a Flavor , find the corresponding HardwareUnit.
Definition AMDGPUCoExecSchedStrategy.cpp:175

llvm::CandidateHeuristics::sortHWUIResources
void sortHWUIResources()
Sort the HWUInfo vector.
Definition AMDGPUCoExecSchedStrategy.cpp:256

llvm::CandidateHeuristics::tryCriticalResource
bool tryCriticalResource(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const
Check for critical resource consumption.
Definition AMDGPUCoExecSchedStrategy.cpp:363

llvm::CandidateHeuristics::tryCriticalResourceDependency
bool tryCriticalResourceDependency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const
Check for dependencies of instructions that use prioritized HardwareUnits.
Definition AMDGPUCoExecSchedStrategy.cpp:276

llvm::CandidateHeuristics::SII
const SIInstrInfo * SII
Definition AMDGPUCoExecSchedStrategy.h:249

llvm::CandidateHeuristics::HWUInfo
SmallVector< HardwareUnitInfo, 8 > HWUInfo
Definition AMDGPUCoExecSchedStrategy.h:252

llvm::CandidateHeuristics::dumpRegionSummary
void dumpRegionSummary()
Definition AMDGPUCoExecSchedStrategy.cpp:239

llvm::CandidateHeuristics::SRI
const SIRegisterInfo * SRI
Definition AMDGPUCoExecSchedStrategy.h:250

llvm::CandidateHeuristics::DAG
ScheduleDAGMI * DAG
Definition AMDGPUCoExecSchedStrategy.h:248

llvm::CandidateHeuristics::SchedModel
const TargetSchedModel * SchedModel
Definition AMDGPUCoExecSchedStrategy.h:251

llvm::CandidateHeuristics::collectHWUIPressure
void collectHWUIPressure()
Walk over the region and collect total usage per HardwareUnit.
Definition AMDGPUCoExecSchedStrategy.cpp:227

llvm::CandidateHeuristics::initialize
void initialize(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel, const TargetRegisterInfo *TRI)
Definition AMDGPUCoExecSchedStrategy.cpp:203

llvm::CandidateHeuristics::getHWUICyclesForInst
unsigned getHWUICyclesForInst(SUnit *SU)
Compute the blocking cycles for the appropriate HardwareUnit given an SU.
Definition AMDGPUCoExecSchedStrategy.cpp:184

llvm::GCNSchedStrategy::DownwardTracker
GCNDownwardRPTracker DownwardTracker
Definition GCNSchedStrategy.h:106

llvm::GCNSchedStrategy::useGCNTrackers
bool useGCNTrackers() const
Definition GCNSchedStrategy.h:162

llvm::GCNSchedStrategy::GCNSchedStrategy
GCNSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:108

llvm::GCNSchedStrategy::SchedStages
SmallVector< GCNSchedStageID, 4 > SchedStages
Definition GCNSchedStrategy.h:100

llvm::GCNSchedStrategy::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition GCNSchedStrategy.cpp:664

llvm::GCNSchedStrategy::Pressure
std::vector< unsigned > Pressure
Definition GCNSchedStrategy.h:87

llvm::GCNSchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition GCNSchedStrategy.cpp:115

llvm::GCNSchedStrategy::printCandidateDecision
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
Definition GCNSchedStrategy.cpp:413

llvm::GCNSchedStrategy::getStructuralStallCycles
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
Definition GCNSchedStrategy.cpp:235

llvm::GCNSchedStrategy::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
Definition GCNSchedStrategy.cpp:272

llvm::GCNSchedStrategy::UseGCNTrackers
bool UseGCNTrackers
Definition GCNSchedStrategy.h:111

llvm::GCNScheduleDAGMILive
Definition GCNSchedStrategy.h:260

llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition MachineScheduler.h:1220

llvm::GenericSchedulerBase::NumRegionInstrs
unsigned NumRegionInstrs
Definition MachineScheduler.h:1216

llvm::GenericSchedulerBase::RegionPolicy
MachineSchedPolicy RegionPolicy
Definition MachineScheduler.h:1218

llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition MachineScheduler.h:1212

llvm::GenericSchedulerBase::getReasonStr
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
Definition MachineScheduler.cpp:3336

llvm::GenericSchedulerBase::RegExcess
@ RegExcess
Definition MachineScheduler.h:1105

llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition MachineScheduler.h:1110

llvm::GenericSchedulerBase::FirstValid
@ FirstValid
Definition MachineScheduler.h:1118

llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition MachineScheduler.h:1108

llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition MachineScheduler.h:1106

llvm::GenericSchedulerBase::PhysReg
@ PhysReg
Definition MachineScheduler.h:1104

llvm::GenericSchedulerBase::Stall
@ Stall
Definition MachineScheduler.h:1107

llvm::GenericSchedulerBase::Weak
@ Weak
Definition MachineScheduler.h:1109

llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition MachineScheduler.h:1213

llvm::GenericScheduler::Top
SchedBoundary Top
Definition MachineScheduler.h:1315

llvm::GenericScheduler::TopCand
SchedCandidate TopCand
Candidate last picked from Top boundary.
Definition MachineScheduler.h:1322

llvm::GenericScheduler::TopClusterID
unsigned TopClusterID
Definition MachineScheduler.h:1318

llvm::GenericScheduler::Bot
SchedBoundary Bot
Definition MachineScheduler.h:1316

llvm::GenericScheduler::DAG
ScheduleDAGMILive * DAG
Definition MachineScheduler.h:1312

llvm::GenericScheduler::BotClusterID
unsigned BotClusterID
Definition MachineScheduler.h:1319

llvm::HardwareUnitInfo
HardwareUnitInfo is a wrapper class which maps to some real hardware resource.
Definition AMDGPUCoExecSchedStrategy.h:154

llvm::HardwareUnitInfo::markScheduled
void markScheduled(SUnit *SU, unsigned BlockingCycles)
Update the state for SU being scheduled by removing it from AllSUs and deducting its BlockingCycles from TotalCycles.
Definition AMDGPUCoExecSchedStrategy.cpp:137

llvm::HardwareUnitInfo::contains
bool contains(SUnit *SU) const
Definition AMDGPUCoExecSchedStrategy.h:194

llvm::HardwareUnitInfo::getNextTargetSU
SUnit * getNextTargetSU(bool LookDeep=false) const
Definition AMDGPUCoExecSchedStrategy.cpp:82

llvm::HardwareUnitInfo::insert
void insert(SUnit *SU, unsigned BlockingCycles)
Insert the SU into AllSUs and add its BlockingCycles to TotalCycles.
Definition AMDGPUCoExecSchedStrategy.cpp:108

llvm::HardwareUnitInfo::getType
AMDGPU::InstructionFlavor getType() const
Definition AMDGPUCoExecSchedStrategy.h:188

llvm::HardwareUnitInfo::getHigherPriority
SUnit * getHigherPriority(SUnit *SU, SUnit *Other) const
Definition AMDGPUCoExecSchedStrategy.h:201

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this will return -1.
Definition MachineBasicBlock.h:1274

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:348

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineInstr::print
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition MachineInstr.cpp:1803

llvm::MachineSchedStrategy::initPolicy
virtual void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs)
Optionally override the per-region scheduling policy.
Definition MachineScheduler.h:255

llvm::ReadyQueue
Helpers for implementing custom MachineSchedStrategy classes.
Definition MachineScheduler.h:571

llvm::ReadyQueue::empty
bool empty() const
Definition MachineScheduler.h:586

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high water mark within the region traversed.
Definition RegisterPressure.h:361

llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region.
Definition RegisterPressure.h:467

llvm::SIInstrInfo
Definition SIInstrInfo.h:101

llvm::SIInstrInfo::isDS
static bool isDS(const MachineInstr &MI)
Definition SIInstrInfo.h:623

llvm::SIInstrInfo::isFLATScratch
static bool isFLATScratch(const MachineInstr &MI)
Definition SIInstrInfo.h:707

llvm::SIInstrInfo::isSALU
static bool isSALU(const MachineInstr &MI)
Definition SIInstrInfo.h:472

llvm::SIInstrInfo::isMFMAorWMMA
static bool isMFMAorWMMA(const MachineInstr &MI)
Definition SIInstrInfo.h:1001

llvm::SIInstrInfo::isFLATGlobal
static bool isFLATGlobal(const MachineInstr &MI)
Definition SIInstrInfo.h:699

llvm::SIInstrInfo::isTRANS
static bool isTRANS(const MachineInstr &MI)
Definition SIInstrInfo.h:929

llvm::SIInstrInfo::isFLAT
static bool isFLAT(const MachineInstr &MI)
Definition SIInstrInfo.h:683

llvm::SIInstrInfo::isVALU
static bool isVALU(const MachineInstr &MI)
Definition SIInstrInfo.h:480

llvm::SIInstrInfo::isLDSDMA
static bool isLDSDMA(const MachineInstr &MI)
Definition SIInstrInfo.h:631

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:249

llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition ScheduleDAG.h:285

llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition ScheduleDAG.h:277

llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has no successors.
Definition ScheduleDAG.h:433

llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no predecessors.
Definition ScheduleDAG.h:425

llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition ScheduleDAG.h:305

llvm::SUnit::ParentClusterIdx
unsigned ParentClusterIdx
The parent cluster id.
Definition ScheduleDAG.h:288

llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition ScheduleDAG.h:476

llvm::SUnit::isTopReady
bool isTopReady() const
Definition ScheduleDAG.h:473

llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition ScheduleDAG.h:399

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition MachineScheduler.h:863

llvm::SchedBoundary::getLatencyStallCycles
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition MachineScheduler.cpp:2584

llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition MachineScheduler.h:872

llvm::SchedBoundary::isTop
bool isTop() const
Definition MachineScheduler.h:987

llvm::SchedBoundary::pickOnlyChoice
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
Definition MachineScheduler.cpp:3151

llvm::SchedBoundary::Available
ReadyQueue Available
Definition MachineScheduler.h:876

llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition MachineScheduler.h:992

llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition MachineScheduler.h:877

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition ScheduleDAGInstrs.h:118

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions according to the given MachineSchedStrategy without much extra book-keeping.
Definition MachineScheduler.h:314

llvm::StringRef
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
Definition TargetRegisterInfo.h:242

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::TargetSchedModel::ProcResIter
const MCWriteProcResEntry * ProcResIter
Definition TargetSchedule.h:142

unsigned

llvm::AMDGPU
Definition AMDGPUMetadataVerifier.h:34

llvm::AMDGPU::InstructionFlavor
InstructionFlavor
Definition AMDGPUCoExecSchedStrategy.h:28

llvm::AMDGPU::InstructionFlavor::NUM_FLAVORS
@ NUM_FLAVORS
Definition AMDGPUCoExecSchedStrategy.h:39

llvm::AMDGPU::InstructionFlavor::WMMA
@ WMMA
Definition AMDGPUCoExecSchedStrategy.h:29

llvm::AMDGPU::InstructionFlavor::DMA
@ DMA
Definition AMDGPUCoExecSchedStrategy.h:36

llvm::AMDGPU::InstructionFlavor::DS
@ DS
Definition AMDGPUCoExecSchedStrategy.h:34

llvm::AMDGPU::InstructionFlavor::VMEM
@ VMEM
Definition AMDGPUCoExecSchedStrategy.h:33

llvm::AMDGPU::InstructionFlavor::Other
@ Other
Definition AMDGPUCoExecSchedStrategy.h:38

llvm::AMDGPU::InstructionFlavor::SALU
@ SALU
Definition AMDGPUCoExecSchedStrategy.h:35

llvm::AMDGPU::InstructionFlavor::SingleCycleVALU
@ SingleCycleVALU
Definition AMDGPUCoExecSchedStrategy.h:30

llvm::AMDGPU::InstructionFlavor::TRANS
@ TRANS
Definition AMDGPUCoExecSchedStrategy.h:31

llvm::AMDGPU::InstructionFlavor::MultiCycleVALU
@ MultiCycleVALU
Definition AMDGPUCoExecSchedStrategy.h:32

llvm::AMDGPU::InstructionFlavor::Fence
@ Fence
Definition AMDGPUCoExecSchedStrategy.h:37

llvm::AMDGPU::AMDGPUSchedReason::CritResourceDep
@ CritResourceDep
Definition AMDGPUCoExecSchedStrategy.h:127

llvm::AMDGPU::AMDGPUSchedReason::None
@ None
Definition AMDGPUCoExecSchedStrategy.h:125

llvm::AMDGPU::AMDGPUSchedReason::CritResourceBalance
@ CritResourceBalance
Definition AMDGPUCoExecSchedStrategy.h:126

llvm::AMDGPU::classifyFlavor
InstructionFlavor classifyFlavor(const MachineInstr &MI, const SIInstrInfo &SII)
Definition AMDGPUCoExecSchedStrategy.cpp:45

llvm::AMDGPU::getReasonName
StringRef getReasonName(AMDGPUSchedReason R)
Definition AMDGPUCoExecSchedStrategy.h:131

llvm::AMDGPU::getFlavorName
StringRef getFlavorName(InstructionFlavor F)
Definition AMDGPUCoExecSchedStrategy.h:42

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::MISched::Unspecified
@ Unspecified
Definition MachineScheduler.h:112

llvm::MISched::TopDown
@ TopDown
Definition MachineScheduler.h:113

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::biasPhysReg
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop, bool BiasPRegsExtra=false)
Minimize physical register live ranges.
Definition MachineScheduler.cpp:3840

llvm::getWeakLeft
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
Definition MachineScheduler.cpp:3829

llvm::Latency
@ Latency
Definition SIMachineScheduler.h:34

llvm::NodeOrder
@ NodeOrder
Definition SIMachineScheduler.h:37

llvm::NoCand
@ NoCand
Definition SIMachineScheduler.h:32

llvm::Cycle
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26

llvm::tryPressure
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition MachineScheduler.cpp:3791

llvm::GCNSchedStageID::ILPInitialSchedule
@ ILPInitialSchedule
Definition GCNSchedStrategy.h:37

llvm::GCNSchedStageID::PreRARematerialize
@ PreRARematerialize
Definition GCNSchedStrategy.h:36

llvm::GCNSchedStageID::RewriteMFMAForm
@ RewriteMFMAForm
Definition GCNSchedStrategy.h:33

llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::createGCNNoopPostMachineScheduler
ScheduleDAGInstrs * createGCNNoopPostMachineScheduler(MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:719

llvm::tryLatency
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition MachineScheduler.cpp:3475

llvm::createGCNCoExecMachineScheduler
ScheduleDAGInstrs * createGCNCoExecMachineScheduler(MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:711

llvm::isTheSameCluster
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
Definition ScheduleDAG.h:244

llvm::tryGreater
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition MachineScheduler.cpp:3459

llvm::tryLess
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition MachineScheduler.cpp:3443

llvm::Enabled
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:31

llvm::PreRADirection
LLVM_ABI cl::opt< MISched::Direction > PreRADirection

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition MachineScheduler.h:1126

llvm::GenericSchedulerBase::CandPolicy::ReduceLatency
bool ReduceLatency
Definition MachineScheduler.h:1127

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
Definition MachineScheduler.h:1164

llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition MachineScheduler.h:1197

llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition MachineScheduler.h:1168

llvm::GenericSchedulerBase::SchedCandidate::initResourceDelta
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition MachineScheduler.cpp:3242

llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition MachineScheduler.h:1177

llvm::GenericSchedulerBase::SchedCandidate::AtTop
bool AtTop
Definition MachineScheduler.h:1174

llvm::GenericSchedulerBase::SchedCandidate::ResDelta
SchedResourceDelta ResDelta
Definition MachineScheduler.h:1180

llvm::GenericSchedulerBase::SchedCandidate::isValid
bool isValid() const
Definition MachineScheduler.h:1194

llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition MachineScheduler.h:1171

llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition MachineScheduler.h:1165

llvm::GenericSchedulerBase::SchedResourceDelta
Status of an instruction's critical resource consumption.
Definition MachineScheduler.h:1144

llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
Definition MachineScheduler.h:145

llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition RegisterPressure.h:246

llvm::RegPressureDelta::Excess
PressureChange Excess
Definition RegisterPressure.h:244