doxygen/AMDGPUCoExecSchedStrategy_8h_source.html

//===- AMDGPUCoExecSchedStrategy.h - CoExec Scheduling Strategy -*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Coexecution-focused scheduling strategy for AMDGPU.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOEXECSCHEDSTRATEGY_H

#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOEXECSCHEDSTRATEGY_H


#include "GCNSchedStrategy.h"

#include "llvm/CodeGen/MachineScheduler.h"


namespace llvm {


namespace AMDGPU {


//===----------------------------------------------------------------------===//

// Instruction Flavor Classification

//===----------------------------------------------------------------------===//


/// Coarse category assigned to a machine instruction. The trailing
/// NUM_FLAVORS entry is a count sentinel rather than a real category.
enum class InstructionFlavor : uint8_t {

  WMMA,            // WMMA/MFMA matrix operations

  SingleCycleVALU, // Single-cycle VALU (not TRANS32, not multi-cycle CVT)

  TRANS,           // Transcendental ops (v_exp, v_log, etc.)

  MultiCycleVALU,  // VALU instructions with repeat rate > 1

  VMEM,            // FLAT/GLOBAL memory operations

  DS,              // LDS/GDS operations

  SALU,            // Scalar ALU

  DMA,             // Tensor DMA operations

  Fence,           // Fences and waits

  Other,           // Everything else

  // Sentinel: the number of real flavors above. Must stay last; it is used
  // as an exclusive upper bound when iterating or validating flavor values.
  NUM_FLAVORS

};


/// \returns the descriptive label for flavor \p F (for example "VALU(1c)").
/// The NUM_FLAVORS sentinel, or any value outside the enum, is a programming
/// error and ends in llvm_unreachable.
inline StringRef getFlavorName(InstructionFlavor F) {
  StringRef Label;

  // Deliberately no default case, so the switch stays fully enumerated.
  switch (F) {
  case InstructionFlavor::WMMA:
    Label = "WMMA";
    break;
  case InstructionFlavor::SingleCycleVALU:
    Label = "VALU(1c)";
    break;
  case InstructionFlavor::TRANS:
    Label = "TRANS";
    break;
  case InstructionFlavor::MultiCycleVALU:
    Label = "VALU(Nc)";
    break;
  case InstructionFlavor::VMEM:
    Label = "VMEM";
    break;
  case InstructionFlavor::DS:
    Label = "DS";
    break;
  case InstructionFlavor::SALU:
    Label = "SALU";
    break;
  case InstructionFlavor::DMA:
    Label = "DMA";
    break;
  case InstructionFlavor::Fence:
    Label = "Fence";
    break;
  case InstructionFlavor::Other:
    Label = "Other";
    break;
  case InstructionFlavor::NUM_FLAVORS:
    break;
  }

  // Every real flavor has a non-empty label; an empty one means the sentinel
  // or an out-of-range value was passed in.
  if (Label.empty())
    llvm_unreachable("Unknown InstructionFlavor");
  return Label;
}


/// \returns the one-letter abbreviation for flavor \p F. Note that the letters
/// are not all initials: MultiCycleVALU is "C", VMEM is "M" and DMA is "X".
/// The NUM_FLAVORS sentinel, or any value outside the enum, is a programming
/// error and ends in llvm_unreachable.
inline StringRef getFlavorShortName(InstructionFlavor F) {
  StringRef Abbrev;

  // Deliberately no default case, so the switch stays fully enumerated.
  switch (F) {
  case InstructionFlavor::WMMA:
    Abbrev = "W";
    break;
  case InstructionFlavor::SingleCycleVALU:
    Abbrev = "V";
    break;
  case InstructionFlavor::TRANS:
    Abbrev = "T";
    break;
  case InstructionFlavor::MultiCycleVALU:
    Abbrev = "C";
    break;
  case InstructionFlavor::VMEM:
    Abbrev = "M";
    break;
  case InstructionFlavor::DS:
    Abbrev = "D";
    break;
  case InstructionFlavor::SALU:
    Abbrev = "S";
    break;
  case InstructionFlavor::DMA:
    Abbrev = "X";
    break;
  case InstructionFlavor::Fence:
    Abbrev = "F";
    break;
  case InstructionFlavor::Other:
    Abbrev = "O";
    break;
  case InstructionFlavor::NUM_FLAVORS:
    break;
  }

  // Every real flavor has a non-empty abbreviation; an empty one means the
  // sentinel or an out-of-range value was passed in.
  if (Abbrev.empty())
    llvm_unreachable("Unknown InstructionFlavor");
  return Abbrev;
}


/// Classify \p MI into one of the InstructionFlavor categories.
/// \p SII is presumably consulted for target-specific instruction queries --
/// confirm against the out-of-line definition. Only declared in this header.
InstructionFlavor classifyFlavor(const MachineInstr &MI,

                                 const SIInstrInfo &SII);


/// A short list of instruction flavors that are handled together. Four
/// entries are stored inline before the vector spills to the heap.
using FlavorGroup = SmallVector<InstructionFlavor, 4>;


namespace FlavorGroups {

/// \returns the three VALU flavors: single-cycle, transcendental and
/// multi-cycle, in that order.
inline FlavorGroup allVALU() {
  FlavorGroup Group;
  Group.push_back(InstructionFlavor::SingleCycleVALU);
  Group.push_back(InstructionFlavor::TRANS);
  Group.push_back(InstructionFlavor::MultiCycleVALU);
  return Group;
}

/// \returns the three memory flavors: VMEM, DS and DMA, in that order.
inline FlavorGroup allMem() {
  FlavorGroup Group;
  Group.push_back(InstructionFlavor::VMEM);
  Group.push_back(InstructionFlavor::DS);
  Group.push_back(InstructionFlavor::DMA);
  return Group;
}

/// \returns a group holding only \p F.
inline FlavorGroup individual(InstructionFlavor F) {
  FlavorGroup Group;
  Group.push_back(F);
  return Group;
}

/// \returns every real flavor in declaration order; the NUM_FLAVORS sentinel
/// itself is excluded.
inline FlavorGroup all() {
  const unsigned FlavorCount =
      static_cast<unsigned>(InstructionFlavor::NUM_FLAVORS);
  FlavorGroup Everything;
  unsigned Idx = 0;
  while (Idx < FlavorCount) {
    Everything.push_back(static_cast<InstructionFlavor>(Idx));
    ++Idx;
  }
  return Everything;
}

} // namespace FlavorGroups


/// AMDGPU-specific scheduling decision reasons. These provide more granularity

/// than the generic CandReason enum for debugging purposes.
/// NUM_REASONS is a count sentinel, not a reportable reason.


enum class AMDGPUSchedReason : uint8_t {

  None,

  CritResourceBalance, // tryCriticalResource chose based on resource pressure

  CritResourceDep,     // tryCriticalResourceDependency chose a candidate that
                       // is a dependency of a critical-resource instruction

  NUM_REASONS

};


/// \returns the printable name of scheduling reason \p R. Note that
/// CritResourceBalance is reported as "CritResource". The NUM_REASONS
/// sentinel, or any value outside the enum, is a programming error and ends
/// in llvm_unreachable.
inline StringRef getReasonName(AMDGPUSchedReason R) {
  StringRef Label;

  // Deliberately no default case, so the switch stays fully enumerated.
  switch (R) {
  case AMDGPUSchedReason::None:
    Label = "None";
    break;
  case AMDGPUSchedReason::CritResourceBalance:
    Label = "CritResource";
    break;
  case AMDGPUSchedReason::CritResourceDep:
    Label = "CritResourceDep";
    break;
  case AMDGPUSchedReason::NUM_REASONS:
    break;
  }

  // Every real reason has a non-empty name; an empty one means the sentinel
  // or an out-of-range value was passed in.
  if (Label.empty())
    llvm_unreachable("Unknown AMDGPUSchedReason");
  return Label;
}


} // End namespace AMDGPU


//===----------------------------------------------------------------------===//

// Hardware Unit Information

//===----------------------------------------------------------------------===//


/// HardwareUnitInfo is a wrapper class which maps to some real hardware

/// resource. This is used to model hardware resource pressure per region, and

/// guide scheduling heuristics.


class HardwareUnitInfo {

private:

  /// PrioritySUs maintains a list of the SUs we want to prioritize scheduling

  /// for this HardwareUnit. This is used for agreement between

  /// tryCriticalResourceDependency and tryCriticalResource: we schedule the

  /// dependencies for a SU on critical resource, then schedule that same SU on

  /// the critical resource. This agreement results in shorter live ranges and

  /// more regular HardwareUnit access patterns. SUs are prioritized based on

  /// depth for top-down scheduling.

  SmallSetVector<SUnit *, 16> PrioritySUs;

  /// All the SUs in the region that consume this resource.

  SmallSetVector<SUnit *, 16> AllSUs;

  /// The total number of busy cycles for this HardwareUnit for a given region.

  unsigned TotalCycles = 0;

  /// InstructionFlavor mapping.

  AMDGPU::InstructionFlavor Type;

  /// Whether or not instructions on this HardwareUnit may produce a window in

  /// which instructions in other HardwareUnits can coexecute. For example, WMMA

  /// / MFMA instructions may take multiple cycles, which may be overlapped with

  /// instructions on other HardwareUnits.

  bool ProducesCoexecWindow = false;


public:

  HardwareUnitInfo() {}


  unsigned size() { return AllSUs.size(); }


  unsigned getTotalCycles() { return TotalCycles; }


  void setType(unsigned TheType) {

    assert(TheType < (unsigned)AMDGPU::InstructionFlavor::NUM_FLAVORS);

    Type = (AMDGPU::InstructionFlavor)(TheType);

  }


  AMDGPU::InstructionFlavor getType() const { return Type; }


  bool producesCoexecWindow() const { return ProducesCoexecWindow; }


  void setProducesCoexecWindow(bool Val) { ProducesCoexecWindow = Val; }


  bool contains(SUnit *SU) const { return AllSUs.contains(SU); }


  /// \returns the SUnit with higher priority or nullptr if they are the same.

  /// This method looks through the PrioritySUs to determine if one SU is more

  /// prioritized than the other. If neither are in the PrioritySUs list, then

  /// neither have priority over each other.


  SUnit *getHigherPriority(SUnit *SU, SUnit *Other) const {

    for (SUnit *SUOrder : PrioritySUs) {

      if (SUOrder == SU)

        return SU;


      if (SUOrder == Other)

        return Other;

    }

    return nullptr;

  }


  void reset() {

    AllSUs.clear();

    PrioritySUs.clear();

    TotalCycles = 0;

    Type = AMDGPU::InstructionFlavor::Other;

    ProducesCoexecWindow = false;

  }


  /// \returns the next SU in PrioritySUs that is not ready. If \p LookDeep is

  /// set, we will look beyond the PrioritySUs (if all the PrioritySUs are

  /// ready) to AllSUs to attempt to find a target SU. When looking through

  /// AllSUs we sort pick the target SU by minimal depth for top-down

  /// scheduling. getNextTargetSU is useful for determining which SU on this

  /// HardwareUnit we are trying to schedule - this info helps us determine

  /// which dependencies to schedule. LookDeep is useful if the dependencies are

  /// long latency (e.g. memory instructions). If we have many long latency

  /// dependencies, it is beneficial to enable SUs multiple levels ahead.

  SUnit *getNextTargetSU(bool LookDeep = false) const;

  /// Insert the \p SU into AllSUs and account its \p BlockingCycles into

  /// the TotalCycles. This maintains the list of PrioritySUs.

  void insert(SUnit *SU, unsigned BlockingCycles);

  /// Update the state for \p SU being scheduled by removing it from the AllSUs

  /// and reducing its \p BlockingCycles from the TotalCycles. This maintains

  /// the list of PrioritySUs.

  void markScheduled(SUnit *SU, unsigned BlockingCycles);

};


//===----------------------------------------------------------------------===//

// Candidate Heuristics

//===----------------------------------------------------------------------===//


/// CandidateHeuristics contains state and implementations to facilitate making
/// per instruction scheduling decisions; it contains methods used in
/// tryCandidate to decide which instruction to schedule next.
class CandidateHeuristics {
protected:
  // The pointers below start out null so that use before initialize() is a
  // deterministic null dereference rather than a read of an indeterminate
  // pointer.
  ScheduleDAGMI *DAG = nullptr;
  const SIInstrInfo *SII = nullptr;
  const SIRegisterInfo *SRI = nullptr;
  const TargetSchedModel *SchedModel = nullptr;

  /// Per-HardwareUnit bookkeeping; reordered by sortHWUIResources().
  SmallVector<HardwareUnitInfo, 8> HWUInfo;

  /// Walk over the region and collect total usage per HardwareUnit.
  void collectHWUIPressure();

  /// Compute the blocking cycles for the appropriate HardwareUnit given an \p
  /// SU.
  unsigned getHWUICyclesForInst(SUnit *SU);

  /// Given a \p Flavor , find the corresponding HardwareUnit. \returns the
  /// mapped HardwareUnit.
  HardwareUnitInfo *getHWUIFromFlavor(AMDGPU::InstructionFlavor Flavor);

public:
  CandidateHeuristics() = default;

  /// Bind this object to \p DAG, \p SchedModel and \p TRI for the region about
  /// to be scheduled. NOTE(review): presumably SII and SRI are derived from
  /// these arguments -- confirm against the out-of-line definition.
  void initialize(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel,
                  const TargetRegisterInfo *TRI);

  /// Update the state to reflect that \p SU is going to be scheduled.
  void updateForScheduling(SUnit *SU);

  /// Sort the HWUInfo vector. After sorting, the HardwareUnits that are highest
  /// priority are first. Priority is determined by maximizing coexecution and
  /// keeping the critical HardwareUnit busy.
  void sortHWUIResources();

  /// Check for critical resource consumption. Prefer the candidate that uses
  /// the most prioritized HardwareUnit. If both candidates use the same
  /// HardwareUnit, prefer the candidate with higher priority on that
  /// HardwareUnit.
  bool tryCriticalResource(GenericSchedulerBase::SchedCandidate &TryCand,
                           GenericSchedulerBase::SchedCandidate &Cand,
                           SchedBoundary *Zone) const;

  /// Check for dependencies of instructions that use prioritized HardwareUnits.
  /// Prefer the candidate that is a dependency of an instruction that uses the
  /// most prioritized HardwareUnit. If both candidates enable the same
  /// HardwareUnit, prefer the candidate that enables the higher priority
  /// instruction on that HardwareUnit.
  bool
  tryCriticalResourceDependency(GenericSchedulerBase::SchedCandidate &TryCand,
                                GenericSchedulerBase::SchedCandidate &Cand,
                                SchedBoundary *Zone) const;

  /// NOTE(review): by its name, a debugging aid that prints a per-region
  /// summary -- confirm against the out-of-line definition.
  void dumpRegionSummary();
};


/// Coexecution-focused scheduling strategy for AMDGPU, derived from
/// GCNSchedStrategy. Per-candidate heuristic state lives in the Heurs member.
class AMDGPUCoExecSchedStrategy final : public GCNSchedStrategy {

protected:

  // NOTE(review): the candidate parameters here are ordered (Cand, TryCand),
  // whereas CandidateHeuristics::tryCriticalResource and
  // tryCriticalResourceDependency take (TryCand, Cand). Check call sites.
  bool tryEffectiveStall(SchedCandidate &Cand, SchedCandidate &TryCand,

                         SchedBoundary &Zone) const;

  // Starts as None. Presumably records which AMDGPU-specific heuristic decided
  // the most recent comparison -- confirm against the implementation.
  AMDGPU::AMDGPUSchedReason LastAMDGPUReason = AMDGPU::AMDGPUSchedReason::None;

  // Coexecution heuristics state and helpers used by this strategy.
  CandidateHeuristics Heurs;


  // Only declared in builds with assertions enabled (NDEBUG not defined).
#ifndef NDEBUG

  void dumpPickSummary(SUnit *SU, bool IsTopNode, SchedCandidate &Cand);

#endif


  // Same (Cand, TryCand) parameter order as tryEffectiveStall above.
  bool tryCandidateCoexec(SchedCandidate &Cand, SchedCandidate &TryCand,

                          SchedBoundary *Zone);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,

                         const RegPressureTracker &RPTracker,

                         SchedCandidate &Cand, bool &PickedPending,

                         bool IsBottomUp);


public:

  AMDGPUCoExecSchedStrategy(const MachineSchedContext *C);


  /// Optionally override the per-region scheduling policy.
  void initPolicy(MachineBasicBlock::iterator Begin,

                  MachineBasicBlock::iterator End,

                  unsigned NumRegionInstrs) override;

  /// Initialize the strategy after building the DAG for a new region.
  void initialize(ScheduleDAGMI *DAG) override;

  /// Pick the next node to schedule, or return NULL.
  SUnit *pickNode(bool &IsTopNode) override;

  /// Notification that ScheduleDAGMI has scheduled \p SU.
  void schedNode(SUnit *SU, bool IsTopNode) override;

};


/// Factory returning a scheduler DAG for context \p C; by its name, the one
/// driven by the coexecution strategy. NOTE(review): ownership of the returned
/// pointer is not visible from this header -- confirm at the definition.
ScheduleDAGInstrs *createGCNCoExecMachineScheduler(MachineSchedContext *C);

/// Factory returning a scheduler DAG for context \p C; by its name, a no-op
/// post machine scheduler -- confirm the exact behavior at the definition.
ScheduleDAGInstrs *createGCNNoopPostMachineScheduler(MachineSchedContext *C);


} // End namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUCOEXECSCHEDSTRATEGY_H

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

GCNSchedStrategy.h

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

G
#define G(x, y, z)
Definition MD5.cpp:55

MachineScheduler.h

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2127

llvm::AMDGPUCoExecSchedStrategy::tryEffectiveStall
bool tryEffectiveStall(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary &Zone) const
Definition AMDGPUCoExecSchedStrategy.cpp:670

llvm::AMDGPUCoExecSchedStrategy::LastAMDGPUReason
AMDGPU::AMDGPUSchedReason LastAMDGPUReason
Definition AMDGPUCoExecSchedStrategy.h:303

llvm::AMDGPUCoExecSchedStrategy::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Optionally override the per-region scheduling policy.
Definition AMDGPUCoExecSchedStrategy.cpp:417

llvm::AMDGPUCoExecSchedStrategy::Heurs
CandidateHeuristics Heurs
Definition AMDGPUCoExecSchedStrategy.h:304

llvm::AMDGPUCoExecSchedStrategy::pickNode
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
Definition AMDGPUCoExecSchedStrategy.cpp:444

llvm::AMDGPUCoExecSchedStrategy::pickNodeFromQueue
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &PickedPending, bool IsBottomUp)
Definition AMDGPUCoExecSchedStrategy.cpp:503

llvm::AMDGPUCoExecSchedStrategy::initialize
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
Definition AMDGPUCoExecSchedStrategy.cpp:429

llvm::AMDGPUCoExecSchedStrategy::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
Definition AMDGPUCoExecSchedStrategy.cpp:439

llvm::AMDGPUCoExecSchedStrategy::AMDGPUCoExecSchedStrategy
AMDGPUCoExecSchedStrategy(const MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:407

llvm::AMDGPUCoExecSchedStrategy::dumpPickSummary
void dumpPickSummary(SUnit *SU, bool IsTopNode, SchedCandidate &Cand)
Definition AMDGPUCoExecSchedStrategy.cpp:552

llvm::AMDGPUCoExecSchedStrategy::tryCandidateCoexec
bool tryCandidateCoexec(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone)
Definition AMDGPUCoExecSchedStrategy.cpp:578

llvm::CandidateHeuristics
CandidateHeuristics contains state and implementations to facilitate making per instruction schedulin...
Definition AMDGPUCoExecSchedStrategy.h:245

llvm::CandidateHeuristics::updateForScheduling
void updateForScheduling(SUnit *SU)
Update the state to reflect that SU is going to be scheduled.
Definition AMDGPUCoExecSchedStrategy.cpp:193

llvm::CandidateHeuristics::getHWUIFromFlavor
HardwareUnitInfo * getHWUIFromFlavor(AMDGPU::InstructionFlavor Flavor)
Given a Flavor , find the corresponding HardwareUnit.
Definition AMDGPUCoExecSchedStrategy.cpp:172

llvm::CandidateHeuristics::sortHWUIResources
void sortHWUIResources()
Sort the HWUInfo vector.
Definition AMDGPUCoExecSchedStrategy.cpp:253

llvm::CandidateHeuristics::tryCriticalResource
bool tryCriticalResource(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const
Check for critical resource consumption.
Definition AMDGPUCoExecSchedStrategy.cpp:361

llvm::CandidateHeuristics::tryCriticalResourceDependency
bool tryCriticalResourceDependency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const
Check for dependencies of instructions that use prioritized HardwareUnits.
Definition AMDGPUCoExecSchedStrategy.cpp:274

llvm::CandidateHeuristics::SII
const SIInstrInfo * SII
Definition AMDGPUCoExecSchedStrategy.h:248

llvm::CandidateHeuristics::HWUInfo
SmallVector< HardwareUnitInfo, 8 > HWUInfo
Definition AMDGPUCoExecSchedStrategy.h:251

llvm::CandidateHeuristics::dumpRegionSummary
void dumpRegionSummary()
Definition AMDGPUCoExecSchedStrategy.cpp:236

llvm::CandidateHeuristics::SRI
const SIRegisterInfo * SRI
Definition AMDGPUCoExecSchedStrategy.h:249

llvm::CandidateHeuristics::CandidateHeuristics
CandidateHeuristics()=default

llvm::CandidateHeuristics::DAG
ScheduleDAGMI * DAG
Definition AMDGPUCoExecSchedStrategy.h:247

llvm::CandidateHeuristics::SchedModel
const TargetSchedModel * SchedModel
Definition AMDGPUCoExecSchedStrategy.h:250

llvm::CandidateHeuristics::collectHWUIPressure
void collectHWUIPressure()
Walk over the region and collect total usage per HardwareUnit.
Definition AMDGPUCoExecSchedStrategy.cpp:224

llvm::CandidateHeuristics::initialize
void initialize(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel, const TargetRegisterInfo *TRI)
Definition AMDGPUCoExecSchedStrategy.cpp:200

llvm::CandidateHeuristics::getHWUICyclesForInst
unsigned getHWUICyclesForInst(SUnit *SU)
Compute the blocking cycles for the appropriate HardwareUnit given an SU.
Definition AMDGPUCoExecSchedStrategy.cpp:181

llvm::GCNSchedStrategy::GCNSchedStrategy
GCNSchedStrategy(const MachineSchedContext *C)
Definition GCNSchedStrategy.cpp:135

llvm::GenericSchedulerBase::NumRegionInstrs
unsigned NumRegionInstrs
Definition MachineScheduler.h:1216

llvm::GenericScheduler::DAG
ScheduleDAGMILive * DAG
Definition MachineScheduler.h:1312

llvm::HardwareUnitInfo
HardwareUnitInfo is a wrapper class which maps to some real hardware resource.
Definition AMDGPUCoExecSchedStrategy.h:154

llvm::HardwareUnitInfo::getTotalCycles
unsigned getTotalCycles()
Definition AMDGPUCoExecSchedStrategy.h:181

llvm::HardwareUnitInfo::HardwareUnitInfo
HardwareUnitInfo()
Definition AMDGPUCoExecSchedStrategy.h:177

llvm::HardwareUnitInfo::markScheduled
void markScheduled(SUnit *SU, unsigned BlockingCycles)
Update the state for SU being scheduled by removing it from the AllSUs and reducing its BlockingCycle...
Definition AMDGPUCoExecSchedStrategy.cpp:134

llvm::HardwareUnitInfo::contains
bool contains(SUnit *SU) const
Definition AMDGPUCoExecSchedStrategy.h:194

llvm::HardwareUnitInfo::getNextTargetSU
SUnit * getNextTargetSU(bool LookDeep=false) const
Definition AMDGPUCoExecSchedStrategy.cpp:83

llvm::HardwareUnitInfo::insert
void insert(SUnit *SU, unsigned BlockingCycles)
Insert the SU into AllSUs and account its BlockingCycles into the TotalCycles.
Definition AMDGPUCoExecSchedStrategy.cpp:109

llvm::HardwareUnitInfo::setType
void setType(unsigned TheType)
Definition AMDGPUCoExecSchedStrategy.h:183

llvm::HardwareUnitInfo::getType
AMDGPU::InstructionFlavor getType() const
Definition AMDGPUCoExecSchedStrategy.h:188

llvm::HardwareUnitInfo::producesCoexecWindow
bool producesCoexecWindow() const
Definition AMDGPUCoExecSchedStrategy.h:190

llvm::HardwareUnitInfo::setProducesCoexecWindow
void setProducesCoexecWindow(bool Val)
Definition AMDGPUCoExecSchedStrategy.h:192

llvm::HardwareUnitInfo::size
unsigned size()
Definition AMDGPUCoExecSchedStrategy.h:179

llvm::HardwareUnitInfo::getHigherPriority
SUnit * getHigherPriority(SUnit *SU, SUnit *Other) const
Definition AMDGPUCoExecSchedStrategy.h:200

llvm::HardwareUnitInfo::reset
void reset()
Definition AMDGPUCoExecSchedStrategy.h:211

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:343

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:74

llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition RegisterPressure.h:361

llvm::SIInstrInfo
Definition SIInstrInfo.h:108

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:250

llvm::SchedBoundary
Each Scheduling boundary is associated with ready queues.
Definition MachineScheduler.h:863

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition ScheduleDAGInstrs.h:118

llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition MachineScheduler.h:314

llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1236

llvm::StringRef
Represent a constant reference to a string, i.e.
Definition StringRef.h:56

llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition TargetRegisterInfo.h:72

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

uint8_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPU::FlavorGroups
Definition AMDGPUCoExecSchedStrategy.h:103

llvm::AMDGPU::FlavorGroups::individual
FlavorGroup individual(InstructionFlavor F)
Definition AMDGPUCoExecSchedStrategy.h:112

llvm::AMDGPU::FlavorGroups::allMem
FlavorGroup allMem()
Definition AMDGPUCoExecSchedStrategy.h:108

llvm::AMDGPU::FlavorGroups::all
FlavorGroup all()
Definition AMDGPUCoExecSchedStrategy.h:113

llvm::AMDGPU::FlavorGroups::allVALU
FlavorGroup allVALU()
Definition AMDGPUCoExecSchedStrategy.h:104

llvm::AMDGPU::WMMA
Definition SIDefines.h:1268

llvm::AMDGPU
Definition AMDGPUMetadataVerifier.h:34

llvm::AMDGPU::InstructionFlavor
InstructionFlavor
Definition AMDGPUCoExecSchedStrategy.h:28

llvm::AMDGPU::InstructionFlavor::NUM_FLAVORS
@ NUM_FLAVORS
Definition AMDGPUCoExecSchedStrategy.h:39

llvm::AMDGPU::InstructionFlavor::WMMA
@ WMMA
Definition AMDGPUCoExecSchedStrategy.h:29

llvm::AMDGPU::InstructionFlavor::DMA
@ DMA
Definition AMDGPUCoExecSchedStrategy.h:36

llvm::AMDGPU::InstructionFlavor::DS
@ DS
Definition AMDGPUCoExecSchedStrategy.h:34

llvm::AMDGPU::InstructionFlavor::VMEM
@ VMEM
Definition AMDGPUCoExecSchedStrategy.h:33

llvm::AMDGPU::InstructionFlavor::Other
@ Other
Definition AMDGPUCoExecSchedStrategy.h:38

llvm::AMDGPU::InstructionFlavor::SALU
@ SALU
Definition AMDGPUCoExecSchedStrategy.h:35

llvm::AMDGPU::InstructionFlavor::SingleCycleVALU
@ SingleCycleVALU
Definition AMDGPUCoExecSchedStrategy.h:30

llvm::AMDGPU::InstructionFlavor::TRANS
@ TRANS
Definition AMDGPUCoExecSchedStrategy.h:31

llvm::AMDGPU::InstructionFlavor::MultiCycleVALU
@ MultiCycleVALU
Definition AMDGPUCoExecSchedStrategy.h:32

llvm::AMDGPU::InstructionFlavor::Fence
@ Fence
Definition AMDGPUCoExecSchedStrategy.h:37

llvm::AMDGPU::getFlavorShortName
StringRef getFlavorShortName(InstructionFlavor F)
Definition AMDGPUCoExecSchedStrategy.h:70

llvm::AMDGPU::AMDGPUSchedReason
AMDGPUSchedReason
AMDGPU-specific scheduling decision reasons.
Definition AMDGPUCoExecSchedStrategy.h:124

llvm::AMDGPU::AMDGPUSchedReason::CritResourceDep
@ CritResourceDep
Definition AMDGPUCoExecSchedStrategy.h:127

llvm::AMDGPU::AMDGPUSchedReason::None
@ None
Definition AMDGPUCoExecSchedStrategy.h:125

llvm::AMDGPU::AMDGPUSchedReason::CritResourceBalance
@ CritResourceBalance
Definition AMDGPUCoExecSchedStrategy.h:126

llvm::AMDGPU::AMDGPUSchedReason::NUM_REASONS
@ NUM_REASONS
Definition AMDGPUCoExecSchedStrategy.h:128

llvm::AMDGPU::None
@ None
Definition AMDGPURegBankLegalizeRules.h:168

llvm::AMDGPU::classifyFlavor
InstructionFlavor classifyFlavor(const MachineInstr &MI, const SIInstrInfo &SII)
Definition AMDGPUCoExecSchedStrategy.cpp:46

llvm::AMDGPU::getReasonName
StringRef getReasonName(AMDGPUSchedReason R)
Definition AMDGPUCoExecSchedStrategy.h:131

llvm::AMDGPU::FlavorGroup
SmallVector< InstructionFlavor, 4 > FlavorGroup
Definition AMDGPUCoExecSchedStrategy.h:101

llvm::AMDGPU::getFlavorName
StringRef getFlavorName(InstructionFlavor F)
Definition AMDGPUCoExecSchedStrategy.h:42

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::createGCNNoopPostMachineScheduler
ScheduleDAGInstrs * createGCNNoopPostMachineScheduler(MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:719

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::createGCNCoExecMachineScheduler
ScheduleDAGInstrs * createGCNCoExecMachineScheduler(MachineSchedContext *C)
Definition AMDGPUCoExecSchedStrategy.cpp:709

llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition MachineScheduler.h:1126

llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition MachineScheduler.h:1164

llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition MachineScheduler.h:145