#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-subtarget"

#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
#undef AMDGPUSubtarget
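// The TableGen-erated code in AMDGPUGenSubtargetInfo.inc is emitted for a
// class named AMDGPUSubtarget; temporarily #defining that name to
// GCNSubtarget lets GCNSubtarget adopt the generated members, and the #undef
// restores the name for the shared AMDGPUSubtarget base class.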
44 "amdgpu-disable-power-sched",
45 cl::desc(
"Disable scheduling to minimize mAI power bursts"),
49 "amdgpu-vgpr-index-mode",
50 cl::desc(
"Use GPR indexing mode instead of movrel for vector indexing"),
54 cl::desc(
"Enable the use of AA during codegen."),
  FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";

  FullFS += "+enable-prt-strict-null,";

  if (FS.contains_insensitive("+wavefrontsize")) {
    if (!FS.contains_insensitive("wavefrontsize16"))
      FullFS += "-wavefrontsize16,";
    if (!FS.contains_insensitive("wavefrontsize32"))
      FullFS += "-wavefrontsize32,";
    if (!FS.contains_insensitive("wavefrontsize64"))
      FullFS += "-wavefrontsize64,";
  }
    ToggleFeature(AMDGPU::FeatureFlatForGlobal);

    ToggleFeature(AMDGPU::FeatureFlatForGlobal);
      InstrItins(getInstrItineraryForCPU(GPU)),
      InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
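// On targets that offer a second constant-bus slot, the 64-bit shifts above
// are the exception: they may still consume only a single SGPR/literal
// operand, hence the limit of 1 for those opcodes.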
bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::V_CVT_F16_F32_e32:
  case AMDGPU::V_CVT_F16_F32_e64:
  case AMDGPU::V_CVT_F16_U16_e32:
  case AMDGPU::V_CVT_F16_U16_e64:
  case AMDGPU::V_CVT_F16_I16_e32:
  case AMDGPU::V_CVT_F16_I16_e64:
  case AMDGPU::V_RCP_F16_e64:
  case AMDGPU::V_RCP_F16_e32:
  case AMDGPU::V_RSQ_F16_e64:
  case AMDGPU::V_RSQ_F16_e32:
  case AMDGPU::V_SQRT_F16_e64:
  case AMDGPU::V_SQRT_F16_e32:
  case AMDGPU::V_LOG_F16_e64:
  case AMDGPU::V_LOG_F16_e32:
  case AMDGPU::V_EXP_F16_e64:
  case AMDGPU::V_EXP_F16_e32:
  case AMDGPU::V_SIN_F16_e64:
  case AMDGPU::V_SIN_F16_e32:
  case AMDGPU::V_COS_F16_e64:
  case AMDGPU::V_COS_F16_e32:
  case AMDGPU::V_FLOOR_F16_e64:
  case AMDGPU::V_FLOOR_F16_e32:
  case AMDGPU::V_CEIL_F16_e64:
  case AMDGPU::V_CEIL_F16_e32:
  case AMDGPU::V_TRUNC_F16_e64:
  case AMDGPU::V_TRUNC_F16_e32:
  case AMDGPU::V_RNDNE_F16_e64:
  case AMDGPU::V_RNDNE_F16_e32:
  case AMDGPU::V_FRACT_F16_e64:
  case AMDGPU::V_FRACT_F16_e32:
  case AMDGPU::V_FREXP_MANT_F16_e64:
  case AMDGPU::V_FREXP_MANT_F16_e32:
  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
  case AMDGPU::V_LDEXP_F16_e64:
  case AMDGPU::V_LDEXP_F16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ADD_U16_e64:
  case AMDGPU::V_ADD_U16_e32:
  case AMDGPU::V_SUB_U16_e64:
  case AMDGPU::V_SUB_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_MUL_LO_U16_e64:
  case AMDGPU::V_MUL_LO_U16_e32:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_e32:
  case AMDGPU::V_SUB_F16_e64:
  case AMDGPU::V_SUB_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_e32:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_e32:
  case AMDGPU::V_MIN_F16_e64:
  case AMDGPU::V_MIN_F16_e32:
  case AMDGPU::V_MAX_U16_e64:
  case AMDGPU::V_MAX_U16_e32:
  case AMDGPU::V_MIN_U16_e64:
  case AMDGPU::V_MIN_U16_e32:
  case AMDGPU::V_MAX_I16_e64:
  case AMDGPU::V_MAX_I16_e32:
  case AMDGPU::V_MIN_I16_e64:
  case AMDGPU::V_MIN_I16_e32:
  case AMDGPU::V_MAD_F16_e64:
  case AMDGPU::V_MAD_U16_e64:
  case AMDGPU::V_MAD_I16_e64:
  case AMDGPU::V_FMA_F16_e64:
  case AMDGPU::V_DIV_FIXUP_F16_e64:

  case AMDGPU::V_MADAK_F16:
  case AMDGPU::V_MADMK_F16:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAMK_F16:
  case AMDGPU::V_FMAAK_F16:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e32:

  case AMDGPU::V_MAD_MIXLO_F16:
  case AMDGPU::V_MAD_MIXHI_F16:
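  // Per zeroesHigh16BitsOfDest(), these are the 16-bit VALU operations whose
  // result, written into a 32-bit VGPR, implicitly has its high 16 bits
  // cleared on the relevant generations, so later passes can skip an explicit
  // mask when widening the result.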
  if (!WorkGroupsPerCu)
    return 0;

  if (!MaxWorkGroupsPerCu)
    return 0;

  NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);

  const unsigned MaxGroupNumWaves = (MaxWorkGroupSize + WaveSize - 1) / WaveSize;
  unsigned MaxWaves = NumGroups * MaxGroupNumWaves;

  assert(MaxWaves > 0 && MaxWaves <= getMaxWavesPerEU() &&
         "computed invalid occupancy");
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getFlatWorkGroupSizes(const Function &F) const {

  std::pair<unsigned, unsigned> Default =
      getDefaultFlatWorkGroupSize(F.getCallingConv());

  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-flat-work-group-size", Default);

  if (Requested.first > Requested.second)
    return Default;
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
    const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
  std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());

  unsigned MinImpliedByFlatWorkGroupSize =
      getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
  Default.first = MinImpliedByFlatWorkGroupSize;

  std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-waves-per-eu", Default, true);

  if (Requested.second && Requested.first > Requested.second)
    return Default;

  if (Requested.first < MinImpliedByFlatWorkGroupSize)
    return Default;
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
  auto Node = Kernel.getMetadata("reqd_work_group_size");
  if (Node && Node->getNumOperands() == 3)
    return mdconst::extract<ConstantInt>(Node->getOperand(Dim))->getZExtValue();
  return std::numeric_limits<unsigned>::max();
}
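// The "reqd_work_group_size" metadata is how OpenCL's
// __attribute__((reqd_work_group_size(x, y, z))) reaches the backend: a
// three-operand MDNode of constant integers, indexed here by dimension.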
unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
                                           unsigned Dimension) const {

bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;

  bool IdQuery = false;

  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();

    unsigned Dim = UINT_MAX;
    switch (F->getIntrinsicID()) {
    case Intrinsic::amdgcn_workitem_id_x:
    case Intrinsic::r600_read_tidig_x:
      IdQuery = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::r600_read_local_size_x:
      Dim = 0;
      break;
    case Intrinsic::amdgcn_workitem_id_y:
    case Intrinsic::r600_read_tidig_y:
      IdQuery = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::r600_read_local_size_y:
      Dim = 1;
      break;
    case Intrinsic::amdgcn_workitem_id_z:
    case Intrinsic::r600_read_tidig_z:
      IdQuery = true;
      LLVM_FALLTHROUGH;
    case Intrinsic::r600_read_local_size_z:
      Dim = 2;
      break;
    }

      MinSize = MaxSize = ReqdSize;

  I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
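  // Attaching !range metadata (createRange describes the half-open interval
  // [Lo, Hi)) to the workitem-id call or load lets later passes, e.g.
  // known-bits based simplifications, assume the local ID never exceeds the
  // work group size.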
  if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
    return 0;
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
                                                 Align &MaxAlign) const {
  const DataLayout &DL = F.getParent()->getDataLayout();
  uint64_t ExplicitArgBytes = 0;
  MaxAlign = Align(1);

  for (const Argument &Arg : F.args()) {
    const bool IsByRef = Arg.hasByRefAttr();
    Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
    Align Alignment = DL.getValueOrABITypeAlignment(
        IsByRef ? Arg.getParamAlign() : None, ArgTy);
    uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
    ExplicitArgBytes = alignTo(ExplicitArgBytes, Alignment) + AllocSize;
    MaxAlign = std::max(MaxAlign, Alignment);
  }

  return ExplicitArgBytes;
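  // Example of the layout rule above: for kernel arguments (i32, <4 x float>,
  // i8), the i32 sits at offset 0 (size 4), the vector is aligned up to
  // offset 16 (size 16), and the i8 lands at offset 32, so ExplicitArgBytes
  // is 33 and MaxAlign is 16.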
uint64_t AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
                                                Align &MaxAlign) const {
  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
  unsigned ExplicitOffset = getExplicitKernelArgOffset(F);

  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
  unsigned ImplicitBytes = getImplicitArgNumBytes(F);
  if (ImplicitBytes != 0) {
    const Align Alignment = getAlignmentForImplicitArgPtr();
    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
    MaxAlign = std::max(MaxAlign, Alignment);
  }
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {

unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
  unsigned Granule = getVGPRAllocGranule();

  unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule;
unsigned GCNSubtarget::getBaseMaxNumSGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU,
    unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const {
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested =
        AMDGPU::getIntegerAttribute(F, "amdgpu-num-sgpr", MaxNumSGPRs);

    if (Requested && (Requested <= ReservedNumSGPRs))
      Requested = 0;

    unsigned InputNumSGPRs = PreloadedSGPRs;
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumSGPRs = Requested;
  }

  return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
  unsigned MaxUserSGPRs = 4 + // private segment buffer
                          2 + // dispatch ptr
                          2 + // queue ptr
                          2 + // kernarg segment ptr
                          2 + // dispatch id
                          2 + // flat scratch init
                          2;  // implicit buffer ptr

  unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
                            1 + // WorkGroupIDY
                            1 + // WorkGroupIDZ
                            1 + // WorkGroupInfo
                            1;  // private segment wave byte offset

  return MaxUserSGPRs + MaxSystemSGPRs;
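  // This is the worst-case preload budget: 16 user SGPRs plus 5 system SGPRs,
  // covering the fixed set of user/system registers the hardware can
  // initialize before the kernel begins executing.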
unsigned GCNSubtarget::getBaseMaxNumVGPRs(
    const Function &F, std::pair<unsigned, unsigned> WavesPerEU) const {
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested =
        AMDGPU::getIntegerAttribute(F, "amdgpu-num-vgpr", MaxNumVGPRs);

    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    if (Requested)
      MaxNumVGPRs = Requested;
  }
void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
                                         int UseOpIdx, SDep &Dep) const {
  if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr())
    return;

  MachineInstr *DefI = Def->getInstr();
  MachineInstr *UseI = Use->getInstr();

  for (++I; I != E && I->isBundledWithPred(); ++I) {
    if (I->modifiesRegister(Reg, TRI))
      Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
    else if (Lat)
      --Lat;
  }

  for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
    if (I->readsRegister(Reg, TRI))
      break;
    --Lat;
  }

  Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
      DefI, DefOpIdx, UseI, UseOpIdx));
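  // For bundles, the generic model cannot see which instruction inside the
  // bundle actually produces or consumes Reg, so the two scans above walk the
  // bundled instructions to refine the edge latency in each direction.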
  bool isSALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }
  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }
  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {

    for (unsigned I = 0; I < Succs.size(); ++I) {
      for (const SDep &SI : Succs[I]->Succs) {
        const SUnit *SU = SI.getSUnit();
        if (SU != Succs[I] && !llvm::is_contained(Succs, SU))
          Succs.push_back(SU);
      }
    }
    SmallPtrSet<const SUnit *, 32> Visited;
    while (!Preds.empty()) {
      const SUnit *SU = Preds.pop_back_val();

      for (const SDep &SI : SU->Preds)
        if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
          Preds.push_back(SI.getSUnit());
    }
    while (!Worklist.empty() && MaxChain-- > 0) {
      SUnit *SU = Worklist.pop_back_val();
      if (!Visited.insert(SU).second)
        continue;

      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
      }

      for (SDep &SI : SU->Succs) {
        SUnit *Succ = SI.getSUnit();
        if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
          Worklist.push_back(Succ);
      }
    }
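    // linkSALUChain: starting from a SALU instruction that can legally sit in
    // the MFMA's shadow, keep chaining further SALU successors (bounded by
    // MaxChain) with artificial edges so they issue while the MFMA is still
    // in flight.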
  void apply(ScheduleDAGInstrs *DAGInstrs) override {
    DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);

    if (!TSchedModel || DAG->SUnits.empty())
      return;

    auto LastSALU = DAG->SUnits.begin();
    auto E = DAG->SUnits.end();
    SmallPtrSet<SUnit *, 32> Visited;
    for (SUnit &SU : DAG->SUnits) {
      MachineInstr &MAI = *SU.getInstr();
      if (!TII->isMAI(MAI) ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32_e64)
        continue;

      unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;

      LLVM_DEBUG(dbgs() << "Need " << Lat
                        << " instructions to cover latency.\n");

      for (; Lat && LastSALU != E; ++LastSALU) {
        if (Visited.count(&*LastSALU))
          continue;

        if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
          continue;

        Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
      }
    }
  }
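  // The mutation walks every MFMA in the region and pulls independent scalar
  // (SALU) work underneath it: each linked SALU instruction covers one cycle
  // of the MFMA's latency, so Lat counts down as chains are linked.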
void GCNSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}

std::unique_ptr<ScheduleDAGMutation>
GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
  return std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
}
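// getPostRAMutations() installs the mutation for the post-RA scheduler, while
// createFillMFMAShadowMutation() lets other AMDGPU scheduler instances
// request the same DAG postpass on demand.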