13#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
30class MachineFrameInfo;
32class SIMachineFunctionInfo;
34class TargetRegisterClass;
95 std::optional<unsigned>
Mask;
137 if (
YamlIO.outputting()) {
139 YamlIO.mapRequired(
"reg",
A.RegisterName);
141 YamlIO.mapRequired(
"offset",
A.StackOffset);
146 YamlIO.mapRequired(
"reg",
A.RegisterName);
148 YamlIO.mapRequired(
"offset",
A.StackOffset);
150 YamlIO.setError(
"missing required key 'reg' or 'offset'");
152 YamlIO.mapOptional(
"mask",
A.Mask);
154 static const bool flow =
true;
196 YamlIO.mapOptional(
"privateSegmentWaveByteOffset",
244 YamlIO.mapOptional(
"dx10-clamp",
Mode.DX10Clamp,
true);
245 YamlIO.mapOptional(
"fp32-input-denormals",
Mode.FP32InputDenormals,
true);
246 YamlIO.mapOptional(
"fp32-output-denormals",
Mode.FP32OutputDenormals,
true);
247 YamlIO.mapOptional(
"fp64-fp16-input-denormals",
Mode.FP64FP16InputDenormals,
true);
248 YamlIO.mapOptional(
"fp64-fp16-output-denormals",
Mode.FP64FP16OutputDenormals,
true);
316 YamlIO.mapOptional(
"highBitsOf32BitAddress",
367 Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
371 Register FrameOffsetReg = AMDGPU::FP_REG;
376 Register StackPtrOffsetReg = AMDGPU::SP_REG;
381 unsigned PSInputAddr = 0;
382 unsigned PSInputEnable = 0;
393 unsigned BytesInStackArgArea = 0;
395 bool ReturnsVoid =
true;
399 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
403 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
405 const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
408 unsigned NumUserSGPRs = 0;
409 unsigned NumSystemSGPRs = 0;
411 bool HasSpilledSGPRs =
false;
412 bool HasSpilledVGPRs =
false;
413 bool HasNonSpillStackObjects =
false;
414 bool IsStackRealigned =
false;
416 unsigned NumSpilledSGPRs = 0;
417 unsigned NumSpilledVGPRs = 0;
420 bool PrivateSegmentBuffer : 1;
421 bool DispatchPtr : 1;
423 bool KernargSegmentPtr : 1;
425 bool FlatScratchInit : 1;
428 bool WorkGroupIDX : 1;
429 bool WorkGroupIDY : 1;
430 bool WorkGroupIDZ : 1;
431 bool WorkGroupInfo : 1;
432 bool LDSKernelId : 1;
433 bool PrivateSegmentWaveByteOffset : 1;
435 bool WorkItemIDX : 1;
436 bool WorkItemIDY : 1;
437 bool WorkItemIDZ : 1;
442 bool ImplicitBufferPtr : 1;
446 bool ImplicitArgPtr : 1;
448 bool MayNeedAGPRs : 1;
455 unsigned HighBitsOf32BitAddress;
460 mutable std::optional<bool> UsesAGPRs;
480 PrologEpilogSGPRSpillToVGPRLanes;
481 unsigned NumVGPRSpillLanes = 0;
482 unsigned NumVGPRPrologEpilogSpillLanes = 0;
518 std::optional<int> ScavengeFI;
525 bool allocateVGPRForPrologEpilogSGPRSpills(
MachineFunction &MF,
int FI,
530 return VGPRForAGPRCopy;
534 VGPRForAGPRCopy = NewVGPRForAGPRCopy;
559 auto I = SGPRSpillToVGPRLanes.
find(FrameIndex);
560 return (
I == SGPRSpillToVGPRLanes.
end())
570 return PrologEpilogSGPRSpills;
575 PrologEpilogSGPRSpills.
insert(std::make_pair(
Reg,
SI));
586 auto I = PrologEpilogSGPRSpills.
find(
Reg);
587 if (
I != PrologEpilogSGPRSpills.
end() &&
589 return I->second.getReg();
591 return AMDGPU::NoRegister;
596 for (
const auto &
SI : PrologEpilogSGPRSpills) {
604 return find_if(PrologEpilogSGPRSpills,
607 return SI.second.getKind() ==
609 SI.second.getIndex() == FI;
610 }) != PrologEpilogSGPRSpills.
end();
615 auto I = PrologEpilogSGPRSpills.
find(
Reg);
623 auto I = PrologEpilogSGPRSpillToVGPRLanes.
find(FrameIndex);
624 return (
I == PrologEpilogSGPRSpillToVGPRLanes.
end())
646 auto I = VGPRToAGPRSpills.
find(FrameIndex);
647 return (
I == VGPRToAGPRSpills.
end()) ? (
MCPhysReg)AMDGPU::NoRegister
648 :
I->second.Lanes[Lane];
652 auto I = VGPRToAGPRSpills.
find(FrameIndex);
653 if (
I != VGPRToAGPRSpills.
end())
654 I->second.IsDead =
true;
658 bool IsPrologEpilog =
false);
664 bool ResetSGPRSpillStackIDs);
670 return BytesInStackArgArea;
674 BytesInStackArgArea = Bytes;
696 Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP9 : getNextSystemSGPR();
698 if (!HasArchitectedSGPRs)
701 return ArgInfo.WorkGroupIDX.getRegister();
705 Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
706 unsigned Mask = HasArchitectedSGPRs &&
hasWorkGroupIDZ() ? 0xffff : ~0u;
708 if (!HasArchitectedSGPRs)
711 return ArgInfo.WorkGroupIDY.getRegister();
715 Register Reg = HasArchitectedSGPRs ? AMDGPU::TTMP7 : getNextSystemSGPR();
716 unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
718 if (!HasArchitectedSGPRs)
721 return ArgInfo.WorkGroupIDZ.getRegister();
727 return ArgInfo.WorkGroupInfo.getRegister();
744 ArgInfo.PrivateSegmentWaveByteOffset
747 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
755 return PrivateSegmentBuffer;
767 return KernargSegmentPtr;
775 return FlatScratchInit;
791 return WorkGroupInfo;
797 return PrivateSegmentWaveByteOffset;
813 return ImplicitArgPtr;
817 return ImplicitBufferPtr;
828 std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
845 return HighBitsOf32BitAddress;
853 return NumUserSGPRs + NumSystemSGPRs;
857 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
863 return ScratchRSrcReg;
867 assert(
Reg != 0 &&
"Should never be unset");
868 ScratchRSrcReg =
Reg;
872 return FrameOffsetReg;
876 assert(
Reg != 0 &&
"Should never be unset");
877 FrameOffsetReg =
Reg;
881 assert(
Reg != 0 &&
"Should never be unset");
882 StackPtrOffsetReg =
Reg;
890 return StackPtrOffsetReg;
894 return ArgInfo.QueuePtr.getRegister();
898 return ArgInfo.ImplicitBufferPtr.getRegister();
902 return HasSpilledSGPRs;
906 HasSpilledSGPRs = Spill;
910 return HasSpilledVGPRs;
914 HasSpilledVGPRs = Spill;
918 return HasNonSpillStackObjects;
922 HasNonSpillStackObjects = StackObject;
926 return IsStackRealigned;
930 IsStackRealigned = Realigned;
934 return NumSpilledSGPRs;
938 return NumSpilledVGPRs;
942 NumSpilledSGPRs += num;
946 NumSpilledVGPRs += num;
954 return PSInputEnable;
958 return PSInputAddr & (1 <<
Index);
962 PSInputAddr |= 1 <<
Index;
966 PSInputEnable |= 1 <<
Index;
980 return FlatWorkGroupSizes;
985 return FlatWorkGroupSizes.first;
990 return FlatWorkGroupSizes.second;
1001 return WavesPerEU.first;
1006 return WavesPerEU.second;
1014 return ArgInfo.WorkGroupIDX.getRegister();
1017 return ArgInfo.WorkGroupIDY.getRegister();
1020 return ArgInfo.WorkGroupIDZ.getRegister();
1027 return &GWSResourcePSV;
1037 return (Occupancy < 4) ? Occupancy : 4;
1043 if (Occupancy > Limit)
1048 if (Occupancy < Limit)
1054 return MayNeedAGPRs;
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Provides AMDGPU specific target descriptions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
std::array< StringRef, 64 > Keys
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
static bool classof(const PseudoSourceValue *V)
AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
bool isMemoryBound() const
bool needsWaveLimiter() const
bool isConstant(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue has a constant value.
AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Allocate memory in an ever growing pool, as if by bump-pointer.
iterator find(const_arg_type_t< KeyT > Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Lightweight error class with error context and mandatory checking.
Wrapper class representing physical registers. Should be passed by value.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I)
SGPRSaveKind getKind() const
Special value supplied for machine level alias analysis.
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool hasNonSpillStackObjects() const
const WWMSpillsMap & getWWMSpills() const
bool usesAGPRs(const MachineFunction &MF) const
bool isPSInputAllocated(unsigned Index) const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
unsigned getOccupancy() const
unsigned getNumPreloadedSGPRs() const
void setWorkItemIDY(ArgDescriptor Arg)
const PrologEpilogSGPRSpillsMap & getPrologEpilogSGPRSpills() const
unsigned getNumSpilledVGPRs() const
bool hasLDSKernelId() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
void setWorkItemIDZ(ArgDescriptor Arg)
std::pair< unsigned, unsigned > getWavesPerEU() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
unsigned getNumSpilledSGPRs() const
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
Register addDispatchPtr(const SIRegisterInfo &TRI)
bool hasSpilledVGPRs() const
void setVGPRToAGPRSpillDead(int FrameIndex)
unsigned getMaxFlatWorkGroupSize() const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes() const
Register getStackPtrOffsetReg() const
bool isStackRealigned() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
bool hasFlatScratchInit() const
unsigned getMaxWavesPerEU() const
bool hasDispatchID() const
void setStackPtrOffsetReg(Register Reg)
Register addReservedUserSGPR()
Increment user SGPRs used for padding the argument list only.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
bool hasImplicitBufferPtr() const
Register addQueuePtr(const SIRegisterInfo &TRI)
bool hasWorkGroupIDZ() const
Register getQueuePtrUserSGPR() const
ArrayRef< SIRegisterInfo::SpilledReg > getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool IsPrologEpilog=false)
SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI)=default
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
bool hasWorkGroupIDY() const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
void setBytesInStackArgArea(unsigned Bytes)
SIModeRegisterDefaults getMode() const
void setFrameOffsetReg(Register Reg)
Register addWorkGroupIDX(bool HasArchitectedSGPRs)
Register addPrivateSegmentWaveByteOffset()
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool hasWorkGroupInfo() const
bool mayUseAGPRs(const Function &F) const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) const
Register addWorkGroupIDY(bool HasArchitectedSGPRs)
bool hasWorkItemIDY() const
unsigned getMinFlatWorkGroupSize() const
Register addLDSKernelId()
Register getVGPRForAGPRCopy() const
void setPrivateSegmentWaveByteOffset(Register Reg)
unsigned getMinWavesPerEU() const
Register getFrameOffsetReg() const
bool hasWorkGroupIDX() const
const AMDGPUFunctionArgInfo & getArgInfo() const
unsigned getBytesInStackArgArea() const
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Register addDispatchID(const SIRegisterInfo &TRI)
void setHasSpilledVGPRs(bool Spill=true)
void setIfReturnsVoid(bool Value)
void limitOccupancy(unsigned Limit)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
bool hasDispatchPtr() const
void markPSInputAllocated(unsigned Index)
void setWorkItemIDX(ArgDescriptor Arg)
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
void markPSInputEnabled(unsigned Index)
void addToSpilledVGPRs(unsigned num)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool hasPrivateSegmentBuffer() const
uint32_t get32BitAddressHighBits() const
unsigned getMinAllowedOccupancy() const
void setHasSpilledSGPRs(bool Spill=true)
Register getWorkGroupIDSGPR(unsigned Dim) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
bool hasWorkItemIDX() const
unsigned getNumUserSGPRs() const
Register addWorkGroupIDZ(bool HasArchitectedSGPRs)
const ReservedRegSet & getWWMReservedRegs() const
Register getImplicitBufferPtrUserSGPR() const
std::optional< int > getOptionalScavengeFI() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
AMDGPUFunctionArgInfo & getArgInfo()
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setHasNonSpillStackObjects(bool StackObject=true)
void setIsStackRealigned(bool Realigned=true)
unsigned getGITPtrHigh() const
void limitOccupancy(const MachineFunction &MF)
bool hasSpilledSGPRs() const
bool hasKernargSegmentPtr() const
ArrayRef< Register > getSGPRSpillVGPRs() const
unsigned getPSInputAddr() const
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const
bool hasImplicitArgPtr() const
bool mayNeedAGPRs() const
Register addWorkGroupInfo()
bool hasWorkItemIDZ() const
unsigned getPSInputEnable() const
void setScratchRSrcReg(Register Reg)
void addToSpilledSGPRs(unsigned num)
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
void reserveWWMRegister(Register Reg)
bool hasPrivateSegmentWaveByteOffset() const
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Represents a range in source code.
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
LLVM Value Representation.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
Helper struct shared between Function Specialization and SCCP Solver.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
SmallVector< MCPhysReg, 32 > Lanes
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
static void mapping(IO &YamlIO, SIArgumentInfo &AI)
static void mapping(IO &YamlIO, SIArgument &A)
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI)
static void mapping(IO &YamlIO, SIMode &Mode)
std::optional< SIArgument > PrivateSegmentWaveByteOffset
std::optional< SIArgument > WorkGroupIDY
std::optional< SIArgument > FlatScratchInit
std::optional< SIArgument > DispatchPtr
std::optional< SIArgument > DispatchID
std::optional< SIArgument > WorkItemIDY
std::optional< SIArgument > WorkGroupIDX
std::optional< SIArgument > ImplicitArgPtr
std::optional< SIArgument > QueuePtr
std::optional< SIArgument > WorkGroupInfo
std::optional< SIArgument > LDSKernelId
std::optional< SIArgument > ImplicitBufferPtr
std::optional< SIArgument > WorkItemIDX
std::optional< SIArgument > KernargSegmentPtr
std::optional< SIArgument > WorkItemIDZ
std::optional< SIArgument > PrivateSegmentSize
std::optional< SIArgument > PrivateSegmentBuffer
std::optional< SIArgument > WorkGroupIDZ
std::optional< unsigned > Mask
SIArgument(const SIArgument &Other)
SIArgument & operator=(const SIArgument &Other)
static SIArgument createArgument(bool IsReg)
SmallVector< StringValue > WWMReservedRegs
uint32_t HighBitsOf32BitAddress
SIMachineFunctionInfo()=default
StringValue FrameOffsetReg
uint64_t ExplicitKernArgSize
void mappingImpl(yaml::IO &YamlIO) override
~SIMachineFunctionInfo()=default
StringValue VGPRForAGPRCopy
std::optional< SIArgumentInfo > ArgInfo
std::optional< FrameIndex > ScavengeFI
unsigned BytesInStackArgArea
StringValue ScratchRSrcReg
StringValue StackPtrOffsetReg
SIMode(const SIModeRegisterDefaults &Mode)
bool FP64FP16OutputDenormals
bool operator==(const SIMode Other) const
bool FP64FP16InputDenormals
A wrapper around std::string which contains a source range that's being set during parsing.