Go to the documentation of this file.
13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
28 class MachineFrameInfo;
29 class MachineFunction;
30 class SIMachineFunctionInfo;
32 class TargetRegisterClass;
161 if (
YamlIO.outputting()) {
163 YamlIO.mapRequired(
"reg", A.RegisterName);
165 YamlIO.mapRequired(
"offset", A.StackOffset);
167 auto Keys =
YamlIO.keys();
170 YamlIO.mapRequired(
"reg", A.RegisterName);
172 YamlIO.mapRequired(
"offset", A.StackOffset);
174 YamlIO.setError(
"missing required key 'reg' or 'offset'");
176 YamlIO.mapOptional(
"mask", A.Mask);
178 static const bool flow =
true;
218 YamlIO.mapOptional(
"privateSegmentWaveByteOffset",
263 YamlIO.mapOptional(
"dx10-clamp",
Mode.DX10Clamp,
true);
264 YamlIO.mapOptional(
"fp32-input-denormals",
Mode.FP32InputDenormals,
true);
265 YamlIO.mapOptional(
"fp32-output-denormals",
Mode.FP32OutputDenormals,
true);
266 YamlIO.mapOptional(
"fp64-fp16-input-denormals",
Mode.FP64FP16InputDenormals,
true);
267 YamlIO.mapOptional(
"fp64-fp16-output-denormals",
Mode.FP64FP16OutputDenormals,
true);
335 YamlIO.mapOptional(
"highBitsOf32BitAddress",
354 Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
358 Register FrameOffsetReg = AMDGPU::FP_REG;
363 Register StackPtrOffsetReg = AMDGPU::SP_REG;
368 unsigned PSInputAddr = 0;
369 unsigned PSInputEnable = 0;
380 unsigned BytesInStackArgArea = 0;
382 bool ReturnsVoid =
true;
386 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
390 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
392 const AMDGPUBufferPseudoSourceValue BufferPSV;
393 const AMDGPUImagePseudoSourceValue ImagePSV;
394 const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
397 unsigned NumUserSGPRs = 0;
398 unsigned NumSystemSGPRs = 0;
400 bool HasSpilledSGPRs =
false;
401 bool HasSpilledVGPRs =
false;
402 bool HasNonSpillStackObjects =
false;
403 bool IsStackRealigned =
false;
409 bool PrivateSegmentBuffer : 1;
410 bool DispatchPtr : 1;
412 bool KernargSegmentPtr : 1;
414 bool FlatScratchInit : 1;
417 bool WorkGroupIDX : 1;
418 bool WorkGroupIDY : 1;
419 bool WorkGroupIDZ : 1;
420 bool WorkGroupInfo : 1;
421 bool PrivateSegmentWaveByteOffset : 1;
423 bool WorkItemIDX : 1;
424 bool WorkItemIDY : 1;
425 bool WorkItemIDZ : 1;
430 bool ImplicitBufferPtr : 1;
434 bool ImplicitArgPtr : 1;
436 bool MayNeedAGPRs : 1;
443 unsigned HighBitsOf32BitAddress;
448 mutable Optional<bool> UsesAGPRs;
494 unsigned NumVGPRSpillLanes = 0;
514 return VGPRForAGPRCopy;
518 VGPRForAGPRCopy = NewVGPRForAGPRCopy;
555 return (
I == SGPRToVGPRSpills.
end())
572 return (
I == VGPRToAGPRSpills.
end()) ? (
MCPhysReg)AMDGPU::NoRegister
573 :
I->second.Lanes[Lane];
578 if (
I != VGPRToAGPRSpills.
end())
579 I->second.IsDead =
true;
583 unsigned NumLane)
const;
590 bool ResetSGPRSpillStackIDs);
596 return BytesInStackArgArea;
600 BytesInStackArgArea = Bytes;
616 return ArgInfo.WorkGroupIDX.getRegister();
622 return ArgInfo.WorkGroupIDY.getRegister();
628 return ArgInfo.WorkGroupIDZ.getRegister();
634 return ArgInfo.WorkGroupInfo.getRegister();
651 ArgInfo.PrivateSegmentWaveByteOffset
654 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
662 return PrivateSegmentBuffer;
674 return KernargSegmentPtr;
682 return FlatScratchInit;
698 return WorkGroupInfo;
702 return PrivateSegmentWaveByteOffset;
718 return ImplicitArgPtr;
722 return ImplicitBufferPtr;
733 std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
750 return HighBitsOf32BitAddress;
758 return NumUserSGPRs + NumSystemSGPRs;
762 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
768 return ScratchRSrcReg;
772 assert(
Reg != 0 &&
"Should never be unset");
773 ScratchRSrcReg =
Reg;
777 return FrameOffsetReg;
781 assert(
Reg != 0 &&
"Should never be unset");
782 FrameOffsetReg =
Reg;
786 assert(
Reg != 0 &&
"Should never be unset");
787 StackPtrOffsetReg =
Reg;
795 return StackPtrOffsetReg;
799 return ArgInfo.QueuePtr.getRegister();
803 return ArgInfo.ImplicitBufferPtr.getRegister();
807 return HasSpilledSGPRs;
811 HasSpilledSGPRs = Spill;
815 return HasSpilledVGPRs;
819 HasSpilledVGPRs = Spill;
823 return HasNonSpillStackObjects;
827 HasNonSpillStackObjects = StackObject;
831 return IsStackRealigned;
835 IsStackRealigned = Realigned;
839 return NumSpilledSGPRs;
843 return NumSpilledVGPRs;
847 NumSpilledSGPRs += num;
851 NumSpilledVGPRs += num;
859 return PSInputEnable;
863 return PSInputAddr & (1 <<
Index);
867 PSInputAddr |= 1 <<
Index;
871 PSInputEnable |= 1 <<
Index;
885 return FlatWorkGroupSizes;
890 return FlatWorkGroupSizes.first;
895 return FlatWorkGroupSizes.second;
906 return WavesPerEU.first;
911 return WavesPerEU.second;
919 return ArgInfo.WorkGroupIDX.getRegister();
922 return ArgInfo.WorkGroupIDY.getRegister();
925 return ArgInfo.WorkGroupIDZ.getRegister();
942 return &GWSResourcePSV;
952 return (Occupancy < 4) ? Occupancy : 4;
958 if (Occupancy > Limit)
963 if (Occupancy < Limit)
982 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
unsigned getMinFlatWorkGroupSize() const
void limitOccupancy(unsigned Limit)
static bool classof(const PseudoSourceValue *V)
void setIfReturnsVoid(bool Value)
void setIsStackRealigned(bool Realigned=true)
uint32_t HighBitsOf32BitAddress
ArrayRef< SGPRSpillVGPR > getSGPRSpillVGPRs() const
auto wwmAllocation() const
bool hasSpilledVGPRs() const
Register addPrivateSegmentWaveByteOffset()
SmallVector< int, 8 > WWMReservedFrameIndexes
Track stack slots used for save/restore of reserved WWM VGPRs in the prolog/epilog.
This is an optimization pass for GlobalISel generic memory operations.
Register getVGPRForAGPRCopy() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool mayNeedAGPRs() const
void markPSInputAllocated(unsigned Index)
bool operator==(const SIMode Other) const
unsigned getMinAllowedOccupancy() const
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg)
Optional< SIArgumentInfo > ArgInfo
static bool classof(const PseudoSourceValue *V)
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
Register addWorkGroupIDY()
Optional< SIArgument > WorkGroupIDX
unsigned getNumSpilledSGPRs() const
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
Optional< SIArgument > DispatchPtr
Register addDispatchPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > PrivateSegmentWaveByteOffset
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
AMDGPUFunctionArgInfo & getArgInfo()
AMDGPUImagePseudoSourceValue(const AMDGPUTargetMachine &TM)
StringValue VGPRForAGPRCopy
Reg
All possible values of the reg field in the ModR/M byte.
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI)
Reserve a slice of a VGPR to support spilling for FrameIndex FI.
Register addWorkGroupIDZ()
bool hasImplicitArgPtr() const
std::pair< unsigned, unsigned > getFlatWorkGroupSizes() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getGITPtrHigh() const
Register addFlatScratchInit(const SIRegisterInfo &TRI)
Optional< SIArgument > WorkItemIDY
SIArgument(const SIArgument &Other)
const AMDGPUGWSResourcePseudoSourceValue * getGWSPSV(const AMDGPUTargetMachine &TM)
unsigned getPSInputEnable() const
void increaseOccupancy(const MachineFunction &MF, unsigned Limit)
Register SGPRForFPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the frame pointer.
unsigned const TargetRegisterInfo * TRI
unsigned getNumPreloadedSGPRs() const
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj * new
void setWorkItemIDZ(ArgDescriptor Arg)
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode)
Optional< SIArgument > WorkItemIDZ
static bool classof(const PseudoSourceValue *V)
const AMDGPUImagePseudoSourceValue * getImagePSV(const AMDGPUTargetMachine &TM)
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void mappingImpl(yaml::IO &YamlIO) override
StringValue FrameOffsetReg
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
bool needsWaveLimiter() const
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const
constexpr char NumSpilledVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
std::tuple< const ArgDescriptor *, const TargetRegisterClass *, LLT > getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool hasWorkGroupIDZ() const
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
bool FP64FP16InputDenormals
Optional< SIArgument > DispatchID
Optional< SIArgument > WorkItemIDX
void setBytesInStackArgArea(unsigned Bytes)
uint64_t ExplicitKernArgSize
Register getStackPtrOffsetReg() const
bool usesAGPRs(const MachineFunction &MF) const
This class implements an extremely fast bulk output stream that can only output to a stream.
static void mapping(IO &YamlIO, SIMode &Mode)
Register SGPRForBPSaveRestoreCopy
If this is set, an SGPR used for save/restore of the register used for the base pointer.
Special value supplied for machine level alias analysis.
bool mayUseAGPRs(const MachineFunction &MF) const
This struct is a compact representation of a valid (non-zero power of two) alignment.
AMDGPUBufferPseudoSourceValue(const AMDGPUTargetMachine &TM)
bool hasNonSpillStackObjects() const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRToVGPRSpills(int FrameIndex) const
Register getWorkGroupIDSGPR(unsigned Dim) const
void setWorkItemIDX(ArgDescriptor Arg)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
Register addDispatchID(const SIRegisterInfo &TRI)
bool mayAlias(const MachineFrameInfo *) const override
Return true if the memory pointed to by this PseudoSourceValue can ever alias an LLVM IR Value.
bool hasSpilledSGPRs() const
AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const
Returns true if NumLane slots are available in VGPRs already used for SGPR spilling.
static SIArgument createArgument(bool IsReg)
Register addKernargSegmentPtr(const SIRegisterInfo &TRI)
Optional< SIArgument > WorkGroupIDZ
void setWorkItemIDY(ArgDescriptor Arg)
StringValue ScratchRSrcReg
SIArgument & operator=(const SIArgument &Other)
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI)
bool isStackRealigned() const
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI)
bool hasWorkGroupInfo() const
SGPRSpillVGPR(Register V, Optional< int > F)
unsigned getMaxWavesPerEU() const
void reserveWWMRegister(Register Reg)
Optional< int > getOptionalScavengeFI() const
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)
Allocate memory in an ever growing pool, as if by bump-pointer.
SIMachineFunctionInfo()=default
bool hasDispatchID() const
Optional< int > FramePointerSaveIndex
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
unsigned getNumSpilledVGPRs() const
unsigned BytesInStackArgArea
MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, const DenseMap< MachineBasicBlock *, MachineBasicBlock * > &Src2DstMBB) const override
Make a functionally equivalent copy of this MachineFunctionInfo in MF.
SmallSetVector< Register, 8 > WWMReservedRegs
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Optional< FrameIndex > ScavengeFI
SmallVector< StringValue > WWMReservedRegs
unsigned getMinWavesPerEU() const
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t get32BitAddressHighBits() const
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
const AMDGPUFunctionArgInfo & getArgInfo() const
void addToSpilledSGPRs(unsigned num)
bool isMemoryBound() const
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&... args)
Zip iterator for two or more iterable types.
void setHasSpilledVGPRs(bool Spill=true)
Register getGITPtrLoReg(const MachineFunction &MF) const
A wrapper around std::string which contains a source range that's being set during parsing.
unsigned getOccupancy() const
AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
SIMachineFunctionInfo(const MachineFunction &MF)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool hasWorkGroupIDX() const
StringValue StackPtrOffsetReg
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void limitOccupancy(const MachineFunction &MF)
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
bool isAliased(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue may also be pointed to by an LLVM IR Val...
Optional< SIArgument > ImplicitBufferPtr
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
Register getImplicitBufferPtrUserSGPR() const
void setStackPtrOffsetReg(Register Reg)
bool hasPrivateSegmentBuffer() const
Register addImplicitBufferPtr(const SIRegisterInfo &TRI)
Helper struct shared between Function Specialization and SCCP Solver.
static void mapping(IO &YamlIO, SIArgument &A)
Wrapper class representing virtual and physical registers.
unsigned getNumUserSGPRs() const
bool hasWorkItemIDZ() const
void setVGPRToAGPRSpillDead(int FrameIndex)
bool hasPrivateSegmentWaveByteOffset() const
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
SmallVector< MCPhysReg, 32 > Lanes
unsigned getMaxFlatWorkGroupSize() const
bool hasDispatchPtr() const
Optional< SIArgument > WorkGroupInfo
Register addQueuePtr(const SIRegisterInfo &TRI)
Optional< SIArgument > ImplicitArgPtr
Optional< SIArgument > KernargSegmentPtr
Optional< SIArgument > WorkGroupIDY
Optional< int > BasePointerSaveIndex
Optional< SIArgument > PrivateSegmentSize
Lightweight error class with error context and mandatory checking.
bool hasWorkItemIDY() const
void setPrivateSegmentWaveByteOffset(Register Reg)
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
std::pair< unsigned, unsigned > getWavesPerEU() const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void setScratchRSrcReg(Register Reg)
void setFrameOffsetReg(Register Reg)
Optional< SIArgument > PrivateSegmentBuffer
unsigned getBytesInStackArgArea() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
void setHasNonSpillStackObjects(bool StackObject=true)
void markPSInputEnabled(unsigned Index)
static void mapping(IO &YamlIO, SIArgumentInfo &AI)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Represents a range in source code.
const AMDGPUBufferPseudoSourceValue * getBufferPSV(const AMDGPUTargetMachine &TM)
This class keeps track of the SPI_PS_INPUT_ADDR config register, which tells the hardware which inter...
bool hasWorkGroupIDY() const
Register addWorkGroupIDX()
bool hasKernargSegmentPtr() const
unsigned getPSInputAddr() const
bool isPSInputAllocated(unsigned Index) const
Register addWorkGroupInfo()
void printCustom(raw_ostream &OS) const override
Implement printing for PseudoSourceValue.
A SetVector that performs no allocations if smaller than a certain size.
const char LLVMTargetMachineRef TM
void setHasSpilledSGPRs(bool Spill=true)
void addToSpilledVGPRs(unsigned num)
constexpr char NumSpilledSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
~SIMachineFunctionInfo()=default
bool isConstant(const MachineFrameInfo *) const override
Test whether the memory pointed to by this PseudoSourceValue has a constant value.
MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...
bool hasWorkItemIDX() const
bool FP64FP16OutputDenormals
Optional< unsigned > Mask
LLVM Value Representation.
bool hasImplicitBufferPtr() const
bool hasFlatScratchInit() const
Optional< SIArgument > FlatScratchInit
Register getQueuePtrUserSGPR() const
Register getFrameOffsetReg() const
void allocateWWMReservedSpillSlots(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
Optional< std::vector< StOtherPiece > > Other
Wrapper class representing physical registers. Should be passed by value.
Optional< SIArgument > QueuePtr