Annotated excerpts from SIMachineFunctionInfo.cpp, the AMDGPU per-function machine info implementation; omitted code is marked with // ...
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
    : // ...
      PrivateSegmentBuffer(false),
      // ...
      KernargSegmentPtr(false),
      // ...
      FlatScratchInit(false),
      // ...
      PrivateSegmentWaveByteOffset(false),
      // ...
      ImplicitBufferPtr(false),
      ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff),
      HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  // ...
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
  // ... (kernel calling conventions)
  KernargSegmentPtr = true;
  // ...
  FrameOffsetReg = AMDGPU::SGPR33;
  StackPtrOffsetReg = AMDGPU::SGPR32;

  if (!ST.enableFlatScratch()) {
    // ...
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  }
  // For callable functions:
  if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
    ImplicitArgPtr = true;
  // For entry functions, the same attribute also pins the kernarg segment:
  if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
    KernargSegmentPtr = true;
    // ...
  }
  // Under the fixed function ABI (UseFixedABI), the implicit argument
  // pointer is always enabled:
  ImplicitArgPtr = true;
  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;
  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  // ...
  PrivateSegmentWaveByteOffset = true;
  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    if (!ST.enableFlatScratch())
      PrivateSegmentBuffer = true;
    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }
  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;
  if (ST.hasFlatAddressSpace() && isEntryFunction() &&
      (isAmdHsaOrMesa || ST.enableFlatScratch())) {
    // ...
    if (HasCalls || HasStackObjects || ST.enableFlatScratch())
      FlatScratchInit = true;
  }
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
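
StringRef::consumeInteger returns true on failure, so a malformed attribute value simply leaves the field at its default, and a radix of 0 auto-detects the base, so both decimal and 0x-prefixed values work. A self-contained sketch of that behavior:

#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstdint>

void parseDemo() {
  llvm::StringRef S = "0xffff8000";
  uint32_t GITPtrHigh = 0xffffffff; // default survives a failed parse
  bool Failed = S.consumeInteger(/*Radix=*/0, GITPtrHigh);
  assert(!Failed && GITPtrHigh == 0xffff8000u);
}
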
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer = ArgDescriptor::createRegister(
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                              &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4; // the buffer descriptor occupies four consecutive SGPRs
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}
// addDispatchPtr, addQueuePtr, addKernargSegmentPtr, addDispatchID,
// addFlatScratchInit, and addImplicitBufferPtr follow the same pattern,
// each claiming a 64-bit pair via &AMDGPU::SReg_64RegClass.
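
The add* helpers hand out user SGPRs bump-allocator style: each places its value at getNextUserSGPR() and advances NumUserSGPRs by the width of the claimed tuple. An illustrative allocation trace (the register numbers follow from that arithmetic, assuming nothing was allocated before):

// addPrivateSegmentBuffer(TRI); // s[0:3], NumUserSGPRs = 4
// addDispatchPtr(TRI);          // s[4:5], NumUserSGPRs = 6
// addKernargSegmentPtr(TRI);    // s[6:7], NumUserSGPRs = 8
// getNextUserSGPR() now returns AMDGPU::SGPR0 + 8, i.e. s8.
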
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }
  return false;
}
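
The CSRegs array handed to this helper is null-terminated, which is why the loop condition is just CSRegs[I]. The same scan as a standalone sketch over raw 16-bit register IDs (MCPhysReg is a 16-bit unsigned type):

#include <cstdint>

static bool containsReg(const uint16_t *NullTerminated, uint16_t Reg) {
  for (unsigned I = 0; NullTerminated[I]; ++I) // ID 0 terminates the list
    if (NullTerminated[I] == Reg)
      return true;
  return false;
}
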
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}
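
Each spill VGPR contributes WaveSize 32-bit lanes, so with wave64 and two spill VGPRs there are 128 lanes in total: at NumVGPRSpillLanes == 120 a request for 8 more lanes fits, but 9 does not. The check restated as a free function:

// Names mirror the members used above; a minimal restatement of the check.
static bool haveFreeLanes(unsigned NumVGPRSpillLanes, unsigned NumNeed,
                          unsigned WaveSize, unsigned NumSpillVGPRs) {
  return NumVGPRSpillLanes + NumNeed <= WaveSize * NumSpillVGPRs;
}
// haveFreeLanes(120, 8, 64, 2) == true
// haveFreeLanes(120, 9, 64, 2) == false
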
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      // ... (use the VGPR reserved up front for SGPR spilling)
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // No VGPRs left for spilling SGPRs; undo the partial allocation
        // rather than spilling only part of the tuple.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }
      // ... (record LaneVGPR in SpillVGPRs and mark it live-in everywhere)
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  // ...
  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  // ...
}
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  Spill.FullyAllocated = true;

  // ... (seed OtherUsedRegs with registers already claimed elsewhere)

  auto NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted.
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}
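
Note the direction flip: when isAGPRtoVGPR is true the destination class is VGPR_32, otherwise AGPR_32, and find_if resumes from the previous cursor so each lane takes the next free register instead of rescanning from the start. The cursor pattern in isolation:

#include <algorithm>
#include <functional>
#include <vector>

// Pick the next register ID passing IsFree, remembering the cursor so the
// following lane continues the scan where this one stopped.
static int pickNext(std::vector<int>::iterator &Cursor, std::vector<int> &Regs,
                    const std::function<bool(int)> &IsFree) {
  Cursor = std::find_if(Cursor, Regs.end(), IsFree);
  return Cursor == Regs.end() ? -1 : *Cursor++;
}
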
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  for (auto &R : SGPRToVGPRSpills) {
    // ... (drop the now-dead SGPR spill slots)
  }
  // ...
  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
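
These two accessors encode the SGPR input layout: user SGPRs occupy s0 upward and system SGPRs are stacked immediately after, which is why getNextUserSGPR() asserts that no system SGPRs exist yet. A worked example of the arithmetic:

// Offsets from AMDGPU::SGPR0:
//   user SGPRs:   s0..s7   (NumUserSGPRs == 8)
//   system SGPRs: s8..s10  (NumSystemSGPRs == 3)
//   getNextSystemSGPR() -> SGPR0 + 8 + 3 == s11
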
Register SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in.
  if (ST.hasMergedShaders()) {
    // For merged HS/GS shaders the low GIT address arrives in s8.
    GitPtrLo = AMDGPU::SGPR8;
  }
  return GitPtrLo;
}
  // In convertArgumentInfo(): a register argument is serialized by name.
  if (Arg.isRegister()) {
    // ...
  }
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      // ...
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}
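
This undoes reserveVGPRforSGPRSpills(): besides dropping the entry from SpillVGPRs, the reserved register has to disappear from every block's live-in list. That live-in cleanup pattern in isolation (the helper name is ours):

#include "llvm/CodeGen/MachineBasicBlock.h"

// Remove Reg from a block's live-ins, then restore the sorted/unique
// invariant that later passes rely on when searching the list.
static void dropLiveIn(llvm::MachineBasicBlock &MBB, llvm::MCPhysReg Reg) {
  MBB.removeLiveIn(Reg);
  MBB.sortUniqueLiveIns();
}
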