32#define DEBUG_TYPE "amdgpu-resource-usage"
42 "amdgpu-assume-external-call-stack-size",
47 "amdgpu-assume-dynamic-stack-object-size",
48 cl::desc(
"Assumed extra stack use if there are any "
49 "variable sized objects (in bytes)"),
53 "Function register usage analysis",
true,
true)
57 assert(Op.getImm() == 0);
66 if (!UseOp.isImplicit() || !
TII.isFLAT(*UseOp.getParent()))
84 uint32_t AssumedStackSizeForDynamicSizeObjects =
91 AssumedStackSizeForDynamicSizeObjects = 0;
93 AssumedStackSizeForExternalCall = 0;
97 MF, AssumedStackSizeForDynamicSizeObjects,
98 AssumedStackSizeForExternalCall);
111 uint32_t AssumedStackSizeForDynamicSizeObjects =
118 AssumedStackSizeForDynamicSizeObjects = 0;
120 AssumedStackSizeForExternalCall = 0;
124 MF, AssumedStackSizeForDynamicSizeObjects,
125 AssumedStackSizeForExternalCall);
131 uint32_t AssumedStackSizeForExternalCall)
const {
141 Info.UsesFlatScratch = MRI.
isPhysRegUsed(AMDGPU::FLAT_SCR_LO) ||
158 Info.UsesFlatScratch =
false;
161 Info.PrivateSegmentSize = FrameInfo.getStackSize();
164 Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects();
165 if (Info.HasDynamicallySizedStack)
166 Info.PrivateSegmentSize += AssumedStackSizeForDynamicSizeObjects;
169 Info.PrivateSegmentSize += FrameInfo.getMaxAlign().value();
173 Info.NumExplicitSGPR =
TRI.getNumUsedPhysRegs(MRI, AMDGPU::SGPR_32RegClass,
175 if (ST.hasMAIInsts())
176 Info.NumAGPR =
TRI.getNumUsedPhysRegs(MRI, AMDGPU::AGPR_32RegClass,
182 if (!FrameInfo.hasCalls() && !FrameInfo.hasTailCall()) {
183 Info.NumVGPR =
TRI.getNumUsedPhysRegs(MRI, AMDGPU::VGPR_32RegClass,
188 int32_t MaxVGPR = -1;
189 Info.CalleeSegmentSize = 0;
193 for (
unsigned I = 0;
I <
MI.getNumOperands(); ++
I) {
201 case AMDGPU::NoRegister:
203 "Instruction uses invalid noreg register");
206 case AMDGPU::XNACK_MASK:
207 case AMDGPU::XNACK_MASK_LO:
208 case AMDGPU::XNACK_MASK_HI:
211 case AMDGPU::LDS_DIRECT:
222 case AMDGPU::SRC_VCCZ:
225 case AMDGPU::SRC_EXECZ:
228 case AMDGPU::SRC_SCC:
236 assert((!RC ||
TRI.isVGPRClass(RC) ||
TRI.isSGPRClass(RC) ||
237 TRI.isAGPRClass(RC) || AMDGPU::TTMP_32RegClass.contains(Reg) ||
238 AMDGPU::TTMP_64RegClass.contains(Reg) ||
239 AMDGPU::TTMP_128RegClass.contains(Reg) ||
240 AMDGPU::TTMP_256RegClass.contains(Reg) ||
241 AMDGPU::TTMP_512RegClass.contains(Reg)) &&
242 "Unknown register class");
244 if (!RC || !
TRI.isVGPRClass(RC))
247 if (
MI.isCall() ||
MI.isMetaInstruction())
251 unsigned HWReg =
TRI.getHWRegIndex(Reg);
252 int MaxUsed = HWReg + Width - 1;
253 MaxVGPR = std::max(MaxUsed, MaxVGPR);
263 TII->getNamedOperand(
MI, AMDGPU::OpName::callee);
266 CalleeOp ? getCalleeFunction(*CalleeOp) :
nullptr;
272 if (Callee && !isSameFunction(MF, Callee))
273 Info.Callees.push_back(Callee);
275 bool IsIndirect = !Callee || Callee->isDeclaration();
276 Info.HasIndirectCall |= IsIndirect;
287 if (!Callee || !Callee->doesNotRecurse()) {
288 Info.HasRecursion =
true;
292 if (!
MI.isReturn()) {
299 Info.CalleeSegmentSize = std::max(
300 Info.CalleeSegmentSize,
301 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
306 Info.CalleeSegmentSize =
307 std::max(Info.CalleeSegmentSize,
308 static_cast<uint64_t>(AssumedStackSizeForExternalCall));
312 Info.UsesFlatScratch = ST.hasFlatAddressSpace();
313 Info.HasDynamicallySizedStack =
true;
319 Info.NumVGPR = MaxVGPR + 1;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< uint32_t > clAssumedStackSizeForDynamicSizeObjects("amdgpu-assume-dynamic-stack-object-size", cl::desc("Assumed extra stack use if there are any " "variable sized objects (in bytes)"), cl::Hidden, cl::init(4096))
static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, const SIInstrInfo &TII, unsigned Reg)
static cl::opt< uint32_t > clAssumedStackSizeForExternalCall("amdgpu-assume-external-call-stack-size", cl::desc("Assumed stack use of any external call (in bytes)"), cl::Hidden, cl::init(16384))
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
uint32_t getNumNamedBarriers() const
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo Result
static bool EnableObjectLinking
bool hasFlatScratchInit() const
Module * getParent()
Get the module that this global value is contained inside of...
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
iterator_range< reg_iterator > reg_operands(Register Reg) const
LLVM_ABI bool isLiveIn(Register Reg) const
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
Wrapper class representing virtual and physical registers.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
bool isStackRealigned() const
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
Primary interface to the complete machine description for the target machine.
const MCSubtargetInfo * getMCSubtargetInfo() const
OSType getOS() const
Get the parsed operating system type of this triple.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAMDHSACodeObjectVersion(const Module &M)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
char & AMDGPUResourceUsageAnalysisID
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF, uint32_t AssumedStackSizeForDynamicSizeObjects, uint32_t AssumedStackSizeForExternalCall) const
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
FunctionResourceInfo ResourceInfo
A special type used by analysis passes to provide an address that identifies that particular analysis...