LLVM 23.0.0git
AMDGPUMCResourceInfo.cpp
Go to the documentation of this file.
1//===- AMDGPUMCResourceInfo.cpp --- MC Resource Info ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief MC infrastructure to propagate the function level resource usage
11/// info.
12///
13//===----------------------------------------------------------------------===//
14
16#include "AMDGPUTargetMachine.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/MC/MCAsmInfo.h"
20#include "llvm/MC/MCContext.h"
21#include "llvm/MC/MCSymbol.h"
23
24#define DEBUG_TYPE "amdgpu-mc-resource-usage"
25
26using namespace llvm;
27
// Tail of the signature and body of what appears to be
// MCResourceInfo::getSymbol: returns the symbol that holds one kind of
// resource information (selected by RIK) for the function named FuncName.
// NOTE(review): the first signature line and several `case` labels are absent
// from this extract (embedded numbers 28, 42, 44, 48, 50, 52 and 54 are
// skipped); restore them from the upstream file before relying on this text.
29 MCContext &OutContext) {
// GOCS builds the name <internal symbol prefix><FuncName><Suffix> and gets or
// creates the symbol with that name in OutContext.
30 auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
31 StringRef Prefix = OutContext.getAsmInfo()->getInternalSymbolPrefix();
32 return OutContext.getOrCreateSymbol(Twine(Prefix) + FuncName +
33 Twine(Suffix));
34 };
// Every resource kind maps to one fixed symbol-name suffix.
35 switch (RIK) {
36 case RIK_NumVGPR:
37 return GOCS(".num_vgpr");
38 case RIK_NumAGPR:
39 return GOCS(".num_agpr");
40 case RIK_NumSGPR:
41 return GOCS(".numbered_sgpr");
43 return GOCS(".num_named_barrier");
45 return GOCS(".private_seg_size");
46 case RIK_UsesVCC:
47 return GOCS(".uses_vcc");
49 return GOCS(".uses_flat_scratch");
51 return GOCS(".has_dyn_sized_stack");
53 return GOCS(".has_recursion");
55 return GOCS(".has_indirect_call");
56 }
// Only reached when RIK matches none of the cases above.
57 llvm_unreachable("Unexpected ResourceInfoKind.");
58}
59
// Tail of what appears to be MCResourceInfo::getSymRefExpr (the leading
// signature lines, embedded numbers 60-61, are missing from this extract).
// Looks up the resource symbol for (FuncName, RIK) through getSymbol and wraps
// it in an MCSymbolRefExpr created in Ctx.
62 MCContext &Ctx) {
63 return MCSymbolRefExpr::create(getSymbol(FuncName, RIK, Ctx), Ctx);
64}
65
66void MCResourceInfo::assignMaxRegs(MCContext &OutContext) {
67 // Assign expression to get the max register use to the max_num_Xgpr symbol.
68 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
69 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
70 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
71 MCSymbol *MaxNamedBarrierSym = getMaxNamedBarrierSymbol(OutContext);
72
73 auto assignMaxRegSym = [&OutContext](MCSymbol *Sym, int32_t RegCount) {
74 const MCExpr *MaxExpr = MCConstantExpr::create(RegCount, OutContext);
75 Sym->setVariableValue(MaxExpr);
76 };
77
78 assignMaxRegSym(MaxVGPRSym, MaxVGPR);
79 assignMaxRegSym(MaxAGPRSym, MaxAGPR);
80 assignMaxRegSym(MaxSGPRSym, MaxSGPR);
81 assignMaxRegSym(MaxNamedBarrierSym, MaxNamedBarrier);
82}
83
85
// Body of what appears to be MCResourceInfo::finalize (the signature line,
// embedded number 86, is missing from this extract).
// Asserts that Finalized has not been set yet, sets it, then assigns the
// module maximum symbols through assignMaxRegs.
87 assert(!Finalized && "Cannot finalize ResourceInfo again.");
88 Finalized = true;
89 assignMaxRegs(OutContext);
90}
91
// Body of a getter whose signature line (embedded number 92) is missing from
// this extract — presumably MCResourceInfo::getMaxVGPRSymbol.
// Gets or creates the symbol named "amdgpu.max_num_vgpr" in OutContext.
93 return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
94}
95
// Body of a getter whose signature line (embedded number 96) is missing from
// this extract — presumably MCResourceInfo::getMaxAGPRSymbol.
// Gets or creates the symbol named "amdgpu.max_num_agpr" in OutContext.
97 return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
98}
99
// Body of a getter whose signature line (embedded number 100) is missing from
// this extract — presumably MCResourceInfo::getMaxSGPRSymbol.
// Gets or creates the symbol named "amdgpu.max_num_sgpr" in OutContext.
101 return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
102}
103
// Body of a getter whose signature line (embedded number 104) is missing from
// this extract — presumably MCResourceInfo::getMaxNamedBarrierSymbol.
// Gets or creates the symbol named "amdgpu.max_num_named_barrier" in
// OutContext.
105 return OutContext.getOrCreateSymbol("amdgpu.max_num_named_barrier");
106}
107
108// Tries to flatten recursive call register resource gathering. Simple cycle
109// avoiding dfs to find the constants in the propagated symbols.
110// Assumes:
111// - RecSym has been confirmed to recurse (this means the callee symbols should
112// all be populated, started at RecSym).
113// - Shape of the resource symbol's MCExpr (`max` args are order agnostic):
114// RecSym.MCExpr := max(<constant>+, <callee_symbol>*)
// Returns either a constant expression holding the largest constant found
// during the walk, or (for VGPR/SGPR/AGPR kinds) a reference to the module
// maximum symbol when an expression of unexpected shape is encountered.
// NOTE(review): several lines are missing from this extract: the declarations
// of WorkList and Seen (embedded numbers 118-119) and three `case` labels
// (156, 161, 171). Restore them from the upstream file before building.
115const MCExpr *MCResourceInfo::flattenedCycleMax(MCSymbol *RecSym,
116 ResourceInfoKind RIK,
117 MCContext &OutContext) {
// Running maximum of all constants seen; starts at 0, so the result is never
// negative.
120 int64_t Maximum = 0;
121
// Seed the walk with the expression assigned to RecSym.
122 const MCExpr *RecExpr = RecSym->getVariableValue();
123 WorkList.push_back(RecExpr);
124
125 while (!WorkList.empty()) {
126 const MCExpr *CurExpr = WorkList.pop_back_val();
127 switch (CurExpr->getKind()) {
128 default: {
129 // Assuming the recursion is of shape `max(<constant>, <callee_symbol>)`
130 // where <callee_symbol> will eventually recurse. If this condition holds,
131 // the recursion occurs within some other (possibly unresolvable) MCExpr,
132 // thus using the worst case value then.
133 if (!AMDGPUMCExpr::isSymbolUsedInExpression(RecSym, CurExpr)) {
134 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
135 << ": Recursion in unexpected sub-expression, using "
136 "module maximum\n");
// Only the three register-count kinds have a module maximum to return here;
// any other kind falls out of the switch and the walk continues.
137 switch (RIK) {
138 default:
139 break;
140 case RIK_NumVGPR:
141 return MCSymbolRefExpr::create(getMaxVGPRSymbol(OutContext),
142 OutContext);
143 break;
144 case RIK_NumSGPR:
145 return MCSymbolRefExpr::create(getMaxSGPRSymbol(OutContext),
146 OutContext);
147 break;
148 case RIK_NumAGPR:
149 return MCSymbolRefExpr::create(getMaxAGPRSymbol(OutContext),
150 OutContext);
151 break;
152 }
153 }
154 break;
155 }
// NOTE(review): the `case` label (embedded number 156) is missing here —
// presumably the constant-expression case, given the cast below.
// A constant contributes directly to the running maximum.
157 int64_t Val = cast<MCConstantExpr>(CurExpr)->getValue();
158 Maximum = std::max(Maximum, Val);
159 break;
160 }
// NOTE(review): the `case` label (embedded number 161) is missing here —
// presumably the symbol-reference case, given the cast below.
// A referenced variable symbol is expanded into its assigned expression; Seen
// ensures each such expression is queued at most once, which is what keeps the
// walk from looping on a call cycle. Non-variable symbols are ignored.
162 const MCSymbolRefExpr *SymExpr = cast<MCSymbolRefExpr>(CurExpr);
163 const MCSymbol &SymRef = SymExpr->getSymbol();
164 if (SymRef.isVariable()) {
165 const MCExpr *SymVal = SymRef.getVariableValue();
166 if (Seen.insert(SymVal).second)
167 WorkList.push_back(SymVal);
168 }
169 break;
170 }
// NOTE(review): the `case` label (embedded number 171) is missing here —
// presumably the target-expression case, given the cast below.
// Only AGVK_Max target expressions are descended into; the arguments of any
// other target expression kind are not visited.
172 const AMDGPUMCExpr *TargetExpr = cast<AMDGPUMCExpr>(CurExpr);
173 if (TargetExpr->getKind() == AMDGPUMCExpr::VariantKind::AGVK_Max) {
174 for (auto &Arg : TargetExpr->getArgs())
175 WorkList.push_back(Arg);
176 }
177 break;
178 }
179 }
180 }
181
// NOTE(review): the string literal below contains a literal "<< " after
// "flattened max:", which looks like a typo in the debug message — confirm.
182 LLVM_DEBUG(dbgs() << "MCResUse: " << RecSym->getName()
183 << ": Using flattened max: << " << Maximum << '\n');
184
185 return MCConstantExpr::create(Maximum, OutContext);
186}
187
// Assigns the value of the RIK resource symbol of MF's function.
// With no callees the value is the constant LocalValue. Otherwise the local
// constant and one entry per distinct callee are collected in ArgExprs and,
// if more than one entry results, combined by an AMDGPUMCExpr of kind Kind.
// A callee whose symbol would make the definition recursive is not referenced
// directly (see the else-branch in the loop).
// NOTE(review): several lines are missing from this extract: the declaration
// of ArgExprs (embedded number 201), the start of the call in the callee
// condition (216) and part of a `case` arm (236-237). Restore them from the
// upstream file before building.
188void MCResourceInfo::assignResourceInfoExpr(
189 int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
190 const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
191 MCContext &OutContext) {
192 const TargetMachine &TM = MF.getTarget();
193 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
194 const MCConstantExpr *LocalConstExpr =
195 MCConstantExpr::create(LocalValue, OutContext);
// Default result: the local constant alone.
196 const MCExpr *SymVal = LocalConstExpr;
197 MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
198 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
199 << LocalValue << " as function local usage\n");
200 if (!Callees.empty()) {
// Seen de-duplicates callees so each one is processed once.
202 SmallPtrSet<const Function *, 8> Seen;
203 ArgExprs.push_back(LocalConstExpr);
204
205 for (const Function *Callee : Callees) {
206 if (!Seen.insert(Callee).second)
207 continue;
208
209 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
210 MCSymbol *CalleeValSym =
211 getSymbol(CalleeFnSym->getName(), RIK, OutContext);
212
213 // Avoid constructing recursive definitions by detecting whether `Sym` is
214 // found transitively within any of its `CalleeValSym`.
215 if (!CalleeValSym->isVariable() ||
217 Sym, CalleeValSym->getVariableValue())) {
218 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
219 << CalleeValSym->getName() << " as callee\n");
220 ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
221 } else {
222 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName()
223 << ": Recursion found, attempt flattening of cycle "
224 "for resource usage\n");
225 // In case of recursion for vgpr/sgpr/agpr resource usage: try to
226 // flatten and use the max of the call cycle. May still end up emitting
227 // module max if not fully resolvable.
// Kinds not listed below add nothing for a recursive callee.
228 switch (RIK) {
229 default:
230 break;
231 case RIK_NumVGPR:
232 case RIK_NumSGPR:
233 case RIK_NumAGPR:
234 ArgExprs.push_back(flattenedCycleMax(CalleeValSym, RIK, OutContext));
235 break;
238 getMaxNamedBarrierSymbol(OutContext), OutContext));
239 break;
240 }
241 }
242 }
// Build a combined expression only when something besides the local constant
// was collected; otherwise SymVal stays the plain constant.
243 if (ArgExprs.size() > 1)
244 SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
245 }
246 Sym->setVariableValue(SymVal);
247}
248
// Part of what appears to be MCResourceInfo::gatherResourceInfo: assigns the
// per-function resource symbols of MF from the values in FRI.
// NOTE(review): many lines are missing from this extract (embedded numbers
// 249, 251, 259-263, 274, 279, 288, 290-292, 331, 338, 341, 354, 363, 380,
// 383, 388, 392 and 394-396), including the first signature line, the body of
// SetToLocal, the condition guarding the object-linking branch and several
// call arguments. Restore them from the upstream file before building.
250 const MachineFunction &MF,
252 MCContext &OutContext) {
253 // Worst case VGPR use for non-hardware-entrypoints.
254 MCSymbol *MaxVGPRSym = getMaxVGPRSymbol(OutContext);
255 MCSymbol *MaxAGPRSym = getMaxAGPRSymbol(OutContext);
256 MCSymbol *MaxSGPRSym = getMaxSGPRSymbol(OutContext);
257 MCSymbol *MaxNamedBarrierSym = getMaxNamedBarrierSymbol(OutContext);
258
264 }
265
266 const TargetMachine &TM = MF.getTarget();
267 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
268
269 LLVM_DEBUG(dbgs() << "MCResUse: Gathering resource information for "
270 << FnSym->getName() << '\n');
271
// SetToLocal looks up the RIK symbol of this function; the statement that
// uses Value is not visible here — presumably it assigns Value as a constant.
272 auto SetToLocal = [&](int64_t Value, ResourceInfoKind RIK) {
273 MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext);
275 };
276
277 // When link-time object linking is enabled, set all resource symbols to
278 // concrete local values.
280 LLVM_DEBUG(dbgs() << "MCResUse: object linking enabled, no call-graph "
281 "propagation; emitting local resource values only\n");
282 SetToLocal(FRI.NumVGPR, RIK_NumVGPR);
283 SetToLocal(FRI.NumAGPR, RIK_NumAGPR);
284 SetToLocal(FRI.NumExplicitSGPR, RIK_NumSGPR);
285 SetToLocal(FRI.NumNamedBarrier, RIK_NumNamedBarrier);
286 SetToLocal(FRI.PrivateSegmentSize, RIK_PrivateSegSize);
287 SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
289 SetToLocal(FRI.HasDynamicallySizedStack,
293 return;
294 }
295
// Debug-only listing of the callees recorded in FRI.
296 LLVM_DEBUG({
297 if (!FRI.Callees.empty()) {
298 dbgs() << "MCResUse: Callees:\n";
299 for (const Function *Callee : FRI.Callees) {
300 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
301 dbgs() << "MCResUse: " << CalleeFnSym->getName() << '\n';
302 }
303 }
304 });
305
// SetMaxReg assigns one register-count style symbol. Without an indirect call
// the value is built from the local count and the callees by
// assignResourceInfoExpr using AGVK_Max. With an indirect call the callees are
// not consulted; the value is max(local count, module maximum symbol).
306 auto SetMaxReg = [&](MCSymbol *MaxSym, int32_t numRegs,
307 ResourceInfoKind RIK) {
308 if (!FRI.HasIndirectCall) {
309 assignResourceInfoExpr(numRegs, RIK, AMDGPUMCExpr::AGVK_Max, MF,
310 FRI.Callees, OutContext);
311 } else {
312 const MCExpr *SymRef = MCSymbolRefExpr::create(MaxSym, OutContext);
313 MCSymbol *LocalNumSym = getSymbol(FnSym->getName(), RIK, OutContext);
314 const MCExpr *MaxWithLocal = AMDGPUMCExpr::createMax(
315 {MCConstantExpr::create(numRegs, OutContext), SymRef}, OutContext);
316 LocalNumSym->setVariableValue(MaxWithLocal);
317 LLVM_DEBUG(dbgs() << "MCResUse: " << LocalNumSym->getName()
318 << ": Indirect callee within, using module maximum\n");
319 }
320 };
321
322 LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n');
323 SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
324 SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
325 SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
326 SetMaxReg(MaxNamedBarrierSym, FRI.NumNamedBarrier, RIK_NumNamedBarrier);
327
328 {
329 // The expression for private segment size should be: FRI.PrivateSegmentSize
330 // + max(FRI.Callees, FRI.CalleeSegmentSize)
332 MCSymbol *Sym = getSymbol(FnSym->getName(), RIK_PrivateSegSize, OutContext);
333 if (FRI.CalleeSegmentSize) {
334 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
335 << FRI.CalleeSegmentSize
336 << " for indirect/recursive callees within\n");
337 ArgExprs.push_back(
339 }
340
// The function itself is inserted into Seen first, so a direct self-call in
// FRI.Callees is skipped by the duplicate check below.
342 Seen.insert(&MF.getFunction());
343 for (const Function *Callee : FRI.Callees) {
344 if (!Seen.insert(Callee).second)
345 continue;
// Callees that are only declarations contribute nothing.
346 if (!Callee->isDeclaration()) {
347 MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
348 MCSymbol *CalleeValSym =
349 getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
350
351 // Avoid constructing recursive definitions by detecting whether `Sym`
352 // is found transitively within any of its `CalleeValSym`.
353 if (!CalleeValSym->isVariable() ||
355 Sym, CalleeValSym->getVariableValue())) {
356 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
357 << CalleeValSym->getName() << " as callee\n");
358 ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
359 }
360 }
361 }
362 const MCExpr *localConstExpr =
364 LLVM_DEBUG(dbgs() << "MCResUse: " << Sym->getName() << ": Adding "
365 << FRI.PrivateSegmentSize
366 << " as function local usage\n");
// With any collected arguments the value becomes local + max(arguments);
// otherwise it stays whatever localConstExpr was initialised to.
367 if (!ArgExprs.empty()) {
368 const AMDGPUMCExpr *transitiveExpr =
369 AMDGPUMCExpr::createMax(ArgExprs, OutContext);
370 localConstExpr =
371 MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
372 }
373 Sym->setVariableValue(localConstExpr);
374 }
375
// Boolean-style properties: without an indirect call each one is combined
// with the callees' symbols using AGVK_Or; with an indirect call the local
// values are used via SetToLocal instead.
376 if (!FRI.HasIndirectCall) {
377 assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC,
378 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
379 assignResourceInfoExpr(FRI.UsesFlatScratch,
381 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
382 assignResourceInfoExpr(FRI.HasDynamicallySizedStack,
384 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
385 assignResourceInfoExpr(FRI.HasRecursion, ResourceInfoKind::RIK_HasRecursion,
386 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
387 assignResourceInfoExpr(FRI.HasIndirectCall,
389 AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
390 } else {
391 SetToLocal(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC);
393 SetToLocal(FRI.HasDynamicallySizedStack,
397 }
398}
399
// Tail of what appears to be MCResourceInfo::createTotalNumVGPRs.
// NOTE(review): the first signature line (embedded number 400) and the start
// of the return expression (404) are missing from this extract; the visible
// arguments are symbol references to this function's AGPR and VGPR count
// symbols, in that order, followed by Ctx.
401 MCContext &Ctx) {
402 const TargetMachine &TM = MF.getTarget();
403 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
405 getSymRefExpr(FnSym->getName(), RIK_NumAGPR, Ctx),
406 getSymRefExpr(FnSym->getName(), RIK_NumVGPR, Ctx), Ctx);
407}
408
// Tail of what appears to be MCResourceInfo::createTotalNumSGPRs.
// NOTE(review): the first signature line (embedded number 409) and the heads
// of the outer and inner calls (414, 416) are missing from this extract. The
// visible arguments are a reference to this function's SGPR count symbol and
// a nested call taking the uses-VCC and uses-flat-scratch symbol references
// together with hasXnack.
410 bool hasXnack,
411 MCContext &Ctx) {
412 const TargetMachine &TM = MF.getTarget();
413 MCSymbol *FnSym = TM.getSymbol(&MF.getFunction());
415 getSymRefExpr(FnSym->getName(), RIK_NumSGPR, Ctx),
417 getSymRefExpr(FnSym->getName(), RIK_UsesVCC, Ctx),
418 getSymRefExpr(FnSym->getName(), RIK_UsesFlatScratch, Ctx), hasXnack,
419 Ctx),
420 Ctx);
421}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MC infrastructure to propagate the function level resource usage info.
The AMDGPU TargetMachine interface definition for hw codegen targets.
#define LLVM_DEBUG(...)
Definition Debug.h:114
AMDGPU target specific MCExpr operations.
ArrayRef< const MCExpr * > getArgs() const
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
VariantKind getKind() const
static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *E)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
StringRef getInternalSymbolPrefix() const
Definition MCAsmInfo.h:552
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCAsmInfo * getAsmInfo() const
Definition MCContext.h:409
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
@ Constant
Constant expressions.
Definition MCExpr.h:42
@ SymbolRef
References to labels and assigned expressions.
Definition MCExpr.h:43
@ Target
Target specific expression.
Definition MCExpr.h:46
ExprKind getKind() const
Definition MCExpr.h:85
MCResourceInfo()=default
MCSymbol * getMaxNamedBarrierSymbol(MCContext &OutContext)
void addMaxSGPRCandidate(int32_t candidate)
const MCExpr * getSymRefExpr(StringRef FuncName, ResourceInfoKind RIK, MCContext &Ctx)
MCSymbol * getMaxSGPRSymbol(MCContext &OutContext)
void addMaxNamedBarrierCandidate(int32_t candidate)
MCSymbol * getMaxAGPRSymbol(MCContext &OutContext)
const MCExpr * createTotalNumVGPRs(const MachineFunction &MF, MCContext &Ctx)
void finalize(MCContext &OutContext)
void addMaxAGPRCandidate(int32_t candidate)
MCSymbol * getMaxVGPRSymbol(MCContext &OutContext)
const MCExpr * createTotalNumSGPRs(const MachineFunction &MF, bool hasXnack, MCContext &Ctx)
MCSymbol * getSymbol(StringRef FuncName, ResourceInfoKind RIK, MCContext &OutContext)
void addMaxVGPRCandidate(int32_t candidate)
void gatherResourceInfo(const MachineFunction &MF, const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &FRI, MCContext &OutContext)
AMDGPUResourceUsageAnalysis gathers resource usage on a per-function granularity.
const MCSymbol & getSymbol() const
Definition MCExpr.h:227
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Primary interface to the complete machine description for the target machine.
MCSymbol * getSymbol(const GlobalValue *GV) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo