LLVM 19.0.0git
AMDGPUMCExpr.cpp
Go to the documentation of this file.
1//===- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUMCExpr.h"
10#include "GCNSubtarget.h"
12#include "llvm/IR/Function.h"
13#include "llvm/MC/MCContext.h"
14#include "llvm/MC/MCStreamer.h"
15#include "llvm/MC/MCSymbol.h"
16#include "llvm/MC/MCValue.h"
19#include <optional>
20
21using namespace llvm;
22using namespace llvm::AMDGPU;
23
24AMDGPUVariadicMCExpr::AMDGPUVariadicMCExpr(VariadicKind Kind,
26 MCContext &Ctx)
27 : Kind(Kind), Ctx(Ctx) {
28 assert(Args.size() >= 1 && "Needs a minimum of one expression.");
29 assert(Kind != AGVK_None &&
30 "Cannot construct AMDGPUVariadicMCExpr of kind none.");
31
32 // Allocating the variadic arguments through the same allocation mechanism
33 // that the object itself is allocated with so they end up in the same memory.
34 //
35 // Will result in an asan failure if allocated on the heap through standard
36 // allocation (e.g., through SmallVector's grow).
37 RawArgs = static_cast<const MCExpr **>(
38 Ctx.allocate(sizeof(const MCExpr *) * Args.size()));
39 std::uninitialized_copy(Args.begin(), Args.end(), RawArgs);
40 this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size());
41}
42
43AMDGPUVariadicMCExpr::~AMDGPUVariadicMCExpr() { Ctx.deallocate(RawArgs); }
44
47 MCContext &Ctx) {
48 return new (Ctx) AMDGPUVariadicMCExpr(Kind, Args, Ctx);
49}
50
52 assert(Index < Args.size() &&
53 "Indexing out of bounds AMDGPUVariadicMCExpr sub-expr");
54 return Args[Index];
55}
56
58 const MCAsmInfo *MAI) const {
59 switch (Kind) {
60 default:
61 llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
62 case AGVK_Or:
63 OS << "or(";
64 break;
65 case AGVK_Max:
66 OS << "max(";
67 break;
68 case AGVK_ExtraSGPRs:
69 OS << "extrasgprs(";
70 break;
72 OS << "totalnumvgprs(";
73 break;
74 case AGVK_AlignTo:
75 OS << "alignto(";
76 break;
77 case AGVK_Occupancy:
78 OS << "occupancy(";
79 break;
80 }
81 for (auto It = Args.begin(); It != Args.end(); ++It) {
82 (*It)->print(OS, MAI, /*InParens=*/false);
83 if ((It + 1) != Args.end())
84 OS << ", ";
85 }
86 OS << ')';
87}
88
89static int64_t op(AMDGPUVariadicMCExpr::VariadicKind Kind, int64_t Arg1,
90 int64_t Arg2) {
91 switch (Kind) {
92 default:
93 llvm_unreachable("Unknown AMDGPUVariadicMCExpr kind.");
95 return std::max(Arg1, Arg2);
97 return Arg1 | Arg2;
98 }
99}
100
101bool AMDGPUVariadicMCExpr::evaluateExtraSGPRs(MCValue &Res,
102 const MCAsmLayout *Layout,
103 const MCFixup *Fixup) const {
104 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
105 MCValue MCVal;
106 if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
107 !MCVal.isAbsolute())
108 return false;
109
110 ConstantValue = MCVal.getConstant();
111 return true;
112 };
113
114 assert(Args.size() == 3 &&
115 "AMDGPUVariadic Argument count incorrect for ExtraSGPRs");
116 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
117 uint64_t VCCUsed = 0, FlatScrUsed = 0, XNACKUsed = 0;
118
119 bool Success = TryGetMCExprValue(Args[2], XNACKUsed);
120
121 assert(Success && "Arguments 3 for ExtraSGPRs should be a known constant");
122 if (!Success || !TryGetMCExprValue(Args[0], VCCUsed) ||
123 !TryGetMCExprValue(Args[1], FlatScrUsed))
124 return false;
125
127 STI, (bool)VCCUsed, (bool)FlatScrUsed, (bool)XNACKUsed);
128 Res = MCValue::get(ExtraSGPRs);
129 return true;
130}
131
132bool AMDGPUVariadicMCExpr::evaluateTotalNumVGPR(MCValue &Res,
133 const MCAsmLayout *Layout,
134 const MCFixup *Fixup) const {
135 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
136 MCValue MCVal;
137 if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
138 !MCVal.isAbsolute())
139 return false;
140
141 ConstantValue = MCVal.getConstant();
142 return true;
143 };
144 assert(Args.size() == 2 &&
145 "AMDGPUVariadic Argument count incorrect for TotalNumVGPRs");
146 const MCSubtargetInfo *STI = Ctx.getSubtargetInfo();
147 uint64_t NumAGPR = 0, NumVGPR = 0;
148
149 bool Has90AInsts = AMDGPU::isGFX90A(*STI);
150
151 if (!TryGetMCExprValue(Args[0], NumAGPR) ||
152 !TryGetMCExprValue(Args[1], NumVGPR))
153 return false;
154
155 uint64_t TotalNum = Has90AInsts && NumAGPR ? alignTo(NumVGPR, 4) + NumAGPR
156 : std::max(NumVGPR, NumAGPR);
157 Res = MCValue::get(TotalNum);
158 return true;
159}
160
161bool AMDGPUVariadicMCExpr::evaluateAlignTo(MCValue &Res,
162 const MCAsmLayout *Layout,
163 const MCFixup *Fixup) const {
164 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
165 MCValue MCVal;
166 if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
167 !MCVal.isAbsolute())
168 return false;
169
170 ConstantValue = MCVal.getConstant();
171 return true;
172 };
173
174 assert(Args.size() == 2 &&
175 "AMDGPUVariadic Argument count incorrect for AlignTo");
176 uint64_t Value = 0, Align = 0;
177 if (!TryGetMCExprValue(Args[0], Value) || !TryGetMCExprValue(Args[1], Align))
178 return false;
179
181 return true;
182}
183
184bool AMDGPUVariadicMCExpr::evaluateOccupancy(MCValue &Res,
185 const MCAsmLayout *Layout,
186 const MCFixup *Fixup) const {
187 auto TryGetMCExprValue = [&](const MCExpr *Arg, uint64_t &ConstantValue) {
188 MCValue MCVal;
189 if (!Arg->evaluateAsRelocatable(MCVal, Layout, Fixup) ||
190 !MCVal.isAbsolute())
191 return false;
192
193 ConstantValue = MCVal.getConstant();
194 return true;
195 };
196 assert(Args.size() == 7 &&
197 "AMDGPUVariadic Argument count incorrect for Occupancy");
198 uint64_t InitOccupancy, MaxWaves, Granule, TargetTotalNumVGPRs, Generation,
200
201 bool Success = true;
202 Success &= TryGetMCExprValue(Args[0], MaxWaves);
203 Success &= TryGetMCExprValue(Args[1], Granule);
204 Success &= TryGetMCExprValue(Args[2], TargetTotalNumVGPRs);
205 Success &= TryGetMCExprValue(Args[3], Generation);
206 Success &= TryGetMCExprValue(Args[4], InitOccupancy);
207
208 assert(Success && "Arguments 1 to 5 for Occupancy should be known constants");
209
210 if (!Success || !TryGetMCExprValue(Args[5], NumSGPRs) ||
211 !TryGetMCExprValue(Args[6], NumVGPRs))
212 return false;
213
214 unsigned Occupancy = InitOccupancy;
215 if (NumSGPRs)
216 Occupancy = std::min(
218 NumSGPRs, MaxWaves,
219 static_cast<AMDGPUSubtarget::Generation>(Generation)));
220 if (NumVGPRs)
221 Occupancy = std::min(Occupancy,
223 NumVGPRs, Granule, MaxWaves, TargetTotalNumVGPRs));
224
225 Res = MCValue::get(Occupancy);
226 return true;
227}
228
230 MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const {
231 std::optional<int64_t> Total;
232
233 switch (Kind) {
234 default:
235 break;
236 case AGVK_ExtraSGPRs:
237 return evaluateExtraSGPRs(Res, Layout, Fixup);
238 case AGVK_AlignTo:
239 return evaluateAlignTo(Res, Layout, Fixup);
241 return evaluateTotalNumVGPR(Res, Layout, Fixup);
242 case AGVK_Occupancy:
243 return evaluateOccupancy(Res, Layout, Fixup);
244 }
245
246 for (const MCExpr *Arg : Args) {
247 MCValue ArgRes;
248 if (!Arg->evaluateAsRelocatable(ArgRes, Layout, Fixup) ||
249 !ArgRes.isAbsolute())
250 return false;
251
252 if (!Total.has_value())
253 Total = ArgRes.getConstant();
254 Total = op(Kind, *Total, ArgRes.getConstant());
255 }
256
257 Res = MCValue::get(*Total);
258 return true;
259}
260
262 for (const MCExpr *Arg : Args)
263 Streamer.visitUsedExpr(*Arg);
264}
265
267 for (const MCExpr *Arg : Args) {
268 if (Arg->findAssociatedFragment())
269 return Arg->findAssociatedFragment();
270 }
271 return nullptr;
272}
273
274/// Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed
275/// are unresolvable but needed for further MCExprs). Derived from
276/// implementation of IsaInfo::getNumExtraSGPRs in AMDGPUBaseInfo.cpp.
277///
280 const MCExpr *FlatScrUsed,
281 bool XNACKUsed, MCContext &Ctx) {
282
283 return create(AGVK_ExtraSGPRs,
284 {VCCUsed, FlatScrUsed, MCConstantExpr::create(XNACKUsed, Ctx)},
285 Ctx);
286}
287
289 const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx) {
290 return create(AGVK_TotalNumVGPRs, {NumAGPR, NumVGPR}, Ctx);
291}
292
293/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
294///
295/// Remove dependency on GCNSubtarget and depend only only the necessary values
296/// for said occupancy computation. Should match computeOccupancy implementation
297/// without passing \p STM on.
299AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
300 const MCExpr *NumVGPRs,
301 const GCNSubtarget &STM, MCContext &Ctx) {
302 unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
303 unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM);
304 unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
305 unsigned Generation = STM.getGeneration();
306
307 auto CreateExpr = [&Ctx](unsigned Value) {
308 return MCConstantExpr::create(Value, Ctx);
309 };
310
311 return create(AGVK_Occupancy,
312 {CreateExpr(MaxWaves), CreateExpr(Granule),
313 CreateExpr(TargetTotalNumVGPRs), CreateExpr(Generation),
314 CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
315 Ctx);
316}
#define Success
This file defines the BumpPtrAllocator interface.
AMD GCN specific subclass of TargetSubtarget.
#define op(i)
PowerPC TLS Dynamic Call Fixup
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
AMDGPU target specific variadic MCExpr operations.
Definition: AMDGPUMCExpr.h:30
static const AMDGPUVariadicMCExpr * create(VariadicKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUVariadicMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUVariadicMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const override
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override
MCFragment * findAssociatedFragment() const override
const MCExpr * getSubExpr(size_t Index) const
static const AMDGPUVariadicMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
void visitUsedExpr(MCStreamer &Streamer) const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Generation getGeneration() const
Definition: GCNSubtarget.h:308
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
Encapsulates the layout of an assembly file at a particular point in time.
Definition: MCAsmLayout.h:28
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
Context object for machine code objects.
Definition: MCContext.h:81
void * allocate(unsigned Size, unsigned Align=8)
Definition: MCContext.h:816
void deallocate(void *Ptr)
Definition: MCContext.h:820
const MCSubtargetInfo * getSubtargetInfo() const
Definition: MCContext.h:418
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
bool evaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout, const MCFixup *Fixup) const
Try to evaluate the expression to a relocatable value, i.e.
Definition: MCExpr.cpp:809
MCFragment * findAssociatedFragment() const
Find the "associated section" for this expression, which is currently defined as the absolute section...
Definition: MCExpr.cpp:1057
Encode information on a single operation to perform on a byte sequence (e.g., an encoded instruction)...
Definition: MCFixup.h:71
Streaming machine code generation interface.
Definition: MCStreamer.h:212
void visitUsedExpr(const MCExpr &Expr)
Generic base class for all target subtargets.
This represents an "assembler immediate".
Definition: MCValue.h:36
int64_t getConstant() const
Definition: MCValue.h:43
static MCValue get(const MCSymbolRefExpr *SymA, const MCSymbolRefExpr *SymB=nullptr, int64_t Val=0, uint32_t RefKind=0)
Definition: MCValue.h:59
bool isAbsolute() const
Is this an absolute (as opposed to relocatable) value.
Definition: MCValue.h:49
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char NumVGPRs[]
Key for Kernel::CodeProps::Metadata::mNumVGPRs.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
bool isGFX90A(const MCSubtargetInfo &STI)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39