55#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"
59class AMDGPULowerVGPREncoding {
60 static constexpr unsigned OpNum = 4;
61 static constexpr unsigned BitsPerField = 2;
62 static constexpr unsigned NumFields = 4;
63 static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
64 static constexpr unsigned ModeWidth = NumFields * BitsPerField;
65 static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
67 std::bitset<BitsPerField * NumFields>>;
69 static constexpr unsigned VGPRMSBShift =
72 class ModeTy :
public ModeType {
75 ModeTy() : ModeType(0) {}
77 operator int64_t()
const {
return raw_bits().to_ulong(); }
79 static ModeTy fullMask() {
110 unsigned ClauseRemaining;
113 unsigned ClauseBreaks;
119 bool setMode(ModeTy NewMode, ModeTy Mask,
124 setMode(ModeTy(), ModeTy::fullMask(),
I);
138 const AMDGPU::OpName
Ops[OpNum],
139 const AMDGPU::OpName *Ops2 =
nullptr);
164bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
166 assert((NewMode.raw_bits() & ~Mask.raw_bits()).none());
168 auto Delta = NewMode.raw_bits() ^ CurrentMode.raw_bits();
170 if ((Delta & Mask.raw_bits()).none()) {
175 if (MostRecentModeSet && (Delta & CurrentMask.raw_bits()).none()) {
176 CurrentMode |= NewMode;
181 if (MostRecentModeSet->
getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
184 int64_t OldModeBits =
Op.getImm() & (ModeMask << ModeWidth);
185 Op.setImm(CurrentMode | OldModeBits);
188 "unexpected MostRecentModeSet opcode");
189 updateSetregModeImm(*MostRecentModeSet, CurrentMode);
196 int64_t OldModeBits = CurrentMode << ModeWidth;
199 I = handleCoissue(
I);
200 MostRecentModeSet =
BuildMI(*
MBB,
I, {},
TII->get(AMDGPU::S_SET_VGPR_MSB))
201 .
addImm(NewMode | OldModeBits);
203 CurrentMode = NewMode;
208std::optional<unsigned>
215 if (!RC || !
TRI->isVGPRClass(RC))
218 unsigned Idx =
TRI->getHWRegIndex(
Reg);
222void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode, ModeTy &Mask,
224 const AMDGPU::OpName
Ops[OpNum],
225 const AMDGPU::OpName *Ops2) {
229 for (
unsigned I = 0;
I < OpNum; ++
I) {
232 std::optional<unsigned> MSBits;
234 MSBits = getMSBs(*
Op);
237 if (MSBits.has_value() && Ops2) {
238 auto Op2 =
TII->getNamedOperand(
MI, Ops2[
I]);
240 std::optional<unsigned> MSBits2;
241 MSBits2 = getMSBs(*Op2);
242 if (MSBits2.has_value() && MSBits != MSBits2)
248 if (!MSBits.has_value() && Ops2) {
249 Op =
TII->getNamedOperand(
MI, Ops2[
I]);
251 MSBits = getMSBs(*
Op);
254 if (!MSBits.has_value())
260 if (
Ops[
I] == AMDGPU::OpName::src2 && !
Op->isDef() &&
Op->isTied() &&
263 TII->hasVALU32BitEncoding(
MI.getOpcode()))))
266 NewMode[
I] = MSBits.value();
271bool AMDGPULowerVGPREncoding::runOnMachineInstr(
MachineInstr &
MI) {
274 ModeTy NewMode, Mask;
275 computeMode(NewMode, Mask,
MI,
Ops.first,
Ops.second);
276 return setMode(NewMode, Mask,
MI.getIterator());
278 assert(!
TII->hasVGPRUses(
MI) ||
MI.isMetaInstruction() ||
MI.isPseudo());
285 if (!ClauseRemaining)
290 if (ClauseRemaining == ClauseLen) {
291 I =
Clause->getPrevNode()->getIterator();
299 Clause->eraseFromBundle();
309 Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
321 if (
I ==
I->getParent()->begin())
326 return TII->isBarrier(
MI->getOpcode()) ||
328 MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
331 if (!isProgramStateSALU(&*Prev))
334 while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
335 isProgramStateSALU(&*Prev)) {
345static int64_t convertModeToSetregFormat(int64_t
Mode) {
350bool AMDGPULowerVGPREncoding::updateSetregModeImm(
MachineInstr &
MI,
352 assert(
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
355 int64_t SetregMode = convertModeToSetregFormat(ModeValue);
358 int64_t OldImm = ImmOp->
getImm();
360 (OldImm &
~AMDGPU::Hwreg::VGPR_MSB_MASK) | (SetregMode << VGPRMSBShift);
362 return NewImm != OldImm;
368 assert(
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
369 "only S_SETREG_IMM32_B32 needs to be handled");
372 assert(SIMM16Op &&
"SIMM16Op must be present");
376 if (HwRegId != ID_MODE)
379 int64_t ModeValue =
static_cast<int64_t
>(CurrentMode);
384 if (
Size <= VGPRMSBShift) {
387 MostRecentModeSet = &
MI;
388 return updateSetregModeImm(
MI, ModeValue);
396 assert(ImmOp &&
"ImmOp must be present");
397 int64_t ImmBits12To19 = (ImmOp->
getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
398 int64_t SetregModeValue = convertModeToSetregFormat(ModeValue);
399 if (ImmBits12To19 == SetregModeValue) {
404 MostRecentModeSet =
nullptr;
411 MostRecentModeSet =
BuildMI(*
MBB, InsertPt,
MI.getDebugLoc(),
412 TII->get(AMDGPU::S_SET_VGPR_MSB))
419 if (!ST.has1024AddressableVGPRs())
422 TII = ST.getInstrInfo();
423 TRI = ST.getRegisterInfo();
426 ClauseLen = ClauseRemaining = 0;
429 for (
auto &
MBB : MF) {
430 MostRecentModeSet =
nullptr;
434 if (
MI.isMetaInstruction())
437 if (
MI.isTerminator() ||
MI.isCall()) {
438 if (
MI.getOpcode() == AMDGPU::S_ENDPGM ||
439 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED)
442 resetMode(
MI.getIterator());
446 if (
MI.isInlineAsm()) {
447 if (
TII->hasVGPRUses(
MI))
448 resetMode(
MI.getIterator());
452 if (
MI.getOpcode() == AMDGPU::S_CLAUSE) {
453 assert(!ClauseRemaining &&
"Nested clauses are not supported");
454 ClauseLen =
MI.getOperand(0).getImm();
455 ClauseBreaks = (ClauseLen >> 8) & 15;
456 ClauseLen = ClauseRemaining = (ClauseLen & 63) + 1;
461 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
462 ST.hasSetregVGPRMSBFixup()) {
474 resetMode(
MBB.instr_end());
487 return AMDGPULowerVGPREncoding().run(MF);
498char AMDGPULowerVGPREncodingLegacy::ID = 0;
503 "AMDGPU Lower VGPR Encoding",
false,
false)
508 if (!AMDGPULowerVGPREncoding().run(MF))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
This file implements the PackedVector class.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
This file implements the C++20 <bit> header.
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
Wrapper class representing physical registers. Should be passed by value.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Store a vector of values using a specific number of bits for each value.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
static bool isVOP2(const MachineInstr &MI)
static bool isProgramStateSALU(const MachineInstr &MI)
static bool isVOP3(const MCInstrDesc &Desc)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
DWARFExpression::Operation Op
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
char & AMDGPULowerVGPREncodingLegacyID
constexpr T rotl(T V, int R)