// Fragment of AMDGPU's SIModeRegister pass (extraction artifact: the leading
// "23"/"25" are the original file's line numbers; interior lines are missing).
// DEBUG_TYPE names this pass for LLVM's -debug-only= filtering.
23#define DEBUG_TYPE "si-mode-register"
// Statistic counter: incremented (elsewhere) each time the pass inserts a
// setreg instruction for the mode register.
25STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
// Fragment of a Status helper (original lines 57-58; the enclosing function
// signature is not visible). Masks Mode down to NewMask and builds a new
// Status from the pair — NOTE(review): likely the body of Status::intersect,
// but the missing header should be confirmed against the full file.
57 unsigned NewMode = (
Mode & NewMask);
58 return Status(NewMask, NewMode);
// Fragment of the SIModeRegister class declaration (interior lines missing).
110class SIModeRegister {
// Per-basic-block mode-tracking records; indexed by MBB number (see the
// BlockInfo[ThisBlock] accesses in the phase functions below).
112 std::vector<std::unique_ptr<BlockData>> BlockInfo;
// Worklist of blocks queued for (re)processing by phase 2.
113 std::queue<MachineBasicBlock *> Phase2List;
// Legacy-pass-manager boilerplate fragments (interior lines missing).
// Default constructor registers the pass with its static ID.
144 SIModeRegisterLegacy() : MachineFunctionPass(ID) {}
// Entry point for the legacy pass manager (defined out of line below).
146 bool runOnMachineFunction(MachineFunction &MF)
override;
148 void getAnalysisUsage(AnalysisUsage &AU)
const override {
// Tail of an INITIALIZE_PASS(...) invocation: pass description string,
// then the cfg-only and is-analysis flags (both false).
156 "Insert required mode register values",
false,
false)
// Static pass identifier; its address, not its value, identifies the pass.
158char SIModeRegisterLegacy::
ID = 0;
// Fragment of createSIModeRegisterPass(): factory for the legacy pass.
163 return new SIModeRegisterLegacy();
// Fragment of SIModeRegister::getInstructionMode (the signature around
// original line 170 is not visible; interior lines, including the switch
// header, break statements and closing braces, are missing).
// Determines the mode-register Status an instruction requires.
172 unsigned Opcode =
MI.getOpcode();
// Instructions that use double-precision FP rounding, plus the
// FPTRUNC_ROUND pseudos, carry an explicit rounding-mode requirement.
173 if (
TII->usesFPDPRounding(
MI) ||
174 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
175 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
176 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
177 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
// Switch cases (the switch statement itself is in the missing lines).
// F16 interpolation opcodes are grouped together.
179 case AMDGPU::V_INTERP_P1LL_F16:
180 case AMDGPU::V_INTERP_P1LV_F16:
181 case AMDGPU::V_INTERP_P2_F16:
// Each FPTRUNC_ROUND pseudo reads its rounding mode from an immediate
// operand and is lowered in place to the concrete V_CVT instruction.
185 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
186 unsigned Mode =
MI.getOperand(2).getImm();
188 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
191 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
192 unsigned Mode =
MI.getOperand(2).getImm();
194 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
// The t16_e64 form reads the mode from operand 6, not operand 2 — the
// e64 encoding carries extra operands before the immediate.
197 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
198 unsigned Mode =
MI.getOperand(6).getImm();
200 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
203 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
204 unsigned Mode =
MI.getOperand(2).getImm();
206 MI.setDesc(
TII->get(AMDGPU::V_CVT_F32_F64_e32));
// Instructions with no special requirement yield the default mode.
210 return DefaultStatus;
// Fragment of SIModeRegister::insertSetreg (interior lines, including the
// BuildMI call that actually emits S_SETREG, are missing).
// Inserts setreg instruction(s) before MI to establish InstrMode: the loop
// below peels one contiguous run of set bits off InstrMode.Mask per
// iteration, so each run becomes one setreg of Width bits at Offset.
221void SIModeRegister::insertSetreg(MachineBasicBlock &
MBB, MachineInstr *
MI,
222 const SIInstrInfo *
TII, Status InstrMode) {
223 while (InstrMode.
Mask) {
227 using namespace AMDGPU::Hwreg;
// Clear the bits just handled so the loop terminates once Mask is empty.
233 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
// Fragment of phase 1 (many interior lines missing): scan one block,
// recording the mode each instruction requires, folding in explicit setreg
// instructions, and inserting setregs for already-known local changes.
256void SIModeRegister::processBlockPhase1(MachineBasicBlock &
MBB,
257 const SIInstrInfo *
TII) {
// Fresh per-block record; presumably stored into BlockInfo by missing code.
258 auto NewInfo = std::make_unique<BlockData>();
// Deferred insertion point: the earliest instruction whose required mode
// differs from the tracked state; the setreg is emitted lazily.
259 MachineInstr *InsertionPoint =
nullptr;
// True until the block's entry requirement (Require/FirstInsertionPoint)
// has been pinned down.
265 bool RequirePending =
true;
267 for (MachineInstr &
MI :
MBB) {
268 Status InstrMode = getInstructionMode(
MI,
TII);
// Explicit setreg instructions (register or immediate-value forms).
269 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
270 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
271 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
272 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
// Decode which hwreg field the setreg targets from its simm16 operand.
276 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
277 using namespace AMDGPU::Hwreg;
278 auto [
Id,
Offset, Width] = HwregEncoding::decode(Dst);
// Flush any deferred setreg before this explicit one takes effect.
285 if (InsertionPoint) {
286 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
287 InsertionPoint =
nullptr;
// Immediate-value forms let us track the exact new mode value.
292 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
293 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
294 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
296 Status Setreg = Status(Mask,
Mode);
// An explicit setreg fixes the entry state; nothing is pending anymore.
299 RequirePending =
false;
300 NewInfo->Change = NewInfo->Change.merge(Setreg);
// Register-source setreg: value unknowable, only the mask is known.
302 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
304 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
// This instruction needs a mode differing from the tracked state.
307 if (InsertionPoint) {
// First incompatibility in the block: record it as the block's entry
// requirement instead of inserting immediately (phase 3 handles it).
312 if (RequirePending) {
316 NewInfo->FirstInsertionPoint = InsertionPoint;
317 NewInfo->Require = NewInfo->Change;
318 RequirePending =
false;
// Otherwise materialize the previously deferred change here.
320 insertSetreg(
MBB, InsertionPoint,
TII,
321 IPChange.
delta(NewInfo->Change));
322 IPChange = NewInfo->Change;
// Start a new deferred change at this instruction.
325 InsertionPoint = &
MI;
327 NewInfo->Change = NewInfo->Change.merge(InstrMode);
// (Else-branch from missing lines: no insertion pending yet.)
331 InsertionPoint = &
MI;
332 IPChange = NewInfo->Change;
333 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
// End of block: either publish the pending requirement for phase 3, or
// flush the deferred setreg now.
337 if (RequirePending) {
340 NewInfo->FirstInsertionPoint = InsertionPoint;
341 NewInfo->Require = NewInfo->Change;
342 }
else if (InsertionPoint) {
344 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
// The block's exit state is the accumulated change.
346 NewInfo->Exit = NewInfo->Change;
// Fragment of phase 2 (interior lines missing): dataflow propagation of the
// mode state across the CFG via the Phase2List worklist until a fixed point.
354void SIModeRegister::processBlockPhase2(MachineBasicBlock &
MBB,
355 const SIInstrInfo *
TII) {
// Set when a predecessor's exit state is not yet known, forcing this block
// back onto the worklist.
356 bool RevisitRequired =
false;
357 bool ExitSet =
false;
// Entry-ish case from missing context: assume the default mode on entry.
361 BlockInfo[ThisBlock]->Pred = DefaultStatus;
// First predecessor (iterator P over pred_begin..E comes from missing
// lines).
375 MachineBasicBlock &
PB = *(*P);
376 unsigned PredBlock =
PB.getNumber();
// A block whose only predecessor is itself starts from the default mode.
377 if ((ThisBlock == PredBlock) && (std::next(
P) ==
E)) {
378 BlockInfo[ThisBlock]->Pred = DefaultStatus;
380 }
else if (BlockInfo[PredBlock]->ExitSet) {
381 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
383 }
else if (PredBlock != ThisBlock)
384 RevisitRequired =
true;
// Remaining predecessors: intersect their exit states into Pred so Pred
// only keeps mode bits agreed on by every predecessor.
386 for (
P = std::next(
P);
P !=
E;
P = std::next(
P)) {
387 MachineBasicBlock *Pred = *
P;
389 if (BlockInfo[PredBlock]->ExitSet) {
390 if (BlockInfo[ThisBlock]->ExitSet) {
391 BlockInfo[ThisBlock]->Pred =
392 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
394 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
397 }
else if (PredBlock != ThisBlock)
398 RevisitRequired =
true;
// Recompute the exit state from Pred + this block's local Change.
// NOTE(review): the result is presumably assigned to TmpStatus on the
// missing part of this line — confirm against the full file.
402 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
// If the exit state changed, successors must be reprocessed.
403 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
404 BlockInfo[ThisBlock]->Exit = TmpStatus;
408 Phase2List.push(Succ);
410 BlockInfo[ThisBlock]->ExitSet = ExitSet;
// Revisit this block later once missing predecessor info is available.
412 Phase2List.push(&
MBB);
// Fragment of phase 3 (interior lines missing): if the mode arriving from
// the predecessors does not satisfy this block's entry requirement, insert
// a setreg for the difference at the insertion point phase 1 recorded.
418void SIModeRegister::processBlockPhase3(MachineBasicBlock &
MBB,
419 const SIInstrInfo *
TII) {
421 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
// NOTE(review): the delta result is presumably bound to `Delta` on the
// missing part of this line — confirm against the full file.
423 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
424 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
425 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
// Legacy pass-manager entry point: delegates to the shared implementation
// (closing brace is in the missing lines).
431bool SIModeRegisterLegacy::runOnMachineFunction(MachineFunction &MF) {
432 return SIModeRegister().run(MF);
// Fragment of the new-pass-manager run(): same delegation; the branch taken
// when run() reports no change presumably returns all-preserved analyses.
437 if (!SIModeRegister().
run(MF))
// Fragment of SIModeRegister::run(). NOTE(review): the strictfp attribute
// check presumably makes the pass behave conservatively (or bail) for
// strict-FP functions — the taken branch is in missing lines; confirm.
451 if (
F.hasFnAttribute(llvm::Attribute::StrictFP))
// Phase 1: local scan of every block.
462 processBlockPhase1(BB,
TII);
// Phase 2: seed the worklist with every block, then iterate to a fixed
// point (front() is popped in missing lines).
468 Phase2List.push(&BB);
469 while (!Phase2List.empty()) {
470 processBlockPhase2(*Phase2List.front(),
TII);
// Phase 3: emit the entry setregs each block still requires.
477 processBlockPhase3(BB,
TII);
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not: 1. Add or remove basic blocks from the function. 2. Modify terminator instructions in any way.
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFunction yet, in which case this is -1.
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)