23#define DEBUG_TYPE "si-mode-register"
25STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
57 unsigned NewMode = (
Mode & NewMask);
58 return Status(NewMask, NewMode);
110class SIModeRegister {
112 std::vector<std::unique_ptr<BlockData>> BlockInfo;
113 std::queue<MachineBasicBlock *> Phase2List;
144 SIModeRegisterLegacy() : MachineFunctionPass(ID) {}
146 bool runOnMachineFunction(MachineFunction &MF)
override;
148 void getAnalysisUsage(AnalysisUsage &AU)
const override {
156 "Insert required mode register values",
false,
false)
158char SIModeRegisterLegacy::
ID = 0;
163 return new SIModeRegisterLegacy();
172 unsigned Opcode =
MI.getOpcode();
173 if (
TII->usesFPDPRounding(
MI) ||
174 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
175 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32 ||
176 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64 ||
177 Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO ||
178 Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_SALU_PSEUDO) {
180 case AMDGPU::V_INTERP_P1LL_F16:
181 case AMDGPU::V_INTERP_P1LV_F16:
182 case AMDGPU::V_INTERP_P2_F16:
186 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO: {
187 unsigned Mode =
MI.getOperand(2).getImm();
189 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
192 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_fake16_e32: {
193 unsigned Mode =
MI.getOperand(2).getImm();
195 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_fake16_e32));
198 case AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO_t16_e64: {
199 unsigned Mode =
MI.getOperand(6).getImm();
201 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
204 case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
205 unsigned Mode =
MI.getOperand(2).getImm();
207 MI.setDesc(
TII->get(AMDGPU::V_CVT_F32_F64_e32));
210 case AMDGPU::FPTRUNC_ROUND_F16_F32_SALU_PSEUDO: {
211 unsigned Mode =
MI.getOperand(2).getImm();
213 MI.setDesc(
TII->get(AMDGPU::S_CVT_F16_F32));
217 return DefaultStatus;
228void SIModeRegister::insertSetreg(MachineBasicBlock &
MBB, MachineInstr *
MI,
229 const SIInstrInfo *
TII, Status InstrMode) {
230 while (InstrMode.
Mask) {
234 using namespace AMDGPU::Hwreg;
240 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
263void SIModeRegister::processBlockPhase1(MachineBasicBlock &
MBB,
264 const SIInstrInfo *
TII) {
265 auto NewInfo = std::make_unique<BlockData>();
266 MachineInstr *InsertionPoint =
nullptr;
272 bool RequirePending =
true;
274 for (MachineInstr &
MI :
MBB) {
275 Status InstrMode = getInstructionMode(
MI,
TII);
276 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
277 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
278 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
279 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
283 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
284 using namespace AMDGPU::Hwreg;
285 auto [
Id,
Offset, Width] = HwregEncoding::decode(Dst);
292 if (InsertionPoint) {
293 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
294 InsertionPoint =
nullptr;
299 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
300 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
301 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
303 Status Setreg = Status(Mask,
Mode);
306 RequirePending =
false;
307 NewInfo->Change = NewInfo->Change.merge(Setreg);
309 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
311 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
314 if (InsertionPoint) {
319 if (RequirePending) {
323 NewInfo->FirstInsertionPoint = InsertionPoint;
324 NewInfo->Require = NewInfo->Change;
325 RequirePending =
false;
327 insertSetreg(
MBB, InsertionPoint,
TII,
328 IPChange.
delta(NewInfo->Change));
329 IPChange = NewInfo->Change;
332 InsertionPoint = &
MI;
334 NewInfo->Change = NewInfo->Change.merge(InstrMode);
338 InsertionPoint = &
MI;
339 IPChange = NewInfo->Change;
340 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
344 if (RequirePending) {
347 NewInfo->FirstInsertionPoint = InsertionPoint;
348 NewInfo->Require = NewInfo->Change;
349 }
else if (InsertionPoint) {
351 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
353 NewInfo->Exit = NewInfo->Change;
361void SIModeRegister::processBlockPhase2(MachineBasicBlock &
MBB,
362 const SIInstrInfo *
TII) {
363 bool RevisitRequired =
false;
364 bool ExitSet =
false;
368 BlockInfo[ThisBlock]->Pred = DefaultStatus;
382 MachineBasicBlock &
PB = *(*P);
383 unsigned PredBlock =
PB.getNumber();
384 if ((ThisBlock == PredBlock) && (std::next(
P) ==
E)) {
385 BlockInfo[ThisBlock]->Pred = DefaultStatus;
387 }
else if (BlockInfo[PredBlock]->ExitSet) {
388 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
390 }
else if (PredBlock != ThisBlock)
391 RevisitRequired =
true;
393 for (
P = std::next(
P);
P !=
E;
P = std::next(
P)) {
394 MachineBasicBlock *Pred = *
P;
396 if (BlockInfo[PredBlock]->ExitSet) {
397 if (BlockInfo[ThisBlock]->ExitSet) {
398 BlockInfo[ThisBlock]->Pred =
399 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
401 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
404 }
else if (PredBlock != ThisBlock)
405 RevisitRequired =
true;
409 BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change);
410 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
411 BlockInfo[ThisBlock]->Exit = TmpStatus;
415 Phase2List.push(Succ);
417 BlockInfo[ThisBlock]->ExitSet = ExitSet;
419 Phase2List.push(&
MBB);
425void SIModeRegister::processBlockPhase3(MachineBasicBlock &
MBB,
426 const SIInstrInfo *
TII) {
428 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
430 BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require);
431 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
432 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
438bool SIModeRegisterLegacy::runOnMachineFunction(MachineFunction &MF) {
439 return SIModeRegister().run(MF);
444 if (!SIModeRegister().
run(MF))
458 if (
F.hasFnAttribute(llvm::Attribute::StrictFP))
469 processBlockPhase1(BB,
TII);
475 Phase2List.push(&BB);
476 while (!Phase2List.empty()) {
477 processBlockPhase2(*Phase2List.front(),
TII);
484 processBlockPhase3(BB,
TII);
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
SmallVectorImpl< MachineBasicBlock * >::iterator pred_iterator
pred_iterator pred_begin()
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)