23#define DEBUG_TYPE "si-mode-register"
25STATISTIC(NumSetregInserted,
"Number of setreg of mode register inserted.");
57 unsigned NewMode = (
Mode & NewMask);
58 return Status(NewMask, NewMode);
114 std::vector<std::unique_ptr<BlockData>> BlockInfo;
115 std::queue<MachineBasicBlock *> Phase2List;
126 bool Changed =
false;
152 "Insert required mode register values",
false,
false)
154char SIModeRegister::
ID = 0;
166 if (
TII->usesFPDPRounding(
MI) ||
167 MI.getOpcode() == AMDGPU::FPTRUNC_UPWARD_PSEUDO ||
168 MI.getOpcode() == AMDGPU::FPTRUNC_DOWNWARD_PSEUDO) {
169 switch (
MI.getOpcode()) {
170 case AMDGPU::V_INTERP_P1LL_F16:
171 case AMDGPU::V_INTERP_P1LV_F16:
172 case AMDGPU::V_INTERP_P2_F16:
176 case AMDGPU::FPTRUNC_UPWARD_PSEUDO: {
178 if (
TII->getSubtarget().hasTrue16BitInsts()) {
181 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
189 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
193 case AMDGPU::FPTRUNC_DOWNWARD_PSEUDO: {
195 if (
TII->getSubtarget().hasTrue16BitInsts()) {
198 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_t16_e64));
206 MI.setDesc(
TII->get(AMDGPU::V_CVT_F16_F32_e32));
211 return DefaultStatus;
224 while (InstrMode.
Mask) {
225 unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.
Mask);
226 unsigned Width = llvm::countr_one<unsigned>(InstrMode.
Mask >>
Offset);
228 using namespace AMDGPU::Hwreg;
234 InstrMode.
Mask &= ~(((1 << Width) - 1) <<
Offset);
259 auto NewInfo = std::make_unique<BlockData>();
266 bool RequirePending =
true;
270 if (
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
271 MI.getOpcode() == AMDGPU::S_SETREG_B32_mode ||
272 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
273 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
277 unsigned Dst =
TII->getNamedOperand(
MI, AMDGPU::OpName::simm16)->getImm();
278 using namespace AMDGPU::Hwreg;
279 auto [
Id,
Offset, Width] = HwregEncoding::decode(Dst);
283 unsigned Mask = maskTrailingOnes<unsigned>(Width) <<
Offset;
286 if (InsertionPoint) {
287 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
288 InsertionPoint =
nullptr;
293 if (
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
294 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_mode) {
295 unsigned Val =
TII->getNamedOperand(
MI, AMDGPU::OpName::imm)->getImm();
300 RequirePending =
false;
301 NewInfo->Change = NewInfo->Change.merge(Setreg);
303 NewInfo->Change = NewInfo->Change.mergeUnknown(Mask);
305 }
else if (!NewInfo->Change.isCompatible(InstrMode)) {
308 if (InsertionPoint) {
313 if (RequirePending) {
317 NewInfo->FirstInsertionPoint = InsertionPoint;
318 NewInfo->Require = NewInfo->Change;
319 RequirePending =
false;
321 insertSetreg(
MBB, InsertionPoint,
TII,
322 IPChange.
delta(NewInfo->Change));
323 IPChange = NewInfo->Change;
326 InsertionPoint = &
MI;
328 NewInfo->Change = NewInfo->Change.merge(InstrMode);
332 InsertionPoint = &
MI;
333 IPChange = NewInfo->Change;
334 NewInfo->Change = NewInfo->Change.
merge(InstrMode);
338 if (RequirePending) {
341 NewInfo->FirstInsertionPoint = InsertionPoint;
342 NewInfo->Require = NewInfo->Change;
343 }
else if (InsertionPoint) {
345 insertSetreg(
MBB, InsertionPoint,
TII, IPChange.
delta(NewInfo->Change));
347 NewInfo->Exit = NewInfo->Change;
357 bool RevisitRequired =
false;
358 bool ExitSet =
false;
362 BlockInfo[ThisBlock]->Pred = DefaultStatus;
377 unsigned PredBlock =
PB.getNumber();
378 if ((ThisBlock == PredBlock) && (std::next(
P) == E)) {
379 BlockInfo[ThisBlock]->Pred = DefaultStatus;
381 }
else if (BlockInfo[PredBlock]->ExitSet) {
382 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
384 }
else if (PredBlock != ThisBlock)
385 RevisitRequired =
true;
387 for (
P = std::next(
P);
P != E;
P = std::next(
P)) {
390 if (BlockInfo[PredBlock]->ExitSet) {
391 if (BlockInfo[ThisBlock]->ExitSet) {
392 BlockInfo[ThisBlock]->Pred =
393 BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit);
395 BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit;
398 }
else if (PredBlock != ThisBlock)
399 RevisitRequired =
true;
403 BlockInfo[ThisBlock]->Pred.
merge(BlockInfo[ThisBlock]->Change);
404 if (BlockInfo[ThisBlock]->Exit != TmpStatus) {
405 BlockInfo[ThisBlock]->Exit = TmpStatus;
409 Phase2List.push(Succ);
411 BlockInfo[ThisBlock]->ExitSet = ExitSet;
413 Phase2List.push(&
MBB);
422 if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) {
424 BlockInfo[ThisBlock]->Pred.
delta(BlockInfo[ThisBlock]->Require);
425 if (BlockInfo[ThisBlock]->FirstInsertionPoint)
426 insertSetreg(
MBB, BlockInfo[ThisBlock]->FirstInsertionPoint,
TII, Delta);
442 processBlockPhase1(BB,
TII);
448 Phase2List.push(&BB);
449 while (!Phase2List.empty()) {
450 processBlockPhase2(*Phase2List.front(),
TII);
457 processBlockPhase3(BB,
TII);
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
#define FP_ROUND_MODE_DP(x)
#define FP_ROUND_ROUND_TO_INF
#define FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_ZERO
#define FP_ROUND_ROUND_TO_NEGINF
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
MachineInstr * FirstInsertionPoint
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
pred_iterator pred_begin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
MachineInstr & instr_front()
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
LLVM Value Representation.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createSIModeRegisterPass()
Status delta(const Status &S) const
Status(unsigned NewMask, unsigned NewMode)
bool isCombinable(Status &S)
bool operator==(const Status &S) const
bool isCompatible(Status &S)
Status merge(const Status &S) const
Status intersect(const Status &S) const
bool operator!=(const Status &S) const
Status mergeUnknown(unsigned newMask)