#define DEBUG_TYPE "si-pre-emit-peephole"

static unsigned SkipThreshold;

static cl::opt<unsigned, true> SkipThresholdFlag(
    "amdgpu-skip-threshold", cl::Hidden,
    cl::desc(
        "Number of instructions before jumping over divergent control flow"),
    cl::location(SkipThreshold), cl::init(12));
61 "SI peephole optimizations",
false,
false)
63char SIPreEmitPeephole::
ID = 0;
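
// optimizeVccBranch folds the common pattern
//   %sreg = S_MOV -1/0
//   $vcc  = S_AND $exec, %sreg      (or S_ANDN2)
//   S_CBRANCH_VCC[N]Z
// into a branch on EXEC (S_CBRANCH_EXEC[N]Z), an unconditional S_BRANCH, or
// no branch at all, depending on the mask value that is known at compile time.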
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
  const bool IsWave32 = ST.isWave32();
  const unsigned CondReg = TRI->getVCC();
  const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
  const unsigned AndN2 = IsWave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
  const unsigned Mov = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
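
  // Walk backwards from the branch looking for the instruction that defines
  // the condition register (VCC).  Bail out if EXEC is clobbered first, if
  // VCC is touched by anything other than the expected S_AND/S_ANDN2, or if
  // no definition is found within a few instructions.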
  MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
                                      E = MBB.rend();
  bool ReadsCond = false;
  unsigned Threshold = 5;
  for (++A; A != E; ++A) {
    if (!--Threshold)
      return false;
    if (A->modifiesRegister(ExecReg, TRI))
      return false;
    if (A->modifiesRegister(CondReg, TRI)) {
      if (!A->definesRegister(CondReg, TRI) ||
          (A->getOpcode() != And && A->getOpcode() != AndN2))
        return false;
      break;
    }
    ReadsCond |= A->readsRegister(CondReg, TRI);
  }
  if (A == E)
    return false;
  MachineOperand &Op1 = A->getOperand(1);
  MachineOperand &Op2 = A->getOperand(2);
  // Canonicalize so that EXEC is the first source operand.
  if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg)
    TII->commuteInstruction(*A);
  if (Op1.getReg() != ExecReg)
    return false;
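
  // The other S_AND input is either an inline 0/-1 immediate or a register.
  // For a register, scan further back for its defining instruction so the
  // mask value (and whether the register has other readers) is known.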
  int64_t MaskValue = 0;
  Register SReg;
  if (Op2.isReg()) {
    SReg = Op2.getReg();
    auto M = std::next(A);
    bool ReadsSreg = false;
    bool ModifiesExec = false;
    for (; M != E; ++M) {
      if (M->definesRegister(SReg, TRI))
        break;
      if (M->modifiesRegister(SReg, TRI))
        return false;
      ReadsSreg |= M->readsRegister(SReg, TRI);
      ModifiesExec |= M->modifiesRegister(ExecReg, TRI);
    }
    if (M == E)
      return false;
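
    // If SReg is VCC and it is defined by a VALU comparison, the S_AND with
    // EXEC is redundant: VALU compares already write zero for inactive lanes,
    // so the result is effectively masked by EXEC.  Just erase the S_AND.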
    if (A->getOpcode() == And && SReg == CondReg && !ModifiesExec &&
        TII->isVOPC(*M)) {
      A->eraseFromParent();
      return true;
    }
    if (!M->isMoveImmediate() || !M->getOperand(1).isImm() ||
        (M->getOperand(1).getImm() != -1 && M->getOperand(1).getImm() != 0))
      return false;
    MaskValue = M->getOperand(1).getImm();
    // If SReg has no other readers, fold the immediate into the S_AND and
    // erase the now-dead move.
    if (!ReadsSreg && Op2.isKill()) {
      A->getOperand(2).ChangeToImmediate(MaskValue);
      M->eraseFromParent();
    }
  } else if (Op2.isImm()) {
    MaskValue = Op2.getImm();
  } else {
    llvm_unreachable("Op2 must be a register or an immediate");
  }

  // Normalize the mask so that MaskValue is the value effectively ANDed with
  // EXEC: S_ANDN2 inverts its second source.
  assert(MaskValue == 0 || MaskValue == -1);
  if (A->getOpcode() == AndN2)
    MaskValue = ~MaskValue;
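
  // If the condition register is not read between the S_AND and the branch
  // and the S_AND's SCC def is dead, the S_AND itself can be dropped; when
  // the branch is not the last reader of VCC, first materialise the known
  // value with a plain move.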
  if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
    if (!MI.killsRegister(CondReg, TRI)) {
      // Replace the S_AND with a move of the known value.
      if (MaskValue == 0) {
        BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
            .addImm(0);
      } else {
        BuildMI(*A->getParent(), *A, A->getDebugLoc(), TII->get(Mov), CondReg)
            .addReg(ExecReg);
      }
    }
    // Remove the S_AND instruction.
    A->eraseFromParent();
  }
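
  // Finally rewrite the branch itself using the known mask value:
  //   mask == -1  ->  branch on EXEC instead of VCC
  //   mask ==  0  ->  the branch is either always or never taken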
  bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
  if (SReg == ExecReg) {
    // EXEC is updated directly.
    if (IsVCCZ) {
      MI.eraseFromParent();
      return true;
    }
    MI.setDesc(TII->get(AMDGPU::S_BRANCH));
  } else if (IsVCCZ && MaskValue == 0) {
    // Will always branch.  Remove every branch that follows this one (and the
    // successor edges they imply), then make this one unconditional.
    MachineBasicBlock *Parent = MI.getParent();
    SmallVector<MachineInstr *, 4> ToRemove;
    bool Found = false;
    for (MachineInstr &Term : Parent->terminators()) {
      if (Found) {
        if (Term.isBranch())
          ToRemove.push_back(&Term);
      } else {
        Found = Term.isIdenticalTo(MI);
      }
    }
    assert(Found && "conditional branch is not terminator");

    for (MachineInstr *BranchMI : ToRemove) {
      MachineOperand &Dst = BranchMI->getOperand(0);
      assert(Dst.isMBB() && "destination is not basic block");
      Parent->removeSuccessor(Dst.getMBB());
      BranchMI->eraseFromParent();
    }

    if (MachineBasicBlock *Succ = Parent->getFallThrough())
      Parent->removeSuccessor(Succ);

    // Rewrite to an unconditional branch.
    MI.setDesc(TII->get(AMDGPU::S_BRANCH));
  } else if (!IsVCCZ && MaskValue == 0) {
    // Will never branch: delete the branch and the edge to its destination.
    MachineOperand &Dst = MI.getOperand(0);
    assert(Dst.isMBB() && "destination is not basic block");
    MI.getParent()->removeSuccessor(Dst.getMBB());
    MI.eraseFromParent();
    return true;
  } else if (MaskValue == -1) {
    // VCC is just EXEC: depend on EXEC directly.
    MI.setDesc(
        TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
  }

  MI.removeOperand(MI.findRegisterUseOperandIdx(CondReg, TRI, false /*Kill*/));

  return true;
}
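
// optimizeSetGPR removes a second S_SET_GPR_IDX_ON (and any intervening
// S_SET_GPR_IDX_OFF) when it is identical to an earlier one and nothing in
// between invalidates the index mode.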
bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
                                       MachineInstr &MI) const {
  MachineBasicBlock &MBB = *MI.getParent();
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  Register IdxReg = Idx->isReg() ? Idx->getReg() : Register();
  SmallVector<MachineInstr *, 4> ToRemove;
  bool IdxOn = true;

  if (!MI.isIdenticalTo(First))
    return false;

  // Scan between the two S_SET_GPR_IDX_ON instructions, checking that nothing
  // disturbs M0, the index register, or any vector register.
  for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
                                         E = MI.getIterator();
       I != E; ++I) {
    if (I->isBundle())
      continue;
    switch (I->getOpcode()) {
    case AMDGPU::S_SET_GPR_IDX_MODE:
      return false;
    case AMDGPU::S_SET_GPR_IDX_OFF:
      IdxOn = false;
      ToRemove.push_back(&*I);
      break;
    default:
      if (I->modifiesRegister(AMDGPU::M0, TRI))
        return false;
      if (IdxReg && I->modifiesRegister(IdxReg, TRI))
        return false;
      if (llvm::any_of(I->operands(), [&MRI, this](const MachineOperand &MO) {
            return MO.isReg() && TRI->isVectorRegister(MRI, MO.getReg());
          })) {
        // The only exception allowed here is another indirect vector move
        // with the same mode.
        if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
                        I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
          return false;
      }
    }
  }

  MI.eraseFromBundle();
  for (MachineInstr *RI : ToRemove)
    RI->eraseFromBundle();
  return true;
}
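
// getBlockDestinations resolves the true/false successors of a block's
// terminating branch via analyzeBranch, treating a missing false edge as the
// fall-through block.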
bool SIPreEmitPeephole::getBlockDestinations(
    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
    return false;

  if (!FalseMBB) // Fall-through.
    FalseMBB = SrcMBB.getNextNode();

  return true;
}
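
// mustRetainExeczBranch conservatively decides whether the execz skip branch
// must be kept: it is retained if the skipped region contains conditional
// branches, instructions with unwanted effects when EXEC is empty, memory or
// waitcnt instructions, or simply too many instructions.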
bool SIPreEmitPeephole::mustRetainExeczBranch(
    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
  unsigned NumInstr = 0;
  const MachineFunction *MF = From.getParent();

  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
       MBBI != End && MBBI != ToI; ++MBBI) {
    for (const MachineInstr &MI : *MBBI) {
      // When a uniform loop is inside non-uniform control flow, the branch
      // leaving the loop might never be taken when EXEC = 0, so keep the
      // conditional skip lest the loop become infinite.
      if (MI.isConditionalBranch())
        return true;

      if (MI.isMetaInstruction())
        continue;

      if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
        return true;

      // These instructions are potentially expensive even if EXEC = 0.
      if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
          TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
        return true;

      ++NumInstr;
      if (NumInstr >= SkipThreshold)
        return true;
    }
  }

  return false;
}
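
// removeExeczBranch deletes an S_CBRANCH_EXECZ that only skips over a region
// cheap enough to execute with all lanes disabled.  Returns true if the skip
// branch was removed.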
bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
                                          MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *TrueMBB = nullptr;
  MachineBasicBlock *FalseMBB = nullptr;
  SmallVector<MachineOperand, 1> Cond;

  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
    return false;

  // Consider only forward branches.
  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
    return false;

  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
  MI.eraseFromParent();
  SrcMBB.removeSuccessor(TrueMBB);

  return true;
}
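
// runOnMachineFunction drives the pass: for every block it first examines the
// leading terminator for a VCC or EXECZ branch to optimize, then (on targets
// with VGPR index mode) scans the block for redundant S_SET_GPR_IDX_ON
// instructions, limiting the search distance for compile-time reasons.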
bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  bool Changed = false;

  MF.RenumberBlocks();

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator TermI = MBB.getFirstTerminator();
    // Check the first terminator for branches to optimize.
    if (TermI != MBB.end()) {
      MachineInstr &MI = *TermI;
      switch (MI.getOpcode()) {
      case AMDGPU::S_CBRANCH_VCCZ:
      case AMDGPU::S_CBRANCH_VCCNZ:
        Changed |= optimizeVccBranch(MI);
        break;
      case AMDGPU::S_CBRANCH_EXECZ:
        Changed |= removeExeczBranch(MI, MBB);
        break;
      }
    }

    if (!ST.hasVGPRIndexMode())
      continue;

    MachineInstr *SetGPRMI = nullptr;
    const unsigned Threshold = 20;
    unsigned Count = 0;
    // Scan the block for a pair of identical S_SET_GPR_IDX_ON instructions;
    // the expensive legality checks happen in optimizeSetGPR().
    for (MachineInstr &MI : make_early_inc_range(MBB.instrs())) {
      if (Count == Threshold)
        SetGPRMI = nullptr;
      else
        ++Count;

      if (MI.getOpcode() != AMDGPU::S_SET_GPR_IDX_ON)
        continue;

      Count = 0;
      if (!SetGPRMI) {
        SetGPRMI = &MI;
        continue;
      }

      if (optimizeSetGPR(*SetGPRMI, MI))
        Changed = true;
      else
        SetGPRMI = &MI;
    }
  }

  return Changed;
}