Go to the documentation of this file.
24 #define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
37 unsigned OrSaveExecOpc;
55 return "SI optimize exec mask operations pre-RA";
68 "SI optimize exec mask operations pre-RA",
false,
false)
73 char SIOptimizeExecMaskingPreRA::
ID = 0;
78 return new SIOptimizeExecMaskingPreRA();
126 unsigned Opc = MI.getOpcode();
127 return Opc == AMDGPU::S_CBRANCH_VCCZ ||
128 Opc == AMDGPU::S_CBRANCH_VCCNZ; });
133 TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *
I, *
MRI, LIS);
134 if (!And ||
And->getOpcode() != AndOpc ||
135 !
And->getOperand(1).isReg() || !
And->getOperand(2).isReg())
142 AndCC = &
And->getOperand(2);
145 }
else if (
And->getOperand(2).getReg() !=
Register(ExecReg)) {
149 auto *
Cmp =
TRI->findReachingDef(CmpReg, CmpSubReg, *And, *
MRI, LIS);
150 if (!Cmp || !(
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
151 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
152 Cmp->getParent() !=
And->getParent())
163 auto *Sel =
TRI->findReachingDef(SelReg, Op1->
getSubReg(), *Cmp, *
MRI, LIS);
164 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
167 if (
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
168 TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
171 Op1 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
172 Op2 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
185 LLVM_DEBUG(
dbgs() <<
"Folding sequence:\n\t" << *Sel <<
'\t' << *Cmp <<
'\t'
188 LIS->RemoveMachineInstrFromMaps(*And);
191 And->getOperand(0).getReg())
199 And->eraseFromParent();
200 LIS->InsertMachineInstrInMaps(*Andn2);
210 return MI.readsRegister(CondReg, TRI);
214 LIS->RemoveMachineInstrFromMaps(*Cmp);
215 Cmp->eraseFromParent();
221 LIS->RemoveMachineInstrFromMaps(*Sel);
222 Sel->eraseFromParent();
250 if (SaveExecMI.
getOpcode() != OrSaveExecOpc)
254 return MI.getOpcode() == XorTermrOpc;
269 while (
I != First && !AndExecMI) {
270 if (
I->getOpcode() == AndOpc &&
I->getOperand(0).getReg() == DstReg &&
271 I->getOperand(1).getReg() ==
Register(ExecReg))
282 SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
283 SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
285 LiveRange &RegUnit = LIS->getRegUnit(*UI);
286 if (RegUnit.
find(StartIdx) != std::prev(RegUnit.
find(EndIdx)))
291 LIS->removeInterval(SavedExecReg);
292 LIS->removeInterval(DstReg);
296 LIS->RemoveMachineInstrFromMaps(*AndExecMI);
299 LIS->createAndComputeVirtRegInterval(DstReg);
304 bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(
MachineFunction &MF) {
309 TRI =
ST.getRegisterInfo();
310 TII =
ST.getInstrInfo();
312 LIS = &getAnalysis<LiveIntervals>();
315 AndOpc =
Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
316 Andn2Opc =
Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
318 Wave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
319 XorTermrOpc =
Wave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
324 bool Changed =
false;
328 if (optimizeElseBranch(
MBB)) {
335 RecalcRegs.insert(AMDGPU::VCC_LO);
336 RecalcRegs.insert(AMDGPU::VCC_HI);
351 if (
Term.getOpcode() != AMDGPU::S_ENDPGM ||
Term.getNumOperands() != 1)
356 while (!Blocks.empty()) {
358 auto I = CurBB->rbegin(),
E = CurBB->rend();
360 if (
I->isUnconditionalBranch() ||
I->getOpcode() == AMDGPU::S_ENDPGM)
362 else if (
I->isBranch())
367 if (
I->isDebugInstr()) {
372 if (
I->mayStore() ||
I->isBarrier() ||
I->isCall() ||
373 I->hasUnmodeledSideEffects() ||
I->hasOrderedMemoryRef())
377 <<
"Removing no effect instruction: " << *
I <<
'\n');
379 for (
auto &
Op :
I->operands()) {
381 RecalcRegs.insert(
Op.getReg());
384 auto Next = std::next(
I);
385 LIS->RemoveMachineInstrFromMaps(*
I);
386 I->eraseFromParent();
396 for (
auto *Pred : CurBB->predecessors()) {
397 if (Pred->succ_size() == 1)
398 Blocks.push_back(Pred);
411 unsigned ScanThreshold = 10;
413 && ScanThreshold--; ++
I) {
415 if (!(
I->isFullCopy() &&
I->getOperand(1).getReg() ==
Register(ExecReg)))
418 Register SavedExec =
I->getOperand(0).getReg();
423 if (SingleExecUser->
getParent() ==
I->getParent() &&
425 TII->isOperandLegal(*SingleExecUser, Idx, &
I->getOperand(1))) {
427 LIS->RemoveMachineInstrFromMaps(*
I);
428 I->eraseFromParent();
430 LIS->removeInterval(SavedExec);
439 for (
auto Reg : RecalcRegs) {
440 if (
Reg.isVirtual()) {
441 LIS->removeInterval(
Reg);
443 LIS->createAndComputeVirtRegInterval(
Reg);
445 LIS->removeAllRegUnitsForPhysReg(
Reg);
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
This is an optimization pass for GlobalISel generic memory operations.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
SI optimize exec mask operations
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Reg
All possible values of the reg field in the ModR/M byte.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
LLVM_NODISCARD T pop_back_val()
iterator_range< iterator > terminators()
Result of a LiveRange query.
std::pair< iterator, bool > insert(const ValueT &V)
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
unsigned getUndefRegState(bool B)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
@ And
Bitwise or logical AND of integers.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const HexagonInstrInfo * TII
into llvm powi allowing the code generator to produce balanced multiplication trees First
MachineOperand class - Representation of each machine instruction operand.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
SlotIndex - An opaque wrapper around machine indexes.
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Implements a dense probed hash-table based set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void setIsDead(bool Val=true)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
This class represents the liveness of a register, stack slot, etc.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
StandardInstrumentations SI(Debug, VerifyEach)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
LiveInterval & getInterval(Register Reg)
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
reverse_iterator rbegin()
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
self_iterator getIterator()
const MachineBasicBlock * getParent() const
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Function & getFunction()
Return the LLVM function that this machine code represents.
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
bool reg_empty(Register RegNo) const
reg_empty - Return true if there are no instructions using or defining the specified register (it may be live-in).
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
void setReg(Register Reg)
Change the register this operand corresponds to.
INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequired()
static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx, SlotIndex SelIdx)
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
char & SIOptimizeExecMaskingPreRAID
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
bool isKill() const
Return true if the live-in value is killed by this instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
Wrapper class representing physical registers. Should be passed by value.