24#define DEBUG_TYPE "si-optimize-exec-masking-pre-ra"
37 unsigned OrSaveExecOpc;
55 return "SI optimize exec mask operations pre-RA";
68 "SI optimize exec mask operations pre-RA",
false,
false)
73char SIOptimizeExecMaskingPreRA::
ID = 0;
78 return new SIOptimizeExecMaskingPreRA();
125 unsigned Opc = MI.getOpcode();
126 return Opc == AMDGPU::S_CBRANCH_VCCZ ||
127 Opc == AMDGPU::S_CBRANCH_VCCNZ; });
132 TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *
I, *
MRI, LIS);
133 if (!
And ||
And->getOpcode() != AndOpc ||
134 !
And->getOperand(1).isReg() || !
And->getOperand(2).isReg())
141 AndCC = &
And->getOperand(2);
144 }
else if (
And->getOperand(2).getReg() !=
Register(ExecReg)) {
148 auto *
Cmp =
TRI->findReachingDef(CmpReg, CmpSubReg, *
And, *
MRI, LIS);
149 if (!Cmp || !(
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
150 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
151 Cmp->getParent() !=
And->getParent())
165 auto *Sel =
TRI->findReachingDef(SelReg, Op1->
getSubReg(), *Cmp, *
MRI, LIS);
166 if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
169 if (
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
170 TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
173 Op1 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
174 Op2 =
TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
189 SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
193 return VNI->isPHIDef();
198 LLVM_DEBUG(
dbgs() <<
"Folding sequence:\n\t" << *Sel <<
'\t' << *Cmp <<
'\t'
203 And->getOperand(0).getReg())
212 SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*
And, *Andn2);
213 And->eraseFromParent();
219 SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
228 "No live interval segment covering definition?");
229 for (
auto I = DefSegment;
I != SelLI->
end(); ++
I) {
241 else if (!Dst.liveAt(AndIdx))
244 LIS->getSlotIndexes()->getMBBStartIdx(Andn2->
getParent()),
251 applyLiveRanges(CCLI, CCQ.valueIn());
253 if (
CC->getSubReg()) {
261 applyLiveRanges(SR, CCQS.valueIn());
263 *LIS->getSlotIndexes(), *
TRI);
267 LIS->splitSeparateComponents(CCLI, SplitLIs);
270 LIS->removeAllRegUnitsForPhysReg(CCReg);
279 return MI.readsRegister(CondReg, TRI);
283 LIS->removeVRegDefAt(*CmpLI, CmpIdx.
getRegSlot());
284 LIS->RemoveMachineInstrFromMaps(*Cmp);
285 Cmp->eraseFromParent();
290 LIS->shrinkToUses(SelLI);
292 if (
MRI->use_nodbg_empty(SelReg) && (IsKill ||
IsDead)) {
295 LIS->removeVRegDefAt(*SelLI, SelIdx.
getRegSlot());
296 LIS->RemoveMachineInstrFromMaps(*Sel);
297 Sel->eraseFromParent();
325 if (SaveExecMI.
getOpcode() != OrSaveExecOpc)
329 return MI.getOpcode() == XorTermrOpc;
344 while (
I != First && !AndExecMI) {
345 if (
I->getOpcode() == AndOpc &&
I->getOperand(0).getReg() == DstReg &&
346 I->getOperand(1).getReg() ==
Register(ExecReg))
357 SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
358 SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
360 LiveRange &RegUnit = LIS->getRegUnit(*UI);
361 if (RegUnit.
find(StartIdx) != std::prev(RegUnit.
find(EndIdx)))
366 LIS->removeInterval(SavedExecReg);
367 LIS->removeInterval(DstReg);
371 LIS->RemoveMachineInstrFromMaps(*AndExecMI);
374 LIS->createAndComputeVirtRegInterval(DstReg);
379bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(
MachineFunction &MF) {
384 TRI =
ST.getRegisterInfo();
385 TII =
ST.getInstrInfo();
387 LIS = &getAnalysis<LiveIntervals>();
390 AndOpc =
Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
391 Andn2Opc =
Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
393 Wave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
394 XorTermrOpc =
Wave32 ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
399 bool Changed =
false;
403 if (optimizeElseBranch(
MBB)) {
404 RecalcRegs.
insert(AMDGPU::SCC);
408 if (optimizeVcndVcmpPair(
MBB)) {
409 RecalcRegs.
insert(AMDGPU::VCC_LO);
410 RecalcRegs.insert(AMDGPU::VCC_HI);
411 RecalcRegs.insert(AMDGPU::SCC);
425 if (
Term.getOpcode() != AMDGPU::S_ENDPGM ||
Term.getNumOperands() != 1)
430 while (!Blocks.empty()) {
432 auto I = CurBB->rbegin(),
E = CurBB->rend();
434 if (
I->isUnconditionalBranch() ||
I->getOpcode() == AMDGPU::S_ENDPGM)
436 else if (
I->isBranch())
441 if (
I->isDebugInstr()) {
446 if (
I->mayStore() ||
I->isBarrier() ||
I->isCall() ||
447 I->hasUnmodeledSideEffects() ||
I->hasOrderedMemoryRef())
451 <<
"Removing no effect instruction: " << *
I <<
'\n');
453 for (
auto &Op :
I->operands()) {
455 RecalcRegs.insert(
Op.getReg());
458 auto Next = std::next(
I);
459 LIS->RemoveMachineInstrFromMaps(*
I);
460 I->eraseFromParent();
470 for (
auto *Pred : CurBB->predecessors()) {
471 if (Pred->succ_size() == 1)
472 Blocks.push_back(Pred);
485 unsigned ScanThreshold = 10;
487 && ScanThreshold--; ++
I) {
489 if (!(
I->isFullCopy() &&
I->getOperand(1).getReg() ==
Register(ExecReg)))
492 Register SavedExec =
I->getOperand(0).getReg();
493 if (SavedExec.
isVirtual() &&
MRI->hasOneNonDBGUse(SavedExec)) {
494 MachineInstr *SingleExecUser = &*
MRI->use_instr_nodbg_begin(SavedExec);
497 if (SingleExecUser->
getParent() ==
I->getParent() &&
499 TII->isOperandLegal(*SingleExecUser,
Idx, &
I->getOperand(1))) {
501 LIS->RemoveMachineInstrFromMaps(*
I);
502 I->eraseFromParent();
503 MRI->replaceRegWith(SavedExec, ExecReg);
504 LIS->removeInterval(SavedExec);
513 for (
auto Reg : RecalcRegs) {
514 if (
Reg.isVirtual()) {
515 LIS->removeInterval(Reg);
516 if (!
MRI->reg_empty(Reg))
517 LIS->createAndComputeVirtRegInterval(Reg);
519 LIS->removeAllRegUnitsForPhysReg(Reg);
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx, SlotIndex SelIdx)
SI optimize exec mask operations
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
Allocate memory in an ever growing pool, as if by bump-pointer.
Implements a dense probed hash-table based set.
FunctionPass class - This class is used to implement most global optimizations.
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply, const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, unsigned ComposeSubRegIdx=0)
Refines the subranges to support LaneMask.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
bool isDeadDef() const
Return true if this instruction has a dead def.
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
bool isKill() const
Return true if the live-in value is killed by this instruction.
This class represents the liveness of a register, stack slot, etc.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
iterator_range< vni_iterator > vnis()
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none.
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
iterator_range< iterator > terminators()
reverse_iterator rbegin()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
VNInfo - Value Number Information.
std::pair< iterator, bool > insert(const ValueT &V)
self_iterator getIterator()
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
char & SIOptimizeExecMaskingPreRAID
unsigned getUndefRegState(bool B)
@ And
Bitwise or logical AND of integers.
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createSIOptimizeExecMaskingPreRAPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This represents a simple continuous liveness interval for a value.