63#define DEBUG_TYPE "si-lower-control-flow"
89 unsigned Andn2TermOpc;
92 unsigned OrSaveExecOpc;
95 bool EnableOptimizeEndCf =
false;
132 while (
I !=
End && !
I->isUnconditionalBranch())
138 void optimizeEndCf();
148 return "SI Lower control flow pseudo instructions";
164char SILowerControlFlow::ID = 0;
167 "SI lower control flow",
false,
false)
183 while (!Worklist.
empty()) {
198 Register SaveExecReg =
MI.getOperand(0).getReg();
199 auto U =
MRI->use_instr_nodbg_begin(SaveExecReg);
201 if (U ==
MRI->use_instr_nodbg_end() ||
202 std::next(U) !=
MRI->use_instr_nodbg_end() ||
203 U->getOpcode() != AMDGPU::SI_END_CF)
213 Register SaveExecReg =
MI.getOperand(0).getReg();
215 assert(
Cond.getSubReg() == AMDGPU::NoSubRegister);
228 auto UseMI =
MRI->use_instr_nodbg_begin(SaveExecReg);
234 Register CopyReg = SimpleIf ? SaveExecReg
235 :
MRI->createVirtualRegister(BoolRC);
240 LoweredIf.
insert(CopyReg);
251 setImpSCCDefDead(*
And,
true);
259 setImpSCCDefDead(*
Xor, ImpDefSCC.
isDead());
272 I = skipToUncondBrOrEnd(
MBB,
I);
277 .
add(
MI.getOperand(2));
280 MI.eraseFromParent();
296 MI.eraseFromParent();
301 RecomputeRegs.
insert(SaveExecReg);
318 Register SaveReg =
MRI->createVirtualRegister(BoolRC);
321 .
add(
MI.getOperand(1));
342 ElsePt = skipToUncondBrOrEnd(
MBB, ElsePt);
349 MI.eraseFromParent();
354 MI.eraseFromParent();
362 RecomputeRegs.
insert(SrcReg);
363 RecomputeRegs.
insert(DstReg);
373 auto Dst =
MI.getOperand(0).getReg();
379 bool SkipAnding =
false;
380 if (
MI.getOperand(1).isReg()) {
382 SkipAnding =
Def->getParent() ==
MI.getParent()
392 AndReg =
MRI->createVirtualRegister(BoolRC);
395 .
add(
MI.getOperand(1));
400 .
add(
MI.getOperand(2));
403 .
add(
MI.getOperand(1))
404 .
add(
MI.getOperand(2));
415 RecomputeRegs.
insert(
And->getOperand(2).getReg());
421 MI.eraseFromParent();
431 .
add(
MI.getOperand(0));
435 auto BranchPt = skipToUncondBrOrEnd(
MBB,
MI.getIterator());
438 .
add(
MI.getOperand(1));
441 RecomputeRegs.
insert(
MI.getOperand(0).getReg());
446 MI.eraseFromParent();
450SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
460 for ( ; It != E; ++It) {
461 if (
TII->mayReadEXEC(*
MRI, *It))
468 if (
B->succ_size() != 1)
488 bool NeedBlockSplit =
false;
492 if (
I->modifiesRegister(DataReg,
TRI)) {
493 NeedBlockSplit =
true;
498 unsigned Opcode = OrOpc;
500 if (NeedBlockSplit) {
502 if (MDT && SplitBB != &
MBB) {
517 .
add(
MI.getOperand(0));
521 if (SplitBB != &
MBB) {
531 if (
Op.getReg().isVirtual())
537 for (
unsigned i = 0, e =
MRI->getNumVirtRegs(); i != e; ++i) {
545 if (
Kill->getParent() == SplitBB && !DefInOrigBlock.
contains(Reg))
553 LoweredEndCf.
insert(NewMI);
558 MI.eraseFromParent();
567void SILowerControlFlow::findMaskOperands(
MachineInstr &
MI,
unsigned OpNo,
570 if (!
Op.isReg() || !
Op.getReg().isVirtual()) {
576 if (!Def ||
Def->getParent() !=
MI.getParent() ||
577 !(
Def->isFullCopy() || (
Def->getOpcode() ==
MI.getOpcode())))
583 for (
auto I =
Def->getIterator();
I !=
MI.getIterator(); ++
I)
584 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI) &&
585 !(
I->isCopy() &&
I->getOperand(0).getReg() != Exec))
588 for (
const auto &
SrcOp :
Def->explicit_operands())
591 Src.push_back(
SrcOp);
599 assert(
MI.getNumExplicitOperands() == 3);
601 unsigned OpToReplace = 1;
602 findMaskOperands(
MI, 1, Ops);
603 if (Ops.
size() == 1) OpToReplace = 2;
604 findMaskOperands(
MI, 2, Ops);
605 if (Ops.
size() != 3)
return;
607 unsigned UniqueOpndIdx;
608 if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
609 else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
610 else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
614 MI.removeOperand(OpToReplace);
615 MI.addOperand(Ops[UniqueOpndIdx]);
616 if (
MRI->use_empty(Reg))
617 MRI->getUniqueVRegDef(Reg)->eraseFromParent();
620void SILowerControlFlow::optimizeEndCf() {
623 if (!EnableOptimizeEndCf)
629 skipIgnoreExecInstsTrivialSucc(
MBB, std::next(
MI->getIterator()));
630 if (Next ==
MBB.
end() || !LoweredEndCf.
count(&*Next))
635 =
TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
639 if (Def && LoweredIf.
count(SavedExec)) {
645 Reg =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src1)->getReg();
646 MI->eraseFromParent();
649 removeMBBifRedundant(
MBB);
661 switch (
MI.getOpcode()) {
666 case AMDGPU::SI_ELSE:
670 case AMDGPU::SI_IF_BREAK:
674 case AMDGPU::SI_LOOP:
678 case AMDGPU::SI_WATERFALL_LOOP:
679 MI.setDesc(
TII->get(AMDGPU::S_CBRANCH_EXECNZ));
682 case AMDGPU::SI_END_CF:
683 SplitBB = emitEndCf(
MI);
687 assert(
false &&
"Attempt to process unsupported instruction");
696 case AMDGPU::S_AND_B64:
697 case AMDGPU::S_OR_B64:
698 case AMDGPU::S_AND_B32:
699 case AMDGPU::S_OR_B32:
701 combineMasks(MaskMI);
716 bool IsWave32 =
ST.isWave32();
718 if (
MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
721 TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
727 MI.eraseFromParent();
738 Register InputReg =
MI.getOperand(0).getReg();
744 if (DefInstr != FirstMI) {
762 Register CountReg =
MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
763 auto BfeMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_BFE_U32), CountReg)
765 .
addImm((
MI.getOperand(1).getImm() & Mask) | 0x70000);
770 TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
773 auto CmpMI =
BuildMI(*
MBB, FirstMI,
DL,
TII->get(AMDGPU::S_CMP_EQ_U32))
780 TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
785 MI.eraseFromParent();
790 MI.eraseFromParent();
797 RecomputeRegs.
insert(InputReg);
803 if (!
I.isDebugInstr() && !
I.isUnconditionalBranch())
814 if (
P->getFallThrough(
false) == &
MBB)
816 P->ReplaceUsesOfBlockWith(&
MBB, Succ);
849 TII =
ST.getInstrInfo();
850 TRI = &
TII->getRegisterInfo();
855 LIS = getAnalysisIfAvailable<LiveIntervals>();
857 LV = getAnalysisIfAvailable<LiveVariables>();
858 MDT = getAnalysisIfAvailable<MachineDominatorTree>();
860 BoolRC =
TRI->getBoolRC();
863 AndOpc = AMDGPU::S_AND_B32;
864 OrOpc = AMDGPU::S_OR_B32;
865 XorOpc = AMDGPU::S_XOR_B32;
866 MovTermOpc = AMDGPU::S_MOV_B32_term;
867 Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
868 XorTermrOpc = AMDGPU::S_XOR_B32_term;
869 OrTermrOpc = AMDGPU::S_OR_B32_term;
870 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
871 Exec = AMDGPU::EXEC_LO;
873 AndOpc = AMDGPU::S_AND_B64;
874 OrOpc = AMDGPU::S_OR_B64;
875 XorOpc = AMDGPU::S_XOR_B64;
876 MovTermOpc = AMDGPU::S_MOV_B64_term;
877 Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
878 XorTermrOpc = AMDGPU::S_XOR_B64_term;
879 OrTermrOpc = AMDGPU::S_OR_B64_term;
880 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
885 const bool CanDemote =
887 for (
auto &
MBB : MF) {
888 bool IsKillBlock =
false;
890 if (
TII->isKillTerminator(
Term.getOpcode())) {
896 if (CanDemote && !IsKillBlock) {
897 for (
auto &
MI :
MBB) {
898 if (
MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
906 bool Changed =
false;
909 BI != MF.end(); BI = NextBB) {
910 NextBB = std::next(BI);
920 switch (
MI.getOpcode()) {
922 case AMDGPU::SI_ELSE:
923 case AMDGPU::SI_IF_BREAK:
924 case AMDGPU::SI_WATERFALL_LOOP:
925 case AMDGPU::SI_LOOP:
926 case AMDGPU::SI_END_CF:
927 SplitMBB = process(
MI);
932 case AMDGPU::SI_INIT_EXEC:
933 case AMDGPU::SI_INIT_EXEC_FROM_INPUT:
934 lowerInitExec(
MBB,
MI);
944 if (SplitMBB !=
MBB) {
945 MBB = Next->getParent();
954 for (
Register Reg : RecomputeRegs) {
960 RecomputeRegs.
clear();
961 LoweredEndCf.
clear();
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< bool > RemoveRedundantEndcf("amdgpu-remove-redundant-endcf", cl::init(true), cl::ReallyHidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI)
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an Operation in the Expression.
Implements a dense probed hash-table based set.
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
LiveInterval & createAndComputeVirtRegInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
void recomputeForSingleDefVirtReg(Register Reg)
Recompute liveness from scratch for a virtual register Reg that is known to have a single def that do...
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
DebugLoc findBranchDebugLoc()
Find and return the merged DebugLoc of the branch instructions of the block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
void eraseNode(MachineBasicBlock *BB)
eraseNode - Removes a node from the dominator tree.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineInstr * removeFromParent()
Unlink 'this' from the containing basic block, and return it without deleting it.
const MachineBasicBlock * getParent() const
MachineOperand class - Representation of each machine instruction operand.
void setIsDead(bool Val=true)
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
static bool isVALU(const MachineInstr &MI)
A vector that has set insertion semantics.
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
constexpr char WavefrontSize[]
Key for Kernel::CodeProps::Metadata::mWavefrontSize.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
char & LiveVariablesID
LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...
char & SILowerControlFlowID
VarInfo - This represents the regions where a virtual register is live in the program.
std::vector< MachineInstr * > Kills
Kills - List of MachineInstructions which are the last use of this virtual register (kill it) in the...