29#define DEBUG_TYPE "si-i1-copies"
48 void markAsLaneMask(
Register DstReg)
const override;
49 void getCandidatesForLowering(
51 void collectIncomingValuesFromPhi(
62 bool lowerCopiesFromI1();
63 bool lowerCopiesToI1();
64 bool cleanConstrainRegs(
bool Changed);
66 return Reg.isVirtual() && MRI->getRegClass(
Reg) == &AMDGPU::VReg_1RegClass;
73 : PhiLoweringHelper(MF, DT, PDT) {}
75bool Vreg1LoweringHelper::cleanConstrainRegs(
bool Changed) {
79 ConstrainRegs.clear();
106class PhiIncomingAnalysis {
107 MachinePostDominatorTree &PDT;
108 const SIInstrInfo *
TII;
112 MapVector<MachineBasicBlock *, bool> ReachableMap;
113 SmallVector<MachineBasicBlock *, 4> Stack;
114 SmallVector<MachineBasicBlock *, 4> Predecessors;
117 PhiIncomingAnalysis(MachinePostDominatorTree &PDT,
const SIInstrInfo *
TII)
122 bool isSource(MachineBasicBlock &
MBB)
const {
123 return ReachableMap.
find(&
MBB)->second;
128 void analyze(MachineBasicBlock &DefBlock,
131 ReachableMap.
clear();
132 Predecessors.
clear();
138 for (
auto Incoming : Incomings) {
139 MachineBasicBlock *
MBB = Incoming.Block;
140 if (
MBB == &DefBlock) {
141 ReachableMap[&DefBlock] =
true;
153 while (!
Stack.empty()) {
154 MachineBasicBlock *
MBB =
Stack.pop_back_val();
159 for (
auto &[
MBB, Reachable] : ReachableMap) {
160 bool HaveReachablePred =
false;
162 if (ReachableMap.count(Pred)) {
163 HaveReachablePred =
true;
165 Stack.push_back(Pred);
168 if (!HaveReachablePred)
170 if (HaveReachablePred) {
171 for (MachineBasicBlock *UnreachablePred : Stack) {
214 MachineDominatorTree &DT;
215 MachinePostDominatorTree &PDT;
220 DenseMap<MachineBasicBlock *, unsigned> Visited;
224 SmallVector<MachineBasicBlock *, 4> CommonDominators;
227 MachineBasicBlock *VisitedPostDom =
nullptr;
232 unsigned FoundLoopLevel = ~0
u;
234 MachineBasicBlock *DefBlock =
nullptr;
235 SmallVector<MachineBasicBlock *, 4>
Stack;
236 SmallVector<MachineBasicBlock *, 4> NextLevel;
239 LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
240 : DT(DT), PDT(PDT) {}
244 CommonDominators.
clear();
247 VisitedPostDom =
nullptr;
248 FoundLoopLevel = ~0
u;
257 unsigned findLoop(MachineBasicBlock *PostDom) {
264 while (PDNode->
getBlock() != PostDom) {
265 if (PDNode->
getBlock() == VisitedPostDom)
269 if (FoundLoopLevel == Level)
279 void addLoopEntries(
unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
280 MachineRegisterInfo &MRI,
281 MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs,
285 MachineBasicBlock *Dom = CommonDominators[LoopLevel];
286 for (
auto &Incoming : Incomings)
289 if (!inLoopLevel(*Dom, LoopLevel, Incomings)) {
296 if (!inLoopLevel(*Pred, LoopLevel, Incomings))
304 bool inLoopLevel(MachineBasicBlock &
MBB,
unsigned LoopLevel,
306 auto DomIt = Visited.
find(&
MBB);
307 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
310 for (
auto &Incoming : Incomings)
311 if (Incoming.Block == &
MBB)
317 void advanceLevel() {
318 MachineBasicBlock *VisitedDom;
320 if (!VisitedPostDom) {
321 VisitedPostDom = DefBlock;
322 VisitedDom = DefBlock;
323 Stack.push_back(DefBlock);
325 VisitedPostDom = PDT.
getNode(VisitedPostDom)->getIDom()->getBlock();
326 VisitedDom = CommonDominators.
back();
328 for (
unsigned i = 0; i < NextLevel.
size();) {
329 if (PDT.
dominates(VisitedPostDom, NextLevel[i])) {
330 Stack.push_back(NextLevel[i]);
332 NextLevel[i] = NextLevel.
back();
340 unsigned Level = CommonDominators.
size();
341 while (!
Stack.empty()) {
342 MachineBasicBlock *
MBB =
Stack.pop_back_val();
350 if (Succ == DefBlock) {
351 if (
MBB == VisitedPostDom)
352 FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
354 FoundLoopLevel = std::min(FoundLoopLevel, Level);
359 if (
MBB == VisitedPostDom)
362 Stack.push_back(Succ);
385 BuildMI(*
MBB,
MBB->getFirstTerminator(), {},
TII->get(AMDGPU::IMPLICIT_DEF),
394 unsigned Size =
TRI.getRegSizeInBits(
Reg, MRI);
399bool Vreg1LoweringHelper::lowerCopiesFromI1() {
401 SmallVector<MachineInstr *, 4> DeadCopies;
403 for (MachineBasicBlock &
MBB : *MF) {
404 for (MachineInstr &
MI :
MBB) {
405 if (
MI.getOpcode() != AMDGPU::COPY)
410 if (!isVreg1(SrcReg))
413 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
423 assert(!
MI.getOperand(0).getSubReg());
425 ConstrainRegs.insert(SrcReg);
435 for (MachineInstr *
MI : DeadCopies)
436 MI->eraseFromParent();
447 MRI = &
MF->getRegInfo();
449 TII =
ST->getInstrInfo();
454 LoopFinder LF(*
DT, *
PDT);
455 PhiIncomingAnalysis PIA(*
PDT,
TII);
460 if (Vreg1Phis.
empty())
463 DT->updateDFSNumbers();
467 if (&
MBB != PrevMBB) {
485 return DT->getNode(LHS.Block)->getDFSNumIn() <
486 DT->getNode(RHS.Block)->getDFSNumIn();
495 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
497 DomBlocks.push_back(
Use.getParent());
500 PDT->findNearestCommonDominator(DomBlocks);
506 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
510 if (FoundLoopLevel) {
528 PIA.analyze(
MBB, Incomings);
536 if (PIA.isSource(IMBB)) {
557 if (NewReg != DstReg) {
559 MI->eraseFromParent();
567bool Vreg1LoweringHelper::lowerCopiesToI1() {
570 LoopFinder LF(*DT, *PDT);
577 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
578 MI.getOpcode() != AMDGPU::COPY)
582 if (!isVreg1(DstReg))
594 markAsLaneMask(DstReg);
595 initializeLaneMaskRegisterAttributes(DstReg);
597 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
602 assert(!
MI.getOperand(1).getSubReg());
604 if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
605 assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
610 MI.getOperand(1).setReg(TmpReg);
614 MI.getOperand(1).setIsKill(
false);
619 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
621 DomBlocks.push_back(
Use.getParent());
623 MachineBasicBlock *PostDomBound =
625 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
626 if (FoundLoopLevel) {
629 LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs);
631 buildMergeLaneMasks(
MBB,
MI,
DL, DstReg,
637 for (MachineInstr *
MI : DeadCopies)
638 MI->eraseFromParent();
648 MI =
MRI->getUniqueVRegDef(Reg);
649 if (
MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
652 if (
MI->getOpcode() != AMDGPU::COPY)
655 Reg =
MI->getOperand(1).getReg();
656 if (!Reg.isVirtual())
662 if (
MI->getOpcode() !=
LMC->MovOpc)
665 if (!
MI->getOperand(1).isImm())
668 int64_t
Imm =
MI->getOperand(1).getImm();
686 if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
699 auto InsertionPt =
MBB.getFirstTerminator();
700 bool TerminatorsUseSCC =
false;
701 for (
auto I = InsertionPt, E =
MBB.end();
I != E; ++
I) {
704 if (TerminatorsUseSCC || DefsSCC)
708 if (!TerminatorsUseSCC)
711 while (InsertionPt !=
MBB.begin()) {
725void Vreg1LoweringHelper::markAsLaneMask(
Register DstReg)
const {
729void Vreg1LoweringHelper::getCandidatesForLowering(
733 if (isVreg1(
MI.getOperand(0).getReg()))
739void Vreg1LoweringHelper::collectIncomingValuesFromPhi(
740 const MachineInstr *
MI,
741 SmallVectorImpl<AMDGPU::Incoming> &Incomings)
const {
742 for (
unsigned i = 1; i <
MI->getNumOperands(); i += 2) {
743 assert(i + 1 <
MI->getNumOperands());
744 Register IncomingReg =
MI->getOperand(i).getReg();
745 MachineBasicBlock *IncomingMBB =
MI->getOperand(i + 1).getMBB();
748 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
750 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
752 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
755 assert(IncomingDef->
isPHI() || PhiRegisters.count(IncomingReg));
763 MachineBasicBlock *
MBB) {
767void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &
MBB,
772 bool PrevVal =
false;
773 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
775 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
777 if (PrevConstant && CurConstant) {
778 if (PrevVal == CurVal) {
793 if (CurConstant && CurVal) {
794 PrevMaskedReg = PrevReg;
804 if (PrevConstant && PrevVal) {
805 CurMaskedReg = CurReg;
814 if (PrevConstant && !PrevVal) {
817 }
else if (CurConstant && !CurVal) {
820 }
else if (PrevConstant && PrevVal) {
827 .
addReg(CurMaskedReg ? CurMaskedReg : LMC->ExecReg);
831void Vreg1LoweringHelper::constrainAsLaneMask(AMDGPU::Incoming &In) {}
848 Vreg1LoweringHelper Helper(&MF, &MDT, &MPDT);
850 Changed |= Helper.lowerCopiesFromI1();
852 Changed |= Helper.lowerCopiesToI1();
853 return Helper.cleanConstrainRegs(
Changed);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
static Register insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
static bool runFixI1Copies(MachineFunction &MF, MachineDominatorTree &MDT, MachinePostDominatorTree &MPDT)
Lower all instructions that def or use vreg_1 registers.
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
Interface definition of the PhiLoweringHelper class that implements lane mask merging algorithm for d...
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool isLaneMaskReg(Register Reg) const
MachinePostDominatorTree * PDT
virtual void replaceDstReg(Register NewReg, Register OldReg, MachineBasicBlock *MBB)=0
MachineBasicBlock::iterator getSaluInsertionAtEnd(MachineBasicBlock &MBB) const
Return a point at the end of the given MBB to insert SALU instructions for lane mask calculation.
bool isConstantLaneMask(Register Reg, bool &Val) const
MachineDominatorTree * DT
MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs
void initializeLaneMaskRegisterAttributes(Register LaneMask)
virtual void buildMergeLaneMasks(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg)=0
virtual void getCandidatesForLowering(SmallVectorImpl< MachineInstr * > &Vreg1Phis) const =0
const AMDGPU::LaneMaskConstants * LMC
MachineRegisterInfo * MRI
DenseSet< Register > PhiRegisters
virtual void markAsLaneMask(Register DstReg) const =0
virtual void constrainAsLaneMask(Incoming &In)=0
virtual void collectIncomingValuesFromPhi(const MachineInstr *MI, SmallVectorImpl< Incoming > &Incomings) const =0
PhiLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, MachinePostDominatorTree *PDT)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Implements a dense probed hash-table based set.
DomTreeNodeBase * getIDom() const
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
const HexagonRegisterInfo & getRegisterInfo() const
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass(char &ID)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
LLVM_ABI MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
void Initialize(Register V)
Initialize - Reset this object to get ready for a new set of SSA updates.
Register GetValueInMiddleOfBlock(MachineBasicBlock *BB, bool ExistingValueOnly=false)
GetValueInMiddleOfBlock - Construct SSA form, materializing a value that is live in the middle of the...
void AddAvailableValue(MachineBasicBlock *BB, Register V)
AddAvailableValue - Indicate that a rewritten value is available at the end of the specified block wi...
iterator find(const KeyT &Key)
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Helper class for SSA formation on a set of values defined in multiple blocks.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Register createLaneMaskReg(MachineRegisterInfo *MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
ArrayRef(const T &OneElt) -> ArrayRef< T >
FunctionPass * createSILowerI1CopiesLegacyPass()
char & SILowerI1CopiesLegacyID
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
MachineBasicBlock * Block
All attributes(register class or bank and low-level type) a virtual register can have.