Go to the documentation of this file.
33 #define DEBUG_TYPE "si-i1-copies"
47 bool IsWave32 =
false;
72 StringRef getPassName()
const override {
return "SI Lower i1 Copies"; }
82 void lowerCopiesFromI1();
84 void lowerCopiesToI1();
85 bool isConstantLaneMask(
Register Reg,
bool &Val)
const;
88 unsigned DstReg,
unsigned PrevReg,
unsigned CurReg);
96 bool isLaneMaskReg(
unsigned Reg)
const {
97 return TII->getRegisterInfo().isSGPRReg(*
MRI,
Reg) &&
98 TII->getRegisterInfo().getRegSizeInBits(
Reg, *
MRI) ==
99 ST->getWavefrontSize();
125 class PhiIncomingAnalysis {
141 return ReachableMap.
find(&
MBB)->second;
149 ReachableMap.
clear();
150 ReachableOrdered.
clear();
151 Predecessors.
clear();
156 ReachableOrdered.push_back(&DefBlock);
159 if (
MBB == &DefBlock) {
160 ReachableMap[&DefBlock] =
true;
165 ReachableOrdered.push_back(
MBB);
169 bool Divergent =
false;
171 if (
MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
172 MI.getOpcode() == AMDGPU::SI_IF ||
173 MI.getOpcode() == AMDGPU::SI_ELSE ||
174 MI.getOpcode() == AMDGPU::SI_LOOP) {
184 while (!
Stack.empty()) {
188 ReachableOrdered.push_back(
MBB);
194 bool HaveReachablePred =
false;
196 if (ReachableMap.
count(Pred)) {
197 HaveReachablePred =
true;
199 Stack.push_back(Pred);
202 if (!HaveReachablePred)
203 ReachableMap[
MBB] =
true;
204 if (HaveReachablePred) {
207 Predecessors.push_back(UnreachablePred);
266 unsigned FoundLoopLevel = ~0u;
274 : DT(DT), PDT(PDT) {}
278 CommonDominators.
clear();
281 VisitedPostDom =
nullptr;
282 FoundLoopLevel = ~0u;
298 while (PDNode->
getBlock() != PostDom) {
299 if (PDNode->
getBlock() == VisitedPostDom)
303 if (FoundLoopLevel ==
Level)
315 assert(LoopLevel < CommonDominators.size());
321 if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {
327 if (!inLoopLevel(*Pred, LoopLevel, Blocks))
336 auto DomIt = Visited.
find(&
MBB);
337 if (DomIt != Visited.
end() && DomIt->second <= LoopLevel)
346 void advanceLevel() {
349 if (!VisitedPostDom) {
350 VisitedPostDom = DefBlock;
351 VisitedDom = DefBlock;
352 Stack.push_back(DefBlock);
355 VisitedDom = CommonDominators.back();
357 for (
unsigned i = 0;
i < NextLevel.size();) {
358 if (PDT.
dominates(VisitedPostDom, NextLevel[
i])) {
359 Stack.push_back(NextLevel[
i]);
361 NextLevel[
i] = NextLevel.back();
362 NextLevel.pop_back();
369 unsigned Level = CommonDominators.size();
370 while (!
Stack.empty()) {
373 NextLevel.push_back(
MBB);
379 if (Succ == DefBlock) {
380 if (
MBB == VisitedPostDom)
388 if (
MBB == VisitedPostDom)
389 NextLevel.push_back(Succ);
391 Stack.push_back(Succ);
396 CommonDominators.push_back(VisitedDom);
409 char SILowerI1Copies::
ID = 0;
414 return new SILowerI1Copies();
421 : &AMDGPU::SReg_64RegClass);
451 DT = &getAnalysis<MachineDominatorTree>();
452 PDT = &getAnalysis<MachinePostDominatorTree>();
455 TII =
ST->getInstrInfo();
456 IsWave32 =
ST->isWave32();
459 ExecReg = AMDGPU::EXEC_LO;
460 MovOp = AMDGPU::S_MOV_B32;
461 AndOp = AMDGPU::S_AND_B32;
462 OrOp = AMDGPU::S_OR_B32;
463 XorOp = AMDGPU::S_XOR_B32;
464 AndN2Op = AMDGPU::S_ANDN2_B32;
465 OrN2Op = AMDGPU::S_ORN2_B32;
467 ExecReg = AMDGPU::EXEC;
468 MovOp = AMDGPU::S_MOV_B64;
469 AndOp = AMDGPU::S_AND_B64;
470 OrOp = AMDGPU::S_OR_B64;
471 XorOp = AMDGPU::S_XOR_B64;
472 AndN2Op = AMDGPU::S_ANDN2_B64;
473 OrN2Op = AMDGPU::S_ORN2_B64;
480 for (
unsigned Reg : ConstrainRegs)
482 ConstrainRegs.clear();
496 void SILowerI1Copies::lowerCopiesFromI1() {
501 if (
MI.getOpcode() != AMDGPU::COPY)
506 if (!isVreg1(SrcReg))
509 if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
517 assert(!
MI.getOperand(0).getSubReg());
519 ConstrainRegs.insert(SrcReg);
526 DeadCopies.push_back(&
MI);
530 MI->eraseFromParent();
535 void SILowerI1Copies::lowerPhis() {
537 LoopFinder LF(*DT, *PDT);
538 PhiIncomingAnalysis PIA(*PDT);
549 if (isVreg1(
MI.getOperand(0).getReg()))
550 Vreg1Phis.push_back(&
MI);
557 if (&
MBB != PrevMBB) {
566 : &AMDGPU::SReg_64RegClass);
569 for (
unsigned i = 1;
i <
MI->getNumOperands();
i += 2) {
575 if (IncomingDef->
getOpcode() == AMDGPU::COPY) {
577 assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
579 }
else if (IncomingDef->
getOpcode() == AMDGPU::IMPLICIT_DEF) {
585 IncomingBlocks.push_back(IncomingMBB);
586 IncomingRegs.push_back(IncomingReg);
590 PhiRegisters.
insert(DstReg);
595 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
601 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
605 if (FoundLoopLevel) {
606 LF.addLoopEntries(FoundLoopLevel,
SSAUpdater, IncomingBlocks);
608 for (
unsigned i = 0;
i < IncomingRegs.size(); ++
i) {
611 IncomingUpdated.back());
614 for (
unsigned i = 0;
i < IncomingRegs.size(); ++
i) {
617 IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[
i],
623 PIA.analyze(
MBB, IncomingBlocks);
628 for (
unsigned i = 0;
i < IncomingRegs.size(); ++
i) {
630 if (PIA.isSource(IMBB)) {
631 IncomingUpdated.push_back(0);
639 for (
unsigned i = 0;
i < IncomingRegs.size(); ++
i) {
640 if (!IncomingUpdated[
i])
645 IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[
i],
651 if (NewReg != DstReg) {
653 MI->eraseFromParent();
656 IncomingBlocks.
clear();
657 IncomingRegs.
clear();
658 IncomingUpdated.
clear();
662 void SILowerI1Copies::lowerCopiesToI1() {
664 LoopFinder LF(*DT, *PDT);
671 if (
MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
672 MI.getOpcode() != AMDGPU::COPY)
676 if (!isVreg1(DstReg))
680 DeadCopies.push_back(&
MI);
687 : &AMDGPU::SReg_64RegClass);
688 if (
MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
693 assert(!
MI.getOperand(1).getSubReg());
695 if (!SrcReg.
isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
696 assert(
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *
MRI) == 32);
701 MI.getOperand(1).setReg(TmpReg);
707 std::vector<MachineBasicBlock *> DomBlocks = {&
MBB};
713 unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
714 if (FoundLoopLevel) {
717 LF.addLoopEntries(FoundLoopLevel,
SSAUpdater);
719 buildMergeLaneMasks(
MBB,
MI,
DL, DstReg,
721 DeadCopies.push_back(&
MI);
726 MI->eraseFromParent();
731 bool SILowerI1Copies::isConstantLaneMask(
Register Reg,
bool &Val)
const {
735 if (
MI->getOpcode() != AMDGPU::COPY)
738 Reg =
MI->getOperand(1).getReg();
739 if (!
Reg.isVirtual())
741 if (!isLaneMaskReg(
Reg))
745 if (
MI->getOpcode() != MovOp)
748 if (!
MI->getOperand(1).isImm())
751 int64_t Imm =
MI->getOperand(1).getImm();
783 bool TerminatorsUseSCC =
false;
784 for (
auto I = InsertionPt,
E =
MBB.
end();
I !=
E; ++
I) {
787 if (TerminatorsUseSCC || DefsSCC)
791 if (!TerminatorsUseSCC)
794 while (InsertionPt !=
MBB.
begin()) {
810 unsigned PrevReg,
unsigned CurReg) {
812 bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
814 bool CurConstant = isConstantLaneMask(CurReg, CurVal);
816 if (PrevConstant && CurConstant) {
817 if (PrevVal == CurVal) {
829 unsigned PrevMaskedReg = 0;
830 unsigned CurMaskedReg = 0;
832 if (CurConstant && CurVal) {
833 PrevMaskedReg = PrevReg;
843 if (PrevConstant && PrevVal) {
844 CurMaskedReg = CurReg;
853 if (PrevConstant && !PrevVal) {
856 }
else if (CurConstant && !CurVal) {
859 }
else if (PrevConstant && PrevVal) {
866 .
addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
pred_range predecessors(BasicBlock *BB)
bool hasProperty(Property P) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static bool isVRegCompatibleReg(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, Register Reg)
static unsigned insertUndefLaneMask(MachineBasicBlock &MBB)
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
FunctionPass * createSILowerI1CopiesPass()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void initializeSILowerI1CopiesPass(PassRegistry &)
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
iterator_range< iterator > terminators()
DomTreeNodeBase * getIDom() const
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
unsigned const TargetRegisterInfo * TRI
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void push_back(MachineInstr *MI)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const HexagonInstrInfo * TII
MachineOperand class - Representation of each machine instruction operand.
INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, "SI Lower i1 Copies", false, false) INITIALIZE_PASS_END(SILowerI1Copies
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
static unsigned createLaneMaskReg(MachineFunction &MF)
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
iterator find(const_arg_type_t< KeyT > Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
iterator_range< pred_iterator > predecessors()
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
iterator_range< succ_iterator > successors()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void instrDefsUsesSCC(const MachineInstr &MI, bool &Def, bool &Use)
Base class for the actual dominator tree node.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequired()
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Helper class for SSA formation on a set of values defined in multiple blocks.
void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
A Use represents the edge between a Value definition and its users.