#define DEBUG_TYPE "si-i1-copies"
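// SILowerI1Copies lowers all copies and PHIs of the pseudo register class
// VReg_1 (used by instruction selection for i1 values) into wavefront-sized
// scalar lane masks, inserting lane-mask merging code where control flow
// makes it necessary.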
  bool IsWave32 = false;
  bool lowerCopiesFromI1();
  bool lowerPhis();
  bool lowerCopiesToI1();
  bool isConstantLaneMask(Register Reg, bool &Val) const;
  void buildMergeLaneMasks(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, const DebugLoc &DL,
                           unsigned DstReg, unsigned PrevReg, unsigned CurReg);
  bool isVreg1(Register Reg) const {
    return Reg.isVirtual() &&
           MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
  }

  bool isLaneMaskReg(unsigned Reg) const {
    return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
           TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
               ST->getWavefrontSize();
  }
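// Helper class that determines which incoming values of a PHI can be used as
// lane masks as-is, and which need to be merged with a previously defined
// lane mask because a wave may reach the corresponding incoming block with
// different sets of active lanes.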
class PhiIncomingAnalysis {
    return ReachableMap.find(&MBB)->second;
    ReachableMap.clear();
    ReachableOrdered.clear();
    Predecessors.clear();
      if (MBB == &DefBlock) {
        ReachableMap[&DefBlock] = true;
    while (!Stack.empty()) {
      bool HaveReachablePred = false;
        if (ReachableMap.count(Pred)) {
          HaveReachablePred = true;
        } else {
          Stack.push_back(Pred);
        }
      if (!HaveReachablePred)
        ReachableMap[MBB] = true;
      if (HaveReachablePred) {
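// Helper class that detects loops which require us to lower an i1 copy or PHI
// in order to track the combined lane masks of threads that left the loop in
// earlier iterations.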
  unsigned FoundLoopLevel = ~0u;
  LoopFinder(MachineDominatorTree &DT, MachinePostDominatorTree &PDT)
      : DT(DT), PDT(PDT) {}
    CommonDominators.clear();
    VisitedPostDom = nullptr;
    FoundLoopLevel = ~0u;
    while (PDNode->getBlock() != PostDom) {
      if (PDNode->getBlock() == VisitedPostDom)
    if (FoundLoopLevel == Level)
    if (!inLoopLevel(*Dom, LoopLevel, Blocks)) {

    if (!inLoopLevel(*Pred, LoopLevel, Blocks))
    auto DomIt = Visited.find(&MBB);
    if (DomIt != Visited.end() && DomIt->second <= LoopLevel)
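  // Explore the next level of the loop hierarchy: walk all blocks reachable
  // from the def block up to the newly visited post-dominator and record
  // their nearest common dominator.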
  void advanceLevel() {
    if (!VisitedPostDom) {
      VisitedPostDom = DefBlock;
      VisitedDom = DefBlock;
      Stack.push_back(DefBlock);
      VisitedDom = CommonDominators.back();
      for (unsigned i = 0; i < NextLevel.size();) {
        if (PDT.dominates(VisitedPostDom, NextLevel[i])) {
          Stack.push_back(NextLevel[i]);

          NextLevel[i] = NextLevel.back();
    unsigned Level = CommonDominators.size();
    while (!Stack.empty()) {
      Visited[MBB] = Level;
        if (Succ == DefBlock) {
          if (MBB == VisitedPostDom)
            FoundLoopLevel = std::min(FoundLoopLevel, Level + 1);
          else
            FoundLoopLevel = std::min(FoundLoopLevel, Level);
          if (MBB == VisitedPostDom)
            NextLevel.push_back(Succ);
          else
            Stack.push_back(Succ);
char SILowerI1Copies::ID = 0;
FunctionPass *llvm::createSILowerI1CopiesPass() {
  return new SILowerI1Copies();
}
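// Allocate a new virtual SGPR of lane-mask width: 32 bits on wave32
// subtargets, 64 bits otherwise.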
static unsigned createLaneMaskReg(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
                                                 : &AMDGPU::SReg_64RegClass);
}
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::Selected))
    return false;
  DT = &getAnalysis<MachineDominatorTree>();
  PDT = &getAnalysis<MachinePostDominatorTree>();
  TII = ST->getInstrInfo();
  IsWave32 = ST->isWave32();
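  // Pick the scalar ALU opcodes and EXEC register that match the subtarget's
  // lane-mask width.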
  if (IsWave32) {
    ExecReg = AMDGPU::EXEC_LO;
    MovOp = AMDGPU::S_MOV_B32;
    AndOp = AMDGPU::S_AND_B32;
    OrOp = AMDGPU::S_OR_B32;
    XorOp = AMDGPU::S_XOR_B32;
    AndN2Op = AMDGPU::S_ANDN2_B32;
    OrN2Op = AMDGPU::S_ORN2_B32;
  } else {
    ExecReg = AMDGPU::EXEC;
    MovOp = AMDGPU::S_MOV_B64;
    AndOp = AMDGPU::S_AND_B64;
    OrOp = AMDGPU::S_OR_B64;
    XorOp = AMDGPU::S_XOR_B64;
    AndN2Op = AMDGPU::S_ANDN2_B64;
    OrN2Op = AMDGPU::S_ORN2_B64;
  }
  bool Changed = false;
  Changed |= lowerCopiesFromI1();
  Changed |= lowerPhis();
  Changed |= lowerCopiesToI1();
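  // Registers collected in ConstrainRegs are used where EXEC itself must not
  // be chosen by the register allocator, so constrain them to the
  // EXEC-excluding lane-mask class.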
  for (unsigned Reg : ConstrainRegs)
    MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
  ConstrainRegs.clear();
  unsigned Size = TRI.getRegSizeInBits(Reg, MRI);
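// Lower copies *from* an i1 (VReg_1) source: a copy into a 32-bit vector
// register is rewritten as a V_CNDMASK that materializes 0 or -1 per lane
// from the lane mask.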
bool SILowerI1Copies::lowerCopiesFromI1() {
  bool Changed = false;
      if (MI.getOpcode() != AMDGPU::COPY)
        continue;

      if (!isVreg1(SrcReg))
        continue;

      if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
        continue;
      assert(!MI.getOperand(0).getSubReg());

      ConstrainRegs.insert(SrcReg);
    MI->eraseFromParent();
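// Lower PHIs of VReg_1: rewrite the destination to a lane-mask register class
// and, with the help of MachineSSAUpdater, merge each incoming lane mask
// under EXEC where a wave can reach the incoming block multiple times.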
bool SILowerI1Copies::lowerPhis() {
  LoopFinder LF(*DT, *PDT);
  PhiIncomingAnalysis PIA(*PDT, TII);
      if (isVreg1(MI.getOperand(0).getReg()))
        Vreg1Phis.push_back(&MI);

  if (Vreg1Phis.empty())
    return false;
    if (&MBB != PrevMBB) {
    MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
                                      : &AMDGPU::SReg_64RegClass);
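    // PHI operands come in (value, predecessor block) pairs, starting at
    // operand 1.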
    for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
      assert(i + 1 < MI->getNumOperands());
      Register IncomingReg = MI->getOperand(i).getReg();
      if (IncomingDef->getOpcode() == AMDGPU::COPY) {
        IncomingReg = IncomingDef->getOperand(1).getReg();
        assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
      } else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
    PhiRegisters.insert(DstReg);
    std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
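    // PostDomBound is the nearest common post-dominator of the PHI block and
    // all incoming blocks; loops found below this bound force lane-mask
    // merging across iterations.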
    unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
    if (FoundLoopLevel) {
      LF.addLoopEntries(FoundLoopLevel, SSAUpdater, IncomingBlocks);
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
        IncomingUpdated.push_back(createLaneMaskReg(*MF));
        SSAUpdater.AddAvailableValue(IncomingBlocks[i],
                                     IncomingUpdated.back());
      }
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
        MachineBasicBlock &IMBB = *IncomingBlocks[i];
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
      }
    } else {
      PIA.analyze(MBB, IncomingBlocks);
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
        MachineBasicBlock &IMBB = *IncomingBlocks[i];
        if (PIA.isSource(IMBB)) {
      for (unsigned i = 0; i < IncomingRegs.size(); ++i) {
        if (!IncomingUpdated[i])
          continue;

        MachineBasicBlock &IMBB = *IncomingBlocks[i];
        buildMergeLaneMasks(
            IMBB, getSaluInsertionAtEnd(IMBB), {}, IncomingUpdated[i],
            SSAUpdater.GetValueInMiddleOfBlock(&IMBB), IncomingRegs[i]);
      }
    if (NewReg != DstReg) {
      MRI->replaceRegWith(NewReg, DstReg);
      MI->eraseFromParent();
    IncomingBlocks.clear();
    IncomingRegs.clear();
    IncomingUpdated.clear();
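// Lower copies and IMPLICIT_DEFs *to* an i1 (VReg_1) destination: the
// destination becomes a lane-mask SGPR, plain 32-bit sources are converted
// to a lane mask with V_CMP_NE, and lane-mask merging is inserted when the
// value is observed across loop iterations.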
bool SILowerI1Copies::lowerCopiesToI1() {
  bool Changed = false;
  LoopFinder LF(*DT, *PDT);
      if (MI.getOpcode() != AMDGPU::IMPLICIT_DEF &&
          MI.getOpcode() != AMDGPU::COPY)
        continue;
      if (!isVreg1(DstReg))
      if (MRI->use_empty(DstReg)) {
      MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
                                        : &AMDGPU::SReg_64RegClass);
      if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
        continue;

      assert(!MI.getOperand(1).getSubReg());
      if (!SrcReg.isVirtual() ||
          (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
        assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
        unsigned TmpReg = createLaneMaskReg(*MF);
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
            .addReg(SrcReg)
            .addImm(0);
        MI.getOperand(1).setReg(TmpReg);
        SrcReg = TmpReg;
      } else {
        MI.getOperand(1).setIsKill(false);
      }
      std::vector<MachineBasicBlock *> DomBlocks = {&MBB};
      unsigned FoundLoopLevel = LF.findLoop(PostDomBound);
      if (FoundLoopLevel) {
        SSAUpdater.Initialize(DstReg);
        SSAUpdater.AddAvailableValue(&MBB, DstReg);
        LF.addLoopEntries(FoundLoopLevel, SSAUpdater);

        buildMergeLaneMasks(MBB, MI, DL, DstReg,
                            SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg);
    MI->eraseFromParent();
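// Return true if Reg is known to hold a constant lane mask (all zeros or all
// ones), looking through copies to an S_MOV of 0 or -1; Val receives the
// constant value.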
bool SILowerI1Copies::isConstantLaneMask(Register Reg, bool &Val) const {
  const MachineInstr *MI;
  for (;;) {
    MI = MRI->getUniqueVRegDef(Reg);
    if (MI->getOpcode() == AMDGPU::IMPLICIT_DEF)
      return true;

    if (MI->getOpcode() != AMDGPU::COPY)
      break;

    Reg = MI->getOperand(1).getReg();
    if (!Reg.isVirtual())
      return false;
    if (!isLaneMaskReg(Reg))
      return false;
  }

  if (MI->getOpcode() != MovOp)
    return false;

  if (!MI->getOperand(1).isImm())
    return false;

  int64_t Imm = MI->getOperand(1).getImm();
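// The scalar lane-mask operations below clobber SCC, so the insertion-point
// helpers check which instructions at the end of a block define or use SCC
// and place the merge code where it cannot break them.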
    if (MO.isReg() && MO.getReg() == AMDGPU::SCC) {
  bool TerminatorsUseSCC = false;
  for (auto I = InsertionPt, E = MBB.end(); I != E; ++I) {
    if (TerminatorsUseSCC || DefsSCC)
      break;
  }

  if (!TerminatorsUseSCC)
    return InsertionPt;
  while (InsertionPt != MBB.begin()) {
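// Merge two lane masks at insertion point I: lanes that are active in EXEC
// take their bit from CurReg, all other lanes keep their bit from PrevReg,
// i.e. DstReg = (PrevReg & ~EXEC) | (CurReg & EXEC), with constant inputs
// folded into cheaper forms below.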
void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          const DebugLoc &DL, unsigned DstReg,
                                          unsigned PrevReg, unsigned CurReg) {
  bool PrevVal = false;
  bool PrevConstant = isConstantLaneMask(PrevReg, PrevVal);
  bool CurVal = false;
  bool CurConstant = isConstantLaneMask(CurReg, CurVal);
  if (PrevConstant && CurConstant) {
    if (PrevVal == CurVal) {
  unsigned PrevMaskedReg = 0;
  unsigned CurMaskedReg = 0;
    if (CurConstant && CurVal) {
      PrevMaskedReg = PrevReg;
    if (PrevConstant && PrevVal) {
      CurMaskedReg = CurReg;
  if (PrevConstant && !PrevVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurMaskedReg);
  } else if (CurConstant && !CurVal) {
    BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(PrevMaskedReg);
  } else if (PrevConstant && PrevVal) {
    BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
        .addReg(CurMaskedReg)
        .addReg(ExecReg);
  } else {
    BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
        .addReg(PrevMaskedReg)
        .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
  }
}