#define DEBUG_TYPE "gcn-dpp-combine"

STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
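// This pass combines a V_MOV_B32_dpp (or one of its 64-bit variants) with the
// VALU instructions that use its result, so the DPP lane-crossing controls are
// applied directly by the consuming instruction. Roughly:
//
//   $dpp_value = V_MOV_B32_dpp $old, $src, dpp_ctrl, row_mask, bank_mask,
//                              bound_ctrl
//   $res       = VALU $dpp_value [, $src1]
//
// becomes
//
//   $res = VALU_dpp $combined_old, $src [, $src1], dpp_ctrl, row_mask,
//                   bank_mask, $combined_bound_ctrl
//
// If any use of the mov cannot be combined, all newly created instructions are
// rolled back (see combineDPPMov below).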
class GCNDPPCombine : public MachineFunctionPass {
  // ...

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpndValue, bool CombBCZ,
                              bool IsShrinkable) const;

  MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR, bool CombBCZ,
                              bool IsShrinkable) const;

  bool hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, int64_t Value,
                       int64_t Mask = -1) const;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  int getDPPOp(unsigned Op, bool IsShrinkable) const;

  // ...
};
char GCNDPPCombine::ID = 0;
FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}
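// isShrinkable() checks whether a VOP3 instruction can be shrunk to its e32
// form for DPP purposes: it needs a 32-bit encoding, its sdst (if present)
// must have no uses, and only default modifiers (at most abs/neg) may be set.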
bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  if (!TII->isVOP3(Op)) {
    return false;
  }
  if (!TII->hasVALU32BitEncoding(Op)) {
    // No e32 equivalent to shrink to.
    return false;
  }
  if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
    // Give up if the carry-out or compare result is still used: the shrunken
    // form has no sdst.
    if (!MRI->use_nodbg_empty(SDst->getReg()))
      return false;
  }
  // Reject any modifier other than abs/neg.
  const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
  if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
      !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0)) {
    return false;
  }
  return true;
}
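// getDPPOp() maps an opcode to a DPP-capable opcode: it prefers the 32-bit
// DPP form (shrinking to e32 first when allowed) and falls back to the VOP3
// DPP form when the subtarget supports it; -1 means no DPP form exists.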
int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
  int DPP32 = AMDGPU::getDPPOp32(Op);
  if (IsShrinkable) {
    assert(DPP32 == -1);
    int E32 = AMDGPU::getVOPe32(Op);
    DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
  }
  if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
    return DPP32;
  int DPP64 = -1;
  if (ST->hasVOP3DPP())
    DPP64 = AMDGPU::getDPPOp64(Op);
  if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
    return DPP64;
  return -1;
}
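// getOldOpndValue() tracks the definition of the old-operand register and
// returns: the immediate that initializes it, if any; nullptr if the register
// is undefined (IMPLICIT_DEF); or the operand itself otherwise.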
MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
  auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
  if (!Def)
    return nullptr;

  switch (Def->getOpcode()) {
  default:
    break;
  case AMDGPU::IMPLICIT_DEF:
    return nullptr;
  case AMDGPU::COPY:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64: {
    auto &Op1 = Def->getOperand(1);
    if (Op1.isImm())
      return &Op1;
    break;
  }
  }
  return &OldOpnd;
}
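// createDPPInst() builds the combined instruction from the original VALU
// instruction and the DPP mov, copying operands one at a time and giving up
// (returning nullptr) as soon as any operand would be illegal in the DPP
// encoding.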
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ,
                                           bool IsShrinkable) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  bool HasVOP3DPP = ST->hasVOP3DPP();
  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
  if (DPPOp == -1) {
    // No DPP form of this opcode exists.
    return nullptr;
  }
  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes =
      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
  (void)MaskAllLanes; // only used by the assert below
  assert((MaskAllLanes ||
          !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
                                   TII->isVOPC(OrigOpE32)))) &&
         "VOPC cannot form DPP unless mask is full");
  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(),
                         TII->get(DPPOp))
                     .setMIFlags(OrigMI.getFlags());
  bool Fail = false;
  do {
    int NumOperands = 0;
    if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
      DPPInst.add(*Dst);
      ++NumOperands;
    }
    if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
      if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
        DPPInst.add(*SDst);
        ++NumOperands;
      }
      // If the instruction was shrunk to 32 bits, the sdst is simply dropped.
    }
    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(
          CombOldVGPR,
          *MRI->getRegClass(
              TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
          *MRI));
      DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
      ++NumOperands;
    } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
                                      TII->isVOPC(OrigOpE32))) {
      // VOPC DPP (and VOPC promoted to VOP3 DPP) has no old operand; the
      // destination is a mask register, so there is nothing to preserve.
    } else {
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }
    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src0_modifiers));
      assert(HasVOP3DPP ||
             (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    // The same source may feed several combined instructions, so it must not
    // carry a kill flag here.
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;
    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src1_modifiers));
      assert(HasVOP3DPP ||
             (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Src1) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }
    if (auto *Mod2 =
            TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(
                                DPPOp, AMDGPU::OpName::src2_modifiers));
      DPPInst.addImm(Mod2->getImm());
      ++NumOperands;
    }
    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
    if (Src2) {
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
      ++NumOperands;
    }
    if (HasVOP3DPP) {
      auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
      if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
        DPPInst.addImm(ClampOpr->getImm());
      }
      auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
      if (VdstInOpr &&
          AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) {
        DPPInst.add(*VdstInOpr);
      }
      auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
      if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) {
        DPPInst.addImm(OmodOpr->getImm());
      }
      // op_sel must be all zero and op_sel_hi must be all ones for the
      // combine to be valid.
      if (auto *OpSelOpr =
              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
        auto OpSel = OpSelOpr->getImm();
        if (OpSel != 0) {
          LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");
          Fail = true;
          break;
        }
        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel))
          DPPInst.addImm(OpSel);
      }
      if (auto *OpSelHiOpr =
              TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
        auto OpSelHi = OpSelHiOpr->getImm();
        // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
        // the bitmask for 3 bits all set.
        assert(Src2 && "Expected vop3p with 3 operands");
        if (OpSelHi != 7) {
          LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");
          Fail = true;
          break;
        }
        if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi))
          DPPInst.addImm(OpSelHi);
      }
      auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
      if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) {
        DPPInst.addImm(NegOpr->getImm());
      }
      auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
      if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) {
        DPPInst.addImm(NegHiOpr->getImm());
      }
    }
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "  combined: " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}
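// isIdentityValue() reports whether the old-operand immediate is the identity
// element for the given opcode: 0 for add/or/subrev/xor/umax, all-ones for
// and/umin, INT_MAX for smin, INT_MIN for smax, and 1 for the 24-bit
// multiplies. If old is the identity, disabled lanes compute
// op(identity, src1) == src1, so src1 can safely take over as the combined
// old operand.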
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
  assert(OldOpnd->isImm());
  switch (OrigMIOp) {
  default:
    break;
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_MAX_U32_e32:
  case AMDGPU::V_MAX_U32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::V_XOR_B32_e64:
    if (OldOpnd->getImm() == 0)
      return true;
    break;
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_MIN_U32_e32:
  case AMDGPU::V_MIN_U32_e64:
    if (static_cast<uint32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<uint32_t>::max())
      return true;
    break;
  case AMDGPU::V_MIN_I32_e32:
  case AMDGPU::V_MIN_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::max())
      return true;
    break;
  case AMDGPU::V_MAX_I32_e32:
  case AMDGPU::V_MAX_I32_e64:
    if (static_cast<int32_t>(OldOpnd->getImm()) ==
        std::numeric_limits<int32_t>::min())
      return true;
    break;
  case AMDGPU::V_MUL_I32_I24_e32:
  case AMDGPU::V_MUL_I32_I24_e64:
  case AMDGPU::V_MUL_U32_U24_e32:
  case AMDGPU::V_MUL_U32_U24_e64:
    if (OldOpnd->getImm() == 1)
      return true;
    break;
  }
  return false;
}
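// This overload pre-validates the old-operand value: when bound_ctrl is not
// being combined and the old value is an immediate, the combine is only legal
// if that immediate is the identity for the opcode, in which case src1 (a
// register of the right class) replaces it as the old operand. It then
// delegates to the overload above.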
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           MachineOperand *OldOpndValue,
                                           bool CombBCZ,
                                           bool IsShrinkable) const {
  if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (!Src1 || !Src1->isReg()) {
      LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
      return nullptr;
    }
    if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
      LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
      return nullptr;
    }
    CombOldVGPR = getRegSubRegPair(*Src1);
    auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
    const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
    if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
      LLVM_DEBUG(dbgs() << "  failed: src1 has the wrong register class\n");
      return nullptr;
    }
  }
  return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
}
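// hasNoImmOrEqual() returns true if the named operand is absent, or if its
// immediate masked by Mask equals Value; used above to reject instructions
// with non-default clamp/omod/source modifiers.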
bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
                                    int64_t Value, int64_t Mask) const {
  auto *Imm = TII->getNamedOperand(MI, OpndName);
  if (!Imm)
    return true;

  assert(Imm->isImm());
  return (Imm->getImm() & Mask) == Value;
}
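// combineDPPMov() drives the combine for one DPP mov: it validates the mov
// (virtual registers only, legal control values, consistent masks and
// bound_ctrl), then walks every use of its result and tries to fold the DPP
// controls into each user; if any use resists, the whole attempt is rolled
// back and the mov is left untouched.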
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }
  // EXEC must not change between the mov and any of its uses.
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI))
    return false;
  if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
      MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
    auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
    assert(DppCtrl && DppCtrl->isImm());
    if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
      // The DP ALU supports only a restricted set of DPP control values.
      LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"
                           " control value\n");
      return false;
    }
  }
  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes =
      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();
  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
    return false;
  }

  auto *const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (nullptr), an immediate, or the operand
  // itself (see getOldOpndValue above).
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
  bool CombBCZ = false;

  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
             if (!OldOpndValue) dbgs() << "undef";
             else dbgs() << *OldOpndValue;
             dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
  SmallVector<MachineInstr *, 4> OrigMIs, DPPMIs;
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // If the old value is folded away by bound_ctrl, replace it with a fresh
  // undef register.
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
    CombOldVGPR = RegSubRegPair(MRI->createVirtualRegister(RC));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }
  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand *, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    assert((TII->get(OrigOp).Size != 4 || !AMDGPU::isTrue16Inst(OrigOp)) &&
           "There should not be e32 True16 instructions pre-RA");
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      // EXEC must not change between the REG_SEQUENCE and its uses either.
      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI))
        break;

      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }
    bool IsShrinkable = isShrinkable(OrigMI);
    if (!(IsShrinkable ||
          ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
            TII->isVOP3(OrigOp)) &&
           ST->hasVOP3DPP()) ||
          TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
      // Only VOP1/VOP2 (and VOP3/VOP3P/VOPC when the subtarget has VOP3 DPP)
      // can host the DPP controls.
      break;
    }
    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [2]
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
      break;
    }

    auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
    assert(Src0 && "Src1 without Src0?");
    if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
                         (Src2 && Src2->isIdenticalTo(*Src0)))) ||
        (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
                         (Src2 && Src2->isIdenticalTo(*Src1))))) {
      LLVM_DEBUG(
          dbgs()
          << "  failed: DPP register is used more than once per instruction\n");
      break;
    }
    if (Use == Src0) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ, IsShrinkable)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else {
      assert(Use == Src1 && OrigMI.isCommutable()); // by check [2]
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        if (auto *DPPInst =
                createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
                              IsShrinkable)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      }
      NewMI->eraseFromParent();
    }
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  Rollback |= !Uses.empty();
  for (auto *MI : *(Rollback ? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef();
    }
  }

  return !Rollback;
}
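// runOnMachineFunction() walks each block in reverse. 32-bit DPP movs are
// combined directly; 64-bit DPP movs are combined whole when the subtarget
// has DP ALU DPP support, and otherwise split into two 32-bit halves that are
// retried individually.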
bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST->getInstrInfo();

  bool Changed = false;
  for (auto &MBB : MF) {
    for (auto &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
                 MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
        if (ST->hasDPALU_DPP() && combineDPPMov(MI)) {
          Changed = true;
          ++NumDPPMovsCombined;
        } else {
          auto Split = TII->expandMovDPP64(MI);
          for (auto *M : {Split.first, Split.second}) {
            if (M && combineDPPMov(*M))
              ++NumDPPMovsCombined;
          }
          Changed = true;
        }
      }
    }
  }
  return Changed;
}