48#define DEBUG_TYPE "gcn-dpp-combine"
50STATISTIC(NumDPPMovsCombined,
"Number of DPP moves combined.");
66 bool IsShrinkable)
const;
70 bool IsShrinkable)
const;
75 int64_t Mask = -1)
const;
97 .
set(MachineFunctionProperties::Property::IsSSA);
101 int getDPPOp(
unsigned Op,
bool IsShrinkable)
const;
109char GCNDPPCombine::
ID = 0;
114 return new GCNDPPCombine();
118 unsigned Op =
MI.getOpcode();
119 if (!
TII->isVOP3(
Op)) {
122 if (!
TII->hasVALU32BitEncoding(
Op)) {
130 if (
const auto *SDst =
TII->getNamedOperand(
MI, AMDGPU::OpName::sdst)) {
135 if (!
MRI->use_nodbg_empty(SDst->getReg()))
140 if (!hasNoImmOrEqual(
MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
141 !hasNoImmOrEqual(
MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
142 !hasNoImmOrEqual(
MI, AMDGPU::OpName::clamp, 0) ||
143 !hasNoImmOrEqual(
MI, AMDGPU::OpName::omod, 0)) {
150int GCNDPPCombine::getDPPOp(
unsigned Op,
bool IsShrinkable)
const {
157 if (DPP32 != -1 &&
TII->pseudoToMCOpcode(DPP32) != -1)
160 if (
ST->hasVOP3DPP())
162 if (DPP64 != -1 &&
TII->pseudoToMCOpcode(DPP64) != -1)
176 switch(
Def->getOpcode()) {
178 case AMDGPU::IMPLICIT_DEF:
181 case AMDGPU::V_MOV_B32_e32:
182 case AMDGPU::V_MOV_B64_PSEUDO:
183 case AMDGPU::V_MOV_B64_e32:
184 case AMDGPU::V_MOV_B64_e64: {
185 auto &Op1 =
Def->getOperand(1);
196 int16_t RegClass =
MI.getDesc().operands()[
Idx].RegClass;
201 return TRI->getRegSizeInBits(*
TRI->getRegClass(RegClass));
208 bool IsShrinkable)
const {
210 MovMI.
getOpcode() == AMDGPU::V_MOV_B64_dpp ||
211 MovMI.
getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
213 bool HasVOP3DPP =
ST->hasVOP3DPP();
215 auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
222 auto *RowMaskOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
223 assert(RowMaskOpnd && RowMaskOpnd->isImm());
224 auto *BankMaskOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
225 assert(BankMaskOpnd && BankMaskOpnd->isImm());
226 const bool MaskAllLanes =
227 RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
230 !(
TII->isVOPC(DPPOp) || (
TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
231 TII->isVOPC(OrigOpE32)))) &&
232 "VOPC cannot form DPP unless mask is full");
241 if (
auto *Dst =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
245 if (
auto *SDst =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
246 if (
TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
255 assert(OldIdx == NumOperands);
259 TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
265 }
else if (
TII->isVOPC(DPPOp) || (
TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
266 TII->isVOPC(OrigOpE32))) {
271 LLVM_DEBUG(
dbgs() <<
" failed: no old operand in DPP instruction,"
277 auto *Mod0 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
280 AMDGPU::OpName::src0_modifiers));
283 DPPInst.addImm(Mod0->getImm());
289 auto *Src0 =
TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
291 int Src0Idx = NumOperands;
292 if (!
TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
298 DPPInst->getOperand(NumOperands).setIsKill(
false);
301 auto *Mod1 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers);
304 AMDGPU::OpName::src1_modifiers));
307 DPPInst.addImm(Mod1->getImm());
313 auto *Src1 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
315 int OpNum = NumOperands;
319 if (!
ST->hasDPPSrc1SGPR()) {
322 "Src0 and Src1 operands should have the same size");
325 if (!
TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
334 auto *Mod2 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers);
340 DPPInst.addImm(Mod2->getImm());
343 auto *Src2 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
345 if (!
TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
346 !
TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
356 auto *ClampOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
358 DPPInst.addImm(ClampOpr->getImm());
360 auto *VdstInOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
363 DPPInst.add(*VdstInOpr);
365 auto *OmodOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
367 DPPInst.addImm(OmodOpr->getImm());
371 if (
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
376 if (Mod0 &&
TII->isVOP3(OrigMI) && !
TII->isVOP3P(OrigMI))
385 DPPInst.addImm(OpSel);
387 if (
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
395 assert(Src2 &&
"Expected vop3p with 3 operands");
397 LLVM_DEBUG(
dbgs() <<
" failed: op_sel_hi must be all set to one\n");
402 DPPInst.addImm(OpSelHi);
404 auto *NegOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
406 DPPInst.addImm(NegOpr->getImm());
408 auto *NegHiOpr =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
410 DPPInst.addImm(NegHiOpr->getImm());
413 DPPInst.add(*
TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
414 DPPInst.add(*
TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
415 DPPInst.add(*
TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
416 DPPInst.addImm(CombBCZ ? 1 : 0);
420 DPPInst.getInstr()->eraseFromParent();
424 return DPPInst.getInstr();
431 case AMDGPU::V_ADD_U32_e32:
432 case AMDGPU::V_ADD_U32_e64:
433 case AMDGPU::V_ADD_CO_U32_e32:
434 case AMDGPU::V_ADD_CO_U32_e64:
435 case AMDGPU::V_OR_B32_e32:
436 case AMDGPU::V_OR_B32_e64:
437 case AMDGPU::V_SUBREV_U32_e32:
438 case AMDGPU::V_SUBREV_U32_e64:
439 case AMDGPU::V_SUBREV_CO_U32_e32:
440 case AMDGPU::V_SUBREV_CO_U32_e64:
441 case AMDGPU::V_MAX_U32_e32:
442 case AMDGPU::V_MAX_U32_e64:
443 case AMDGPU::V_XOR_B32_e32:
444 case AMDGPU::V_XOR_B32_e64:
445 if (OldOpnd->
getImm() == 0)
448 case AMDGPU::V_AND_B32_e32:
449 case AMDGPU::V_AND_B32_e64:
450 case AMDGPU::V_MIN_U32_e32:
451 case AMDGPU::V_MIN_U32_e64:
453 std::numeric_limits<uint32_t>::max())
456 case AMDGPU::V_MIN_I32_e32:
457 case AMDGPU::V_MIN_I32_e64:
458 if (
static_cast<int32_t
>(OldOpnd->
getImm()) ==
459 std::numeric_limits<int32_t>::max())
462 case AMDGPU::V_MAX_I32_e32:
463 case AMDGPU::V_MAX_I32_e64:
464 if (
static_cast<int32_t
>(OldOpnd->
getImm()) ==
465 std::numeric_limits<int32_t>::min())
468 case AMDGPU::V_MUL_I32_I24_e32:
469 case AMDGPU::V_MUL_I32_I24_e64:
470 case AMDGPU::V_MUL_U32_U24_e32:
471 case AMDGPU::V_MUL_U32_U24_e64:
472 if (OldOpnd->
getImm() == 1)
481 MachineOperand *OldOpndValue,
bool CombBCZ,
bool IsShrinkable)
const {
483 if (!CombBCZ && OldOpndValue && OldOpndValue->
isImm()) {
484 auto *Src1 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
485 if (!Src1 || !Src1->isReg()) {
486 LLVM_DEBUG(
dbgs() <<
" failed: no src1 or it isn't a register\n");
490 LLVM_DEBUG(
dbgs() <<
" failed: old immediate isn't an identity\n");
494 auto MovDst =
TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
501 return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
506bool GCNDPPCombine::hasNoImmOrEqual(
MachineInstr &
MI,
unsigned OpndName,
507 int64_t
Value, int64_t Mask)
const {
508 auto *
Imm =
TII->getNamedOperand(
MI, OpndName);
513 return (
Imm->getImm() & Mask) ==
Value;
516bool GCNDPPCombine::combineDPPMov(
MachineInstr &MovMI)
const {
518 MovMI.
getOpcode() == AMDGPU::V_MOV_B64_dpp ||
519 MovMI.
getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
522 auto *DstOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
523 assert(DstOpnd && DstOpnd->isReg());
524 auto DPPMovReg = DstOpnd->getReg();
525 if (DPPMovReg.isPhysical()) {
535 if (MovMI.
getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
536 MovMI.
getOpcode() == AMDGPU::V_MOV_B64_dpp) {
537 auto *
DppCtrl =
TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
547 auto *RowMaskOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
548 assert(RowMaskOpnd && RowMaskOpnd->isImm());
549 auto *BankMaskOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
550 assert(BankMaskOpnd && BankMaskOpnd->isImm());
551 const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
552 BankMaskOpnd->getImm() == 0xF;
554 auto *BCZOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
555 assert(BCZOpnd && BCZOpnd->isImm());
556 bool BoundCtrlZero = BCZOpnd->getImm();
558 auto *OldOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
559 auto *SrcOpnd =
TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
561 assert(SrcOpnd && SrcOpnd->isReg());
567 auto *
const OldOpndValue = getOldOpndValue(*OldOpnd);
572 assert(!OldOpndValue || OldOpndValue->
isImm() || OldOpndValue == OldOpnd);
574 bool CombBCZ =
false;
576 if (MaskAllLanes && BoundCtrlZero) {
579 if (!OldOpndValue || !OldOpndValue->
isImm()) {
584 if (OldOpndValue->
getImm() == 0) {
589 }
else if (BoundCtrlZero) {
592 " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
601 dbgs() << *OldOpndValue;
602 dbgs() <<
", bound_ctrl=" << CombBCZ <<
'\n');
608 if (CombBCZ && OldOpndValue) {
611 MRI->createVirtualRegister(RC));
613 TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.
Reg);
614 DPPMIs.push_back(UndefInst.getInstr());
617 OrigMIs.push_back(&MovMI);
618 bool Rollback =
true;
621 for (
auto &
Use :
MRI->use_nodbg_operands(DPPMovReg)) {
625 while (!
Uses.empty()) {
629 auto &OrigMI = *
Use->getParent();
634 "There should not be e32 True16 instructions pre-RA");
635 if (OrigOp == AMDGPU::REG_SEQUENCE) {
637 unsigned FwdSubReg = 0;
646 for (OpNo = 1; OpNo < E; OpNo += 2) {
656 for (
auto &
Op :
MRI->use_nodbg_operands(FwdReg)) {
657 if (
Op.getSubReg() == FwdSubReg)
660 RegSeqWithOpNos[&OrigMI].push_back(OpNo);
664 bool IsShrinkable = isShrinkable(OrigMI);
665 if (!(IsShrinkable ||
666 ((
TII->isVOP3P(OrigOp) ||
TII->isVOPC(OrigOp) ||
667 TII->isVOP3(OrigOp)) &&
669 TII->isVOP1(OrigOp) ||
TII->isVOP2(OrigOp))) {
678 auto *Src0 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
679 auto *Src1 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
685 auto *Src2 =
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
686 assert(Src0 &&
"Src1 without Src0?");
687 if ((
Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
688 (Src2 && Src2->isIdenticalTo(*Src0)))) ||
689 (
Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
690 (Src2 && Src2->isIdenticalTo(*Src1))))) {
694 <<
" failed: DPP register is used more than once per instruction\n");
700 if (
auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
701 OldOpndValue, CombBCZ, IsShrinkable)) {
702 DPPMIs.push_back(DPPInst);
709 BB->
insert(OrigMI, NewMI);
710 if (
TII->commuteInstruction(*NewMI)) {
713 createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
715 DPPMIs.push_back(DPPInst);
720 NewMI->eraseFromParent();
724 OrigMIs.push_back(&OrigMI);
727 Rollback |= !
Uses.empty();
729 for (
auto *
MI : *(Rollback? &DPPMIs : &OrigMIs))
730 MI->eraseFromParent();
733 for (
auto &S : RegSeqWithOpNos) {
734 if (
MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
735 S.first->eraseFromParent();
738 while (!S.second.empty())
739 S.first->getOperand(S.second.pop_back_val()).setIsUndef();
752 TII =
ST->getInstrInfo();
754 bool Changed =
false;
755 for (
auto &
MBB : MF) {
757 if (
MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(
MI)) {
759 ++NumDPPMovsCombined;
760 }
else if (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
761 MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
762 if (
ST->hasDPALU_DPP() && combineDPPMov(
MI)) {
764 ++NumDPPMovsCombined;
768 if (M && combineDPPMov(*M))
769 ++NumDPPMovsCombined;
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd)
static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, MachineRegisterInfo &MRI)
Rewrite Partial Register Uses
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
TargetInstrInfo::RegSubRegPair RegSubRegPair
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
unsigned getSize(const MachineInstr &MI) const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Returns the total number of operands.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
void insert(mop_iterator InsertBefore, ArrayRef< MachineOperand > Ops)
Inserts Ops BEFORE It. Can untie/retie tied operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(unsigned DC)
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getDPPOp32(uint16_t Opcode)
bool isTrue16Inst(unsigned Opc)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
LLVM_READONLY int getDPPOp64(uint16_t Opcode)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Undef
Value of the register doesn't matter.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P, const TargetRegisterClass &TRC, MachineRegisterInfo &MRI)
Returns true if a reg:subreg pair P has a TRC class.
void initializeGCNDPPCombinePass(PassRegistry &)
FunctionPass * createGCNDPPCombinePass()
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
A pair composed of a register and a sub-register index.