Go to the documentation of this file.
17 #define DEBUG_TYPE "si-shrink-instructions"
20 "Number of 64-bit instruction reduced to 32-bit.");
22 "Number of literal constants folded into 32-bit instructions.");
40 StringRef getPassName()
const override {
return "SI Shrink Instructions"; }
51 "SI Shrink Instructions",
false,
false)
53 char SIShrinkInstructions::
ID = 0;
56 return new SIShrinkInstructions();
74 if (
Def &&
Def->isMoveImmediate()) {
76 bool ConstantFolded =
false;
78 if (
TII->isOperandLegal(
MI, Src0Idx, &MovSrc)) {
82 ConstantFolded =
true;
83 }
else if (MovSrc.
isFI()) {
85 ConstantFolded =
true;
89 ConstantFolded =
true;
95 Def->eraseFromParent();
96 ++NumLiteralConstantsFolded;
104 if (TryToCommute &&
MI.isCommutable()) {
105 if (
TII->commuteInstruction(
MI)) {
110 TII->commuteInstruction(
MI);
119 !
TII->isInlineConstant(*Src.getParent(),
120 Src.getParent()->getOperandNo(&Src));
125 !
TII->isInlineConstant(*Src.getParent(),
126 Src.getParent()->getOperandNo(&Src));
134 return !
TII->isInlineConstant(Src);
139 return !
TII->isInlineConstant(Src);
149 int32_t &ReverseImm) {
150 if (!
isInt<32>(Src.getImm()) ||
TII->isInlineConstant(Src))
153 ReverseImm = reverseBits<int32_t>(
static_cast<int32_t
>(Src.getImm()));
154 return ReverseImm >= -16 && ReverseImm <= 64;
161 for (
unsigned i =
MI.getDesc().getNumOperands() +
162 MI.getDesc().getNumImplicitUses() +
163 MI.getDesc().getNumImplicitDefs(),
e =
MI.getNumOperands();
174 if (!
MI.getOperand(0).isReg())
175 TII->commuteInstruction(
MI,
false, 0, 1);
192 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
196 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
197 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
200 MI.setDesc(
TII->get(SOPKOpc));
217 if (!
Info ||
Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
226 unsigned NewAddrDwords =
Info->VAddrDwords;
229 if (
Info->VAddrDwords == 2) {
230 RC = &AMDGPU::VReg_64RegClass;
231 }
else if (
Info->VAddrDwords == 3) {
232 RC = &AMDGPU::VReg_96RegClass;
233 }
else if (
Info->VAddrDwords == 4) {
234 RC = &AMDGPU::VReg_128RegClass;
235 }
else if (
Info->VAddrDwords == 5) {
236 RC = &AMDGPU::VReg_160RegClass;
237 }
else if (
Info->VAddrDwords == 6) {
238 RC = &AMDGPU::VReg_192RegClass;
239 }
else if (
Info->VAddrDwords == 7) {
240 RC = &AMDGPU::VReg_224RegClass;
241 }
else if (
Info->VAddrDwords == 8) {
242 RC = &AMDGPU::VReg_256RegClass;
244 RC = &AMDGPU::VReg_512RegClass;
248 unsigned VgprBase = 0;
250 bool IsKill = NewAddrDwords ==
Info->VAddrDwords;
251 for (
unsigned i = 0;
i <
Info->VAddrDwords; ++
i) {
253 unsigned Vgpr =
TRI.getHWRegIndex(
Op.getReg());
257 }
else if (VgprBase +
i != Vgpr)
266 if (VgprBase + NewAddrDwords > 256)
273 unsigned TFEVal = (TFEIdx == -1) ? 0 :
MI.getOperand(TFEIdx).getImm();
274 unsigned LWEVal = (LWEIdx == -1) ? 0 :
MI.getOperand(LWEIdx).getImm();
276 if (TFEVal || LWEVal) {
278 for (
unsigned i = LWEIdx + 1,
e =
MI.getNumOperands();
i !=
e; ++
i) {
279 if (
MI.getOperand(
i).isReg() &&
MI.getOperand(
i).isTied() &&
280 MI.getOperand(
i).isImplicit()) {
284 "found more than one tied implicit operand when expecting only 1");
286 MI.untieRegOperand(ToUntie);
293 Info->VDataDwords, NewAddrDwords);
294 MI.setDesc(
TII->get(NewOpcode));
296 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
297 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
299 for (
unsigned i = 1;
i <
Info->VAddrDwords; ++
i)
300 MI.removeOperand(VAddr0Idx + 1);
305 ToUntie - (
Info->VAddrDwords - 1));
318 unsigned Opc =
MI.getOpcode();
325 if (!SrcImm->
isImm() ||
332 if (Opc == AMDGPU::S_AND_B32) {
335 Opc = AMDGPU::S_BITSET0_B32;
338 Opc = AMDGPU::S_ANDN2_B32;
340 }
else if (Opc == AMDGPU::S_OR_B32) {
343 Opc = AMDGPU::S_BITSET1_B32;
346 Opc = AMDGPU::S_ORN2_B32;
348 }
else if (Opc == AMDGPU::S_XOR_B32) {
351 Opc = AMDGPU::S_XNOR_B32;
365 const bool IsUndef = SrcReg->
isUndef();
366 const bool IsKill = SrcReg->
isKill();
367 MI.setDesc(
TII->get(Opc));
368 if (Opc == AMDGPU::S_BITSET0_B32 ||
369 Opc == AMDGPU::S_BITSET1_B32) {
372 MI.getOperand(2).ChangeToRegister(Dest->
getReg(),
false,
375 MI.tieOperands(0, 2);
394 if (
Reg.isPhysical() && MO.getReg().isPhysical()) {
397 }
else if (MO.getReg() ==
Reg &&
Reg.isVirtual()) {
423 if (
Reg.isPhysical()) {
426 Sub =
TRI.getSubRegFromChannel(
I +
TRI.getChannelFromSubReg(Sub));
434 for (
unsigned i =
MI.getDesc().getNumOperands() +
435 MI.getDesc().getNumImplicitUses() +
436 MI.getDesc().getNumImplicitDefs(),
e =
MI.getNumOperands();
442 TII->get(AMDGPU::IMPLICIT_DEF),
Op.getReg());
445 MI.eraseFromParent();
481 unsigned Size =
TII->getOpSize(MovT, 0) / 4;
490 const unsigned SearchLimit = 16;
492 bool KilledT =
false;
495 Iter !=
E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
500 if ((MovY->
getOpcode() != AMDGPU::V_MOV_B32_e32 &&
532 (
I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
533 I->getOpcode() != AMDGPU::COPY) ||
534 I->getOperand(0).getReg() !=
X ||
535 I->getOperand(0).getSubReg() != Xsub) {
543 if (Size > 1 && (
I->getNumImplicitOperands() > (
I->isCopy() ? 0U : 1U)))
552 LLVM_DEBUG(
dbgs() <<
"Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
554 for (
unsigned I = 0;
I < Size; ++
I) {
560 TII->get(AMDGPU::V_SWAP_B32))
600 unsigned VCCReg =
ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
602 std::vector<unsigned> I1Defs;
613 if (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
623 if (Src.isImm() &&
MI.getOperand(0).getReg().isPhysical()) {
626 MI.setDesc(
TII->get(AMDGPU::V_BFREV_B32_e32));
627 Src.setImm(ReverseImm);
633 if (
ST.hasSwap() && (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
634 MI.getOpcode() == AMDGPU::COPY)) {
636 Next = NextMI->getIterator();
646 if (
MI.getOpcode() == AMDGPU::S_ADD_I32 ||
647 MI.getOpcode() == AMDGPU::S_MUL_I32) {
653 if (
TII->commuteInstruction(
MI,
false, 1, 2))
668 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_ADD_I32) ?
669 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
671 MI.setDesc(
TII->get(Opc));
672 MI.tieOperands(0, 1);
678 if (
MI.isCompare() &&
TII->isSOPC(
MI)) {
684 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
688 if (Src.isImm() && Dst.getReg().isPhysical()) {
691 MI.setDesc(
TII->get(AMDGPU::S_MOVK_I32));
693 MI.setDesc(
TII->get(AMDGPU::S_BREV_B32));
694 Src.setImm(ReverseImm);
702 if (
MI.getOpcode() == AMDGPU::S_AND_B32 ||
703 MI.getOpcode() == AMDGPU::S_OR_B32 ||
704 MI.getOpcode() == AMDGPU::S_XOR_B32) {
709 if (
TII->isMIMG(
MI.getOpcode()) &&
717 if (!
TII->hasVALU32BitEncoding(
MI.getOpcode()))
723 if (!
MI.isCommutable() || !
TII->commuteInstruction(
MI) ||
730 if (
TII->isVOPC(Op32)) {
749 if (DstReg != VCCReg)
754 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
758 TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
772 AMDGPU::OpName::sdst);
777 if (SDst->
getReg() != VCCReg) {
786 AMDGPU::OpName::src2);
787 if (Src2 && Src2->
getReg() != VCCReg) {
801 ++NumInstructionsShrunk;
807 if (SDst && SDst->
isDead())
810 MI.eraseFromParent();
static MachineInstr * matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, const SIInstrInfo *TII)
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
bool hasProperty(Property P) const
This is an optimization pass for GlobalISel generic memory operations.
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
const GlobalValue * getGlobal() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
void setIsKill(bool Val=true)
Reg
All possible values of the reg field in the ModR/M byte.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
LLVM_READONLY int getSOPKOp(uint16_t Opcode)
void setImm(int64_t immVal)
static bool instAccessReg(iterator_range< MachineInstr::const_mop_iterator > &&R, Register Reg, unsigned SubReg, const SIRegisterInfo &TRI)
bool hasRegisterImplicitUseOperand(Register Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not considering sub/super-registers).
static void dropInstructionKeepingImpDefs(MachineInstr &MI, const SIInstrInfo *TII)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
A pair composed of a register and a sub-register index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
const HexagonInstrInfo * TII
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
Describe properties that are true of each instruction in the target description file.
MachineOperand class - Representation of each machine instruction operand.
unsigned M0(unsigned Val)
static bool instReadsReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
Analysis containing CSE Info
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction that are not part of the instruction definition.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
constexpr bool isInt< 32 >(int64_t x)
void setIsDead(bool Val=true)
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
constexpr bool isUInt< 16 >(uint64_t x)
unsigned getTargetFlags() const
static bool instModifiesReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
unsigned getNumImplicitOperands() const
Returns the implicit operands number.
constexpr bool any() const
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
static bool shrinkScalarLogicOp(const GCNSubtarget &ST, MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr &MI)
Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
constexpr bool isUInt< 32 >(uint64_t x)
TargetInstrInfo::RegSubRegPair RegSubRegPair
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
Register getReg() const
getReg - Returns the register number.
instr_iterator instr_end()
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
FunctionPass * createSIShrinkInstructionsPass()
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
self_iterator getIterator()
const MachineBasicBlock * getParent() const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
constexpr bool isInt< 16 >(int64_t x)
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
STATISTIC(NumInstructionsShrunk, "Number of 64-bit instruction reduced to 32-bit.")
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
Iterator for intrusive lists based on ilist_node.
static bool isReverseInlineImm(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ReverseImm)
static TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub, unsigned I, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute=true)
This function checks MI for operands defined by a move immediate instruction and then folds the liter...
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src, bool &IsUnsigned)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
A range adaptor for a pair of iterators.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
FunctionPass class - This class is used to implement most global optimizations.
#define DEBUG_TYPE
The pass tries to use the 32-bit encoding for instructions when possible.
MachineOperand * findRegisterDefOperand(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an index.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.