Go to the documentation of this file.
17 #define DEBUG_TYPE "si-shrink-instructions"
20 "Number of 64-bit instruction reduced to 32-bit.");
22 "Number of literal constants folded into 32-bit instructions.");
/// Returns the display name of this pass: the fixed string
/// "SI Shrink Instructions". Declared `override`, so it replaces a virtual
/// from a base class that is not visible in this excerpt.
40 StringRef getPassName()
const override {
return "SI Shrink Instructions"; }
51 "SI Shrink Instructions",
false,
false)
53 char SIShrinkInstructions::
ID = 0;
56 return new SIShrinkInstructions();
74 if (
Def &&
Def->isMoveImmediate()) {
76 bool ConstantFolded =
false;
78 if (
TII->isOperandLegal(
MI, Src0Idx, &MovSrc)) {
82 ConstantFolded =
true;
83 }
else if (MovSrc.
isFI()) {
85 ConstantFolded =
true;
89 ConstantFolded =
true;
95 Def->eraseFromParent();
96 ++NumLiteralConstantsFolded;
104 if (TryToCommute &&
MI.isCommutable()) {
105 if (
TII->commuteInstruction(
MI)) {
110 TII->commuteInstruction(
MI);
134 return !
TII->isInlineConstant(Src);
139 return !
TII->isInlineConstant(Src);
149 int32_t &ReverseImm) {
153 ReverseImm = reverseBits<int32_t>(
static_cast<int32_t
>(Src.
getImm()));
154 return ReverseImm >= -16 && ReverseImm <= 64;
161 for (
unsigned i =
MI.getDesc().getNumOperands() +
162 MI.getDesc().getNumImplicitUses() +
163 MI.getDesc().getNumImplicitDefs(),
e =
MI.getNumOperands();
174 if (!
MI.getOperand(0).isReg())
175 TII->commuteInstruction(
MI,
false, 0, 1);
192 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
196 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
197 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
200 MI.setDesc(
TII->get(SOPKOpc));
217 if (!
Info ||
Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
226 unsigned NewAddrDwords =
Info->VAddrDwords;
229 if (
Info->VAddrDwords == 2) {
230 RC = &AMDGPU::VReg_64RegClass;
231 }
else if (
Info->VAddrDwords == 3) {
232 RC = &AMDGPU::VReg_96RegClass;
233 }
else if (
Info->VAddrDwords == 4) {
234 RC = &AMDGPU::VReg_128RegClass;
235 }
else if (
Info->VAddrDwords <= 8) {
236 RC = &AMDGPU::VReg_256RegClass;
239 RC = &AMDGPU::VReg_512RegClass;
243 unsigned VgprBase = 0;
245 bool IsKill = NewAddrDwords ==
Info->VAddrDwords;
246 for (
unsigned i = 0;
i <
Info->VAddrDwords; ++
i) {
248 unsigned Vgpr =
TRI.getHWRegIndex(
Op.getReg());
252 }
else if (VgprBase +
i != Vgpr)
261 if (VgprBase + NewAddrDwords > 256)
268 unsigned TFEVal = (TFEIdx == -1) ? 0 :
MI.getOperand(TFEIdx).getImm();
269 unsigned LWEVal = (LWEIdx == -1) ? 0 :
MI.getOperand(LWEIdx).getImm();
271 if (TFEVal || LWEVal) {
273 for (
unsigned i = LWEIdx + 1,
e =
MI.getNumOperands();
i !=
e; ++
i) {
274 if (
MI.getOperand(
i).isReg() &&
MI.getOperand(
i).isTied() &&
275 MI.getOperand(
i).isImplicit()) {
279 "found more than one tied implicit operand when expecting only 1");
281 MI.untieRegOperand(ToUntie);
288 Info->VDataDwords, NewAddrDwords);
289 MI.setDesc(
TII->get(NewOpcode));
291 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
292 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
294 for (
unsigned i = 1;
i <
Info->VAddrDwords; ++
i)
295 MI.RemoveOperand(VAddr0Idx + 1);
300 ToUntie - (
Info->VAddrDwords - 1));
313 unsigned Opc =
MI.getOpcode();
320 if (!SrcImm->
isImm() ||
327 if (Opc == AMDGPU::S_AND_B32) {
330 Opc = AMDGPU::S_BITSET0_B32;
333 Opc = AMDGPU::S_ANDN2_B32;
335 }
else if (Opc == AMDGPU::S_OR_B32) {
338 Opc = AMDGPU::S_BITSET1_B32;
341 Opc = AMDGPU::S_ORN2_B32;
343 }
else if (Opc == AMDGPU::S_XOR_B32) {
346 Opc = AMDGPU::S_XNOR_B32;
352 if ((Opc == AMDGPU::S_ANDN2_B32 || Opc == AMDGPU::S_ORN2_B32) &&
354 if (!
TII->commuteInstruction(
MI,
false, 1, 2))
366 const bool IsUndef = SrcReg->
isUndef();
367 const bool IsKill = SrcReg->
isKill();
368 MI.setDesc(
TII->get(Opc));
369 if (Opc == AMDGPU::S_BITSET0_B32 ||
370 Opc == AMDGPU::S_BITSET1_B32) {
373 MI.getOperand(2).ChangeToRegister(Dest->
getReg(),
false,
376 MI.tieOperands(0, 2);
395 if (
Reg.isPhysical() && MO.getReg().isPhysical()) {
398 }
else if (MO.getReg() ==
Reg &&
Reg.isVirtual()) {
424 if (
Reg.isPhysical()) {
427 Sub =
TRI.getSubRegFromChannel(
I +
TRI.getChannelFromSubReg(Sub));
435 for (
unsigned i =
MI.getDesc().getNumOperands() +
436 MI.getDesc().getNumImplicitUses() +
437 MI.getDesc().getNumImplicitDefs(),
e =
MI.getNumOperands();
443 TII->get(AMDGPU::IMPLICIT_DEF),
Op.getReg());
446 MI.eraseFromParent();
482 unsigned Size =
TII->getOpSize(MovT, 0) / 4;
491 const unsigned SearchLimit = 16;
493 bool KilledT =
false;
496 Iter !=
E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
501 if ((MovY->
getOpcode() != AMDGPU::V_MOV_B32_e32 &&
533 (
I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
534 I->getOpcode() != AMDGPU::COPY) ||
535 I->getOperand(0).getReg() !=
X ||
536 I->getOperand(0).getSubReg() != Xsub) {
544 if (
Size > 1 && (
I->getNumImplicitOperands() > (
I->isCopy() ? 0U : 1U)))
553 LLVM_DEBUG(
dbgs() <<
"Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
555 for (
unsigned I = 0;
I <
Size; ++
I) {
561 TII->get(AMDGPU::V_SWAP_B32))
601 unsigned VCCReg =
ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
603 std::vector<unsigned> I1Defs;
614 if (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
624 if (Src.
isImm() &&
MI.getOperand(0).getReg().isPhysical()) {
627 MI.setDesc(
TII->get(AMDGPU::V_BFREV_B32_e32));
634 if (
ST.hasSwap() && (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
635 MI.getOpcode() == AMDGPU::COPY)) {
637 Next = NextMI->getIterator();
647 if (
MI.getOpcode() == AMDGPU::S_ADD_I32 ||
648 MI.getOpcode() == AMDGPU::S_MUL_I32) {
654 if (
TII->commuteInstruction(
MI,
false, 1, 2))
669 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_ADD_I32) ?
670 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
672 MI.setDesc(
TII->get(Opc));
673 MI.tieOperands(0, 1);
679 if (
MI.isCompare() &&
TII->isSOPC(
MI)) {
685 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
692 MI.setDesc(
TII->get(AMDGPU::S_MOVK_I32));
694 MI.setDesc(
TII->get(AMDGPU::S_BREV_B32));
703 if (
MI.getOpcode() == AMDGPU::S_AND_B32 ||
704 MI.getOpcode() == AMDGPU::S_OR_B32 ||
705 MI.getOpcode() == AMDGPU::S_XOR_B32) {
710 if (
TII->isMIMG(
MI.getOpcode()) &&
718 if (!
TII->hasVALU32BitEncoding(
MI.getOpcode()))
724 if (!
MI.isCommutable() || !
TII->commuteInstruction(
MI) ||
731 if (!
TII->hasVALU32BitEncoding(
MI.getOpcode()))
736 if (
TII->isVOPC(Op32)) {
750 if (DstReg != VCCReg)
754 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
758 TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
772 AMDGPU::OpName::sdst);
776 AMDGPU::OpName::src2);
781 if (SDst->
getReg() != VCCReg) {
789 if (Src2 && Src2->
getReg() != VCCReg) {
803 ++NumInstructionsShrunk;
808 MI.eraseFromParent();
static MachineInstr * matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI, const SIInstrInfo *TII)
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
bool hasProperty(Property P) const
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src)
const GlobalValue * getGlobal() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void setIsKill(bool Val=true)
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
LLVM_READONLY int getSOPKOp(uint16_t Opcode)
void setImm(int64_t immVal)
static bool instAccessReg(iterator_range< MachineInstr::const_mop_iterator > &&R, Register Reg, unsigned SubReg, const SIRegisterInfo &TRI)
bool hasRegisterImplicitUseOperand(Register Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not consi...
static void dropInstructionKeepingImpDefs(MachineInstr &MI, const SIInstrInfo *TII)
void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
A pair composed of a register and a sub-register index.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
LLVM_READONLY int getVOPe32(uint16_t Opcode)
const HexagonInstrInfo * TII
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
Describe properties that are true of each instruction in the target description file.
MachineOperand class - Representation of each machine instruction operand.
unsigned M0(unsigned Val)
static bool instReadsReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
bool regsOverlap(Register regA, Register regB) const
Returns true if the two registers are equal or alias each other.
Analysis containing CSE Info
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction that are not part of t...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
constexpr bool isInt< 32 >(int64_t x)
constexpr bool isUInt< 16 >(uint64_t x)
unsigned getTargetFlags() const
static bool instModifiesReg(const MachineInstr *MI, unsigned Reg, unsigned SubReg, const SIRegisterInfo &TRI)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
unsigned getNumImplicitOperands() const
Returns the implicit operands number.
constexpr bool any() const
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
static bool shrinkScalarLogicOp(const GCNSubtarget &ST, MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr &MI)
Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
constexpr bool isUInt< 32 >(uint64_t x)
TargetInstrInfo::RegSubRegPair RegSubRegPair
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
Register getReg() const
getReg - Returns the register number.
instr_iterator instr_end()
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
FunctionPass * createSIShrinkInstructionsPass()
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
self_iterator getIterator()
const MachineBasicBlock * getParent() const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
constexpr bool isInt< 16 >(int64_t x)
void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
STATISTIC(NumInstructionsShrunk, "Number of 64-bit instruction reduced to 32-bit.")
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
Iterator for intrusive lists based on ilist_node.
static bool isReverseInlineImm(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ReverseImm)
static TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub, unsigned I, const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI)
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute=true)
This function checks MI for operands defined by a move immediate instruction and then folds the liter...
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src, bool &IsUnsigned)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
A range adaptor for a pair of iterators.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
FunctionPass class - This class is used to implement most global optimizations.
#define DEBUG_TYPE
The pass tries to use the 32-bit encoding for instructions when possible.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.