#define DEBUG_TYPE "aarch64-mi-peephole-opt"
  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;
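  // For an instruction whose immediate operand is out of range, SplitAndOpcFunc
  // decides whether the constant can be split into two legal immediates and, if
  // so, returns the pair of opcodes to use; BuildMIFunc then materializes the
  // two replacement instructions from
  // (MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg).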
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
  // Strategy used to split a bitmask immediate.
  enum class SplitStrategy {
    Intersect,
    Disjoint,
  };
  template <typename T>
  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                          SplitStrategy Strategy, unsigned OtherOpc = 0);
147 return "AArch64 MI Peephole Optimization pass";
char AArch64MIPeepholeOpt::ID = 0;

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)
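// Split a bitmask immediate that is not a valid logical immediate into two
// masks whose AND reproduces the original constant. For example,
// 0b00000000001000000000010000000000 does not consist of consecutive ones,
// but it splits into 0b00000000001111111111110000000000 and
// 0b11111111111000000000011111111111, and ANDing the two masks recovers the
// original value.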
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Mask filled with ones from the lowest to the highest set bit.
  unsigned LowestBitSet = llvm::countr_zero(UImm);
  unsigned HighestBitSet = Log2_64(UImm);
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Mask filled with ones outside that range, plus the original bits inside.
  T NewImm2 = UImm | ~NewImm1;

  // NewImm1 is a single run of consecutive ones and is always encodable; only
  // NewImm2 needs checking.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Mask covering the least significant run of consecutive ones.
  unsigned LowestBitSet = llvm::countr_zero(Imm);
  unsigned LowestGapBitUnset =
      LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);

  assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
              (static_cast<T>(1) << LowestBitSet);
  // Disjoint mask covering the remaining set bits.
  T NewImm2 = Imm & ~NewImm1;

  // NewImm1 is a single run of ones and always encodable; only NewImm2 needs
  // checking.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}
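// Try the below transformation:
//
//   MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri
//   MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri
//
// and likewise for EOR/ORR using the Disjoint strategy. The MOV pseudo is
// otherwise expanded into multiple instructions later, so splitting its
// constant into two legal logical immediates needs only two instructions
// instead of mov + mov + logic.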
template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                                              SplitStrategy Strategy,
                                              unsigned OtherOpc) {
  return splitTwoPartImm<T>(
      MI,
      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
                                T &Imm1) -> std::optional<OpcodePair> {
        // If the immediate is already a suitable bitmask, don't split it.
        if (AArch64_AM::isLogicalImmediate(Imm, RegSize))
          return std::nullopt;

        // If the immediate can be materialized by a single instruction,
        // splitting would not help either.
        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
        if (Insn.size() == 1)
          return std::nullopt;

        // Split the bitmask immediate into two.
        bool SplitSucc = false;
        switch (Strategy) {
        case SplitStrategy::Intersect:
          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        case SplitStrategy::Disjoint:
          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        }
        if (SplitSucc)
          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}
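// Check whether this ORRWrs comes from the zero-extend pattern
//
//   def : Pat<(i64 (zext GPR32:$src)),
//             (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
//
// A real (non-generic) 32-bit AArch64 instruction already zeroes the upper
// 32 bits of its destination, so the ORR is redundant and the source can be
// forwarded directly.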
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR becomes an FMOVSWr, which is known to zero the
    // upper bits.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  MI.eraseFromParent();
  return true;
}
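// A CSEL whose two inputs are the same register does not depend on the
// condition at all; replace it with an ORR from the zero register (the MOV
// alias).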
bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
    return false;

  auto ZeroReg =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(MI.getOperand(1).getReg())
      .addImm(0);

  MI.eraseFromParent();
  return true;
}
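// Rewrite a zero-extending INSERT_SUBREG as SUBREG_TO_REG:
//
//   From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//   To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// This is valid when the inserted value is defined by a real 32-bit
// instruction, which implicitly zeroes the upper 32 bits.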
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  (void)SubregMI;
  MI.eraseFromParent();
  return true;
}
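// Accept only immediates of the form (Imm0 << 12) + Imm1 where both halves
// are non-zero 12-bit values: constants that need more than one instruction
// to materialize but fit two add/sub shifted 12-bit immediate operands.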
template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // Reject immediates that a single instruction can already materialize.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}
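// Try the below transformation:
//
//   ADDWrr X, (MOVi32imm C) ==> ADDWri + ADDWri
//
// splitting C into two 12-bit immediates. When C itself does not split but
// -C does, the opposite opcode is used instead (ADD <-> SUB).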
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(unsigned PosOpc, unsigned NegOpc,
                                       MachineInstr &MI) {
  // The register operand may be WZR/XZR when constant folding has not yet
  // happened; bail out rather than produce invalid zero-register uses.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
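// The same transformation for the flag-setting ADDS/SUBS forms. This is only
// sound when every consumer of NZCV cares about N and Z alone (i.e. EQ/NE),
// because splitting into two instructions changes the C and V results.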
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(OpcodePair PosOpcs,
                                         OpcodePair NegOpcs, MachineInstr &MI) {
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check the NZCV uses last; scanning the following instructions is
        // comparatively expensive.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
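// Checks whether the MOV-immediate instruction feeding MI is eligible for the
// split: MI must be loop invariant when inside a loop, and the operand must
// be defined by a single-use MOVi32imm/MOVi64imm, possibly through a
// SUBREG_TO_REG. Otherwise splitting would add instructions instead of
// removing them.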
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and MI is loop invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is a SUBREG_TO_REG, check its operand instead.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because that
  // would create more instructions than it removes.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  return true;
}
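// Shared driver for the immediate-splitting peepholes: validate the feeding
// MOV, let SplitAndOpc compute (Imm0, Imm1) and the opcode pair, create and
// constrain the new virtual registers, then delegate instruction creation to
// BuildInstr so that
//
//   NewTmpReg = Opcode.first  SrcReg    Imm0
//   NewDstReg = Opcode.second NewTmpReg Imm1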
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the opcodes.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // With a SUBREG_TO_REG the upper 32 bits must be cleared, which matters if
  // the immediate was sign extended on the way into the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Determine register classes for destinations and register operands.
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI);

  // Get the old destination register, and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // If DstReg is not virtual (likely WZR or XZR), reuse it as-is.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating callback to build the two new instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register; keep it valid for SSA
  // form until MI is deleted.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the now-dead instructions.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}
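// Check whether this INSvi[X]gpr comes from a COPY of a source FPR128:
//
//  From %intermediate1:gpr64 = COPY %src:fpr128
//       %intermediate2:gpr32 = COPY %intermediate1:gpr64
//       %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2
//  To   %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128, 0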
bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // Walk a chain of COPY instructions to find the initial source register,
  // and check whether it is an FPR128.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}
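// Any real (non-generic) instruction that defines an FPR64 implicitly zeroes
// the high 64 bits of the wider vector register.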
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}
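// Remove an INSvi64lane whose inserted high half is known to be zero: the
// low 64 bits must come from an instruction that implicitly zeroes the high
// bits, and the high half must trace back to a MOVID/MOVIv2d_ns of 0
// (possibly through a COPY).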
bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the instruction defining the low 64 bits zeroes the high
  // 64 bits implicitly.
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check there is a `mov 0` defining the high 64 bits.
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // The insert is a no-op; forward the low-half register.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
  return true;
}
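// FMOVDr implicitly zeroes the high 64 bits of its destination. If its
// source is already produced by an instruction that zeroes those bits, the
// FMOV is redundant and the source register can be used directly.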
bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();
  return true;
}
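// If a UBFMXri is equivalent to the 32-bit LSR or LSL alias of UBFM, replace
// it with the 32-bit variant, UBFMWri, copying the operands through their
// sub_32 subregisters.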
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  // For the LSL alias, the rotate amount maps onto its 32-bit form; the LSR
  // immediates carry over unchanged.
  if (IsLSL)
    Immr -= 32;

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}
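// Across basic blocks a sub_32 COPY may read a value that was only widened
// by a sign extend (sxtw, i.e. SBFMXri 0, 31) or by the zero-extend pattern
// SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))). The extension is irrelevant to
// the 32-bit use, so the COPY can read the pre-extension register and the
// extending instructions become dead.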
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  // Walk through a chain of single-use full copies.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return the pre-extension register X.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))) and return X.
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  MI.getOperand(1).setReg(SrcReg);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}
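// Top-level driver: dispatch on the opcode of each instruction. Iteration
// uses make_early_inc_range so the visitors can erase the current
// instruction safely.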
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
        break;
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}