#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
  ProduceNonFlagSettingCondBr =
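// Complex-pattern selectors and emit helpers used by the imported TableGen
// patterns (member declarations follow).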
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                     unsigned Opc1, unsigned Opc2, bool isExt);
  unsigned emitConstantPoolEntry(const Constant *CPVal,
           std::optional<CmpInst::Predicate> = std::nullopt) const;
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
  std::pair<MachineInstr *, AArch64CC::CondCode>
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
                                        unsigned Size) const;
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          unsigned Size) const;
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
                                  bool IsAddrOperand) const;
                                  unsigned SizeInBytes) const;
                                  bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }
                                  unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
                                            bool AllowROR = false) const;
  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }
                                  bool IsLoadStore = false) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
                       int OpIdx = -1) const;
  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
#include "AArch64GenGlobalISel.inc"
#include "AArch64GenGlobalISel.inc"
                       bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    return GetAllRegSet ? &AArch64::GPR32allRegClass
                        : &AArch64::GPR32RegClass;
    return GetAllRegSet ? &AArch64::GPR64allRegClass
                        : &AArch64::GPR64RegClass;
    return &AArch64::XSeqPairsClassRegClass;
  if (RB.getID() == AArch64::FPRRegBankID) {
    return &AArch64::FPR8RegClass;
    return &AArch64::FPR16RegClass;
    return &AArch64::FPR32RegClass;
    return &AArch64::FPR64RegClass;
    return &AArch64::FPR128RegClass;

                       bool GetAllRegSet = false) {
           "Expected FPR regbank for scalable type size");
    return &AArch64::ZPRRegClass;

  unsigned RegBankID = RB.getID();
  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  if (RegBankID == AArch64::FPRRegBankID) {
           "Unexpected scalable register size");
      return &AArch64::ZPRRegClass;
    switch (SizeInBits) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
  switch (TRI.getRegSizeInBits(*RC)) {
  if (RC != &AArch64::FPR32RegClass)
      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:

                           const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  return RegSequence.getReg(0);

  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
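// createTuple and the createDTuple/createQTuple wrappers build a REG_SEQUENCE
// covering 2-4 D or Q registers; they are used when selecting the structured
// vector load/store (lane) intrinsics.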
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");

  case AArch64::GPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
  case AArch64::FPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;

  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  RegOp.setReg(SubRegCopy.getReg(0));
  if (!I.getOperand(0).getReg().isPhysical())

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  if (SrcRegBank != DstRegBank &&

  if (Reg.isPhysical())
  LLT Ty = MRI.getType(Reg);
      dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
    RC = getRegClassForTypeOnBank(Ty, RB);
          dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
  const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
    Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
            TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
    RegOp.setReg(PromoteReg);
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  I.setDesc(TII.get(AArch64::COPY));
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWSr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWDr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXSr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXDr;
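// emitSelect lowers a scalar G_SELECT into CSEL/FCSEL and, where the true or
// false value is a foldable constant or binary operation, into
// CSINC/CSINV/CSNEG.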
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)
    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();
      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t T = TrueCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t F = FalseCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  Optimized |= TryFoldBinOpIntoSelect(False, True, false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, true);
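// getTestBitReg walks through copies, extensions, AND/XOR and shifts to find
// the narrowest register and bit to test; emitTestBit then emits TBZ/TBNZ.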
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
    unsigned Opc = MI->getOpcode();
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
      Register NextReg = MI->getOperand(1).getReg();
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
    std::optional<uint64_t> C;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
          C = VRegAndVal->Value.getZExtValue();
          C = VRegAndVal->Value.getSExtValue();
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
        C = VRegAndVal->Value.getSExtValue();
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    case TargetOpcode::G_AND:
      if ((*C >> Bit) & 1)
    case TargetOpcode::G_SHL:
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
    case TargetOpcode::G_ASHR:
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
    case TargetOpcode::G_LSHR:
      if ((Bit + *C) < TestRegSize) {
    case TargetOpcode::G_XOR:
      if ((*C >> Bit) & 1)

  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
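// Compare-and-branch selection: a G_ICMP/G_FCMP feeding G_BRCOND is folded
// into B.cc, CB(N)Z or TB(N)Z where profitable.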
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, true, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();
  if (VRegAndVal && VRegAndVal->Value == 0) {
    tryOptAndIntoCompareBranch(
      I.eraseFromParent();
  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, 0, true, I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
    return std::nullopt;
  int64_t Imm = *ShiftImm;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;

bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();

bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
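// va_start lowering (AAPCS and Darwin): store the stack/register save area
// pointers and the GPR/FPR offsets into the va_list object.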
bool AArch64InstructionSelector::selectVaStartAAPCS(
  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
  const auto *PtrRegClass =
      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
      TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
      TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
  const auto VAList = I.getOperand(0).getReg();
  unsigned OffsetBytes = 0;
  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
    const Register Top = MRI.createVirtualRegister(PtrRegClass);
    auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
              .addImm(OffsetBytes / PtrSize)
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
    OffsetBytes += PtrSize;
  const auto PushIntConstant = [&](const int32_t Value) {
    constexpr int IntSize = 4;
    const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
              .addImm(OffsetBytes / IntSize)
                  MMO->getPointerInfo().getWithOffset(OffsetBytes),
    OffsetBytes += IntSize;
  PushIntConstant(-static_cast<int32_t>(GPRSize));
  PushIntConstant(-static_cast<int32_t>(FPRSize));
  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
  I.eraseFromParent();

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();
  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  I.eraseFromParent();
void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
            : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
          GV, MovZ->getOperand(1).getOffset(), Flags));
          MovZ->getOperand(1).getOffset(), Flags));
  Register DstReg = BuildMovK(MovZ.getReg(0),

bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
  case AArch64::G_DUP: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    MRI.setType(I.getOperand(0).getReg(),
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case AArch64::G_INSERT_VECTOR_ELT: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
    MRI.setType(I.getOperand(1).getReg(),
    MRI.setType(I.getOperand(0).getReg(),
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(2).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
        I.setDesc(TII.get(AArch64::G_UITOF));
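// convertPtrAddToAdd rewrites G_PTR_ADD into G_PTRTOINT + G_ADD so that the
// imported integer patterns (and immediate folding) can apply.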
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));

bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();

bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
  I.getOperand(0).setReg(DefDstReg);
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
        MRI.getType(Dst).getNumElements(),
            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
    I.eraseFromParent();
  case TargetOpcode::G_SEXT:
    if (selectUSMovFromExtend(I, MRI))
  case TargetOpcode::G_BR:
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;
    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ADD: {
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    LLT Ty = MRI.getType(AddLHS);
      if (!MRI.hasOneNonDBGUse(Reg))
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
      Cmp = MatchCmp(AddRHS);
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_OR: {
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
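// Main select() entry point: PHIs/copies and target-generic opcodes are
// handled first, then preISelLower and the imported TableGen patterns run,
// and finally the per-opcode manual selection below.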
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
  if (Subtarget->requiresStrictAlign()) {
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);
          MRI.getRegClassOrRegBank(DefReg);
          dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
        const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    if (I.isDebugInstr())
  if (I.getNumOperands() != I.getNumExplicitOperands()) {
        dbgs() << "Generic instruction has unexpected implicit operands\n");
  if (preISelLower(I)) {
    Opcode = I.getOpcode();
  if (selectImpl(I, *CoverageInfo))
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    assert(Cst1 && "Should have gotten a constant for src 1?");
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
        .addImm(LSB + Width - 1);
    I.eraseFromParent();
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);
  case TargetOpcode::G_BRINDIRECT: {
    if (std::optional<uint16_t> BADisc =
            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
      MI.addReg(AArch64::XZR);
      I.eraseFromParent();
    I.setDesc(TII.get(AArch64::BR));
  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);
  case AArch64::G_ADD_LOW: {
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
           "Expected small code model");
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                                         Op2.getTargetFlags());
    I.eraseFromParent();
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
                          << " constant, expected: " << s16 << " or " << s32
                          << " or " << s64 << " or " << s128 << '\n');
      if (RB.getID() != AArch64::FPRRegBankID) {
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
      if (Ty != p0 && Ty != s8 && Ty != s16) {
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
      if (RB.getID() != AArch64::GPRRegBankID) {
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
      bool OptForSize = shouldOptForSize(&MF);
      if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
        auto *FPImm = I.getOperand(1).getFPImm();
          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
        I.eraseFromParent();
        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
      const Register DefGPRReg = MRI.createVirtualRegister(
          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    unsigned Offset = I.getOperand(2).getImm();
      if (SrcRB.getID() == AArch64::GPRRegBankID) {
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                    Offset == 0 ? AArch64::sube64 : AArch64::subo64);
            AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();
      unsigned LaneIdx = Offset / 64;
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      I.eraseFromParent();
    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
           "unexpected G_EXTRACT types");
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
           "unexpected G_INSERT types");
            TII.get(AArch64::SUBREG_TO_REG))
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);
  case TargetOpcode::G_FRAME_INDEX: {
    I.setDesc(TII.get(AArch64::ADDXri));

  case TargetOpcode::G_GLOBAL_VALUE: {
    if (I.getOperand(1).isSymbol()) {
      OpFlags = I.getOperand(1).getTargetFlags();
      GV = I.getOperand(1).getGlobal();
        return selectTLSGlobalValue(I, MRI);
      OpFlags = STI.ClassifyGlobalReference(GV, TM);
                        ? AArch64::LOADgotAUTH
                        : AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
               !TM.isPositionIndependent()) {
        materializeLargeCMVal(I, GV, OpFlags);
        I.eraseFromParent();
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
      I.setDesc(TII.get(AArch64::MOVaddr));
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),

  case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
    return selectPtrAuthGlobalValue(I, MRI);
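// G_LOAD/G_STORE selection: atomic orderings map to LDAR/LDAPR/STLR, the
// addressing mode is chosen via selectAddrModeIndexed, and stores of zero are
// folded to WZR/XZR.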
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      assert(MemSizeInBytes <= 8 &&
             "128-bit atomics should already be custom-legalized");
      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
    LLT ValTy = MRI.getType(ValReg);
    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
              .addReg(ValReg, 0, SubReg)
      RBI.constrainGenericRegister(Copy, *RC, MRI);
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      if (RB.getID() == AArch64::FPRRegBankID) {
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        MRI.setRegBank(NewDst, RB);
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
      if (NewOpc == I.getOpcode())
          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
        I.setDesc(TII.get(NewOpc));
      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
      I.eraseFromParent();

    if (Opcode == TargetOpcode::G_STORE) {
      if (CVal && CVal->Value == 0) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);

    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(AArch64::sub_32);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
  case TargetOpcode::G_INDEXED_SEXTLOAD:
    return selectIndexedExtLoad(I, MRI);
  case TargetOpcode::G_INDEXED_LOAD:
    return selectIndexedLoad(I, MRI);
  case TargetOpcode::G_INDEXED_STORE:
    return selectIndexedStore(cast<GIndexedStore>(I), MRI);

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));

  case TargetOpcode::G_OR: {
    const Register DefReg = I.getOperand(0).getReg();
    if (NewOpc == I.getOpcode())
    I.setDesc(TII.get(NewOpc));
  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();

  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    return selectOverflowOp(I, MRI);

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(

  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
    if (DstRB.getID() == AArch64::GPRRegBankID) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
      if (DstRC == SrcRC) {
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
      I.setDesc(TII.get(TargetOpcode::COPY));
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        I.eraseFromParent();
      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    if (RBDst.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
    if (DstSize != 64 && DstSize > 32) {
                        << ", expected: 32 or 64\n");
    Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
        .addImm(AArch64::sub_32);
    I.getOperand(1).setReg(ExtSrc);

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");
        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      unsigned BytesLoaded = MemOp->getSize().getValue();
    if (IsGPR && SrcSize == 32 && DstSize == 64) {
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      const Register ZReg = AArch64::WZR;
      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);
      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
      I.eraseFromParent();
    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
    I.eraseFromParent();
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    if (NewOpc == Opcode)
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();
    if (tryOptSelect(Sel))
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
    Sel.eraseFromParent();

  case TargetOpcode::G_ICMP: {
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR,
              AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FCMP: {
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
    I.eraseFromParent();

  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

  case TargetOpcode::G_BLOCK_ADDR: {
    Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
    if (std::optional<uint16_t> BADisc =
            STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
      MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR64RegClass, MRI);
      I.eraseFromParent();
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       I.getOperand(1).getBlockAddress(), 0,
      I.eraseFromParent();

  case AArch64::G_DUP: {
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
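// MOPS memcpy/memmove/memset pseudo selection (requires the +mops feature).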
bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;

  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
  Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
    if (STI.isTargetMachO()) {
      assert(STI.isTargetELF() &&
             "jump table hardening only supported on MachO/ELF");
    I.eraseFromParent();
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
                 {static_cast<int64_t>(JTI)});
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();

bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();
bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())
  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
    assert(Opcode == AArch64::BLR);
    Opcode = AArch64::BLRAAZ;
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
  I.eraseFromParent();
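// Vector lane helpers: emitScalarToVector inserts a scalar into an undef
// vector via INSERT_SUBREG; emitNarrowVector copies out the low 32/64 bits of
// a wider vector through the ssub/dsub subregister.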
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  auto BuildFn = [&](unsigned SubregIndex) {
            .addImm(SubregIndex);
    return BuildFn(AArch64::bsub);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);

AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
  LLT DstTy = MRI.getType(DstReg);
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      .addReg(SrcReg, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *RC, MRI);
bool AArch64InstructionSelector::selectMergeValues(
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  if (I.getNumOperands() != 3)
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                         Src2Reg, 1, RB, MIB);
    I.eraseFromParent();
  if (RB.getID() != AArch64::GPRRegBankID)
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
                             TII.get(TargetOpcode::SUBREG_TO_REG))
                         .addUse(I.getOperand(1).getReg())
                         .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
                              TII.get(TargetOpcode::SUBREG_TO_REG))
                          .addUse(I.getOperand(2).getReg())
                          .addImm(AArch64::sub_32);
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
  I.eraseFromParent();
                              const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
  const LLT &VecTy = MRI.getType(VecReg);
      getRegClassForTypeOnBank(VecTy, VecRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    DstReg = MRI.createVirtualRegister(DstRC);
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  I.eraseFromParent();
4107bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4109 unsigned NumElts =
I.getNumOperands() - 1;
4110 Register SrcReg =
I.getOperand(NumElts).getReg();
4111 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4112 const LLT SrcTy =
MRI.getType(SrcReg);
4114 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4116 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4123 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4124 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4125 Register Dst =
I.getOperand(OpIdx).getReg();
4127 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4131 I.eraseFromParent();
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand
  // is a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
        LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
        *RBI.getRegBank(SrcReg, MRI, TRI));
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subreg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI = *BuildMI(MBB, I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::INSERT_SUBREG),
                                     InsertReg)
                                 .addUse(ImpDefReg)
                                 .addUse(SrcReg)
                                 .addImm(SubReg);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Copy lane 0 out of the first register as a plain subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);

  // Perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination, using the class of the
  // second operand since all unmerged destinations share the same class.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
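// Selects G_CONCAT_VECTORS by delegating to emitVectorConcat, defined later in
// this file.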
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}
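// Constant-pool materialization: emitConstantPoolEntry creates the pool entry
// and returns its index, and emitLoadFromConstantPool loads that entry into an
// FPR. Outside the tiny code model the load is addressed page-wise, roughly
// (illustrative MIR):
//   %page:gpr64 = ADRP target-flags(aarch64-page) %const.0
//   %val:fpr128 = LDRQui %page,
//                 target-flags(aarch64-pageoff, aarch64-nc) %const.0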
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPVal->getType());
  return MF.getConstantPool()->getConstantPoolIndex(CPVal, Alignment);
}
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  }
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
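// Returns the INS (insert-element) opcode and subregister index for a given
// source register bank and element size: INSvi*gpr when the inserted scalar
// comes from a GPR, INSvi*lane when it comes from another vector lane.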
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
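// Thin wrapper used by the emit* helpers below: builds the requested
// instruction, applies any complex-pattern renderer functions to fill in the
// remaining operands, then constrains the result to register classes.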
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
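// Shared ADD/SUB emission. The 5x2 opcode table is indexed as
// [addressing mode][Is32Bit], with rows 0..4 = {ri, rs, rr,
// negated-immediate ri, rx} and columns {64-bit, 32-bit}. For example, an s32
// add whose RHS is a 12-bit immediate ends up as ADDWri via row 0, column 1.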
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
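// emitADD, emitADDS and emitSUBS differ only in the opcode table they pass to
// emitAddSub. Note that the "negated immediate" row holds the opposite
// operation, so e.g. a SUBS of a negative immediate is emitted as an ADDS of
// its absolute value.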
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
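// Carry-consuming forms used when expanding wide additions and subtractions:
// ADCS/SBCS take the carry flag as an extra input and set flags again.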
MachineInstr *
AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}

MachineInstr *
AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}
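// CMN (compare negative) is an ADDS whose result is only needed for its
// flags, so it is emitted into a scratch virtual register.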
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
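// TST is an ANDS whose result is only needed for its flags. Prefer the
// logical-immediate form when the RHS is a suitable constant, then the
// shifted-register form, and fall back to the plain register-register form.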
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
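// Emits a flag-setting integer compare, first trying to fold it into a CMN or
// TST via tryFoldIntegerCompare and otherwise emitting a SUBS whose
// destination is a scratch register of the same class as the LHS.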
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a CMN or TST if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
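// Materializes the boolean result of a floating-point compare. Most
// predicates need a single CSINC of the inverted condition code; predicates
// that map to two AArch64 condition codes need two CSINCs ORed together.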
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  Register Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
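// Emits FCMP in half/single/double width, using the register-register form or
// the compare-against-+0.0 immediate form depending on the operands.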
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // A compare against +0.0 can use the immediate form of FCMP; for equality
  // predicates the operands may be swapped to expose the +0.0 on the RHS.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && FPImm->isZero() && !FPImm->isNegative();
  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && LHSImm->isZero() && !LHSImm->isNegative()) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }

  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
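// Concatenates two sub-128-bit vectors: widen each operand to an FPR128 with
// scalar_to_vector, then INS the second operand into the upper half. Roughly
// (illustrative MIR for two v2s32 inputs):
//   %lo:fpr128  = ... widened Op1 ...
//   %hi:fpr128  = ... widened Op2 ...
//   %dst:fpr128 = INSvi64lane %lo, 1, %hi, 0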
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);