#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
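// The GET_GLOBALISEL_PREDICATE_BITSET include pulls in the bitset type the
// TableGen-erated matcher uses to track which subtarget predicates are active.
// Illustrative sketch (not from this file; ExampleSelector/selectManually are
// hypothetical names): a GlobalISel selector normally tries the generated
// matcher first and only then falls back to hand-written selection:
//
//   bool ExampleSelector::select(MachineInstr &I) {
//     if (selectImpl(I, *CoverageInfo)) // generated from the .td patterns
//       return true;
//     return selectManually(I);         // hand-written fallback paths
//   }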
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);

  ProduceNonFlagSettingCondBr =

  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,

  unsigned Opc1, unsigned Opc2, bool isExt);

  unsigned emitConstantPoolEntry(const Constant *CPVal,

  std::optional<CmpInst::Predicate> = std::nullopt) const;
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;

  const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,

  std::pair<MachineInstr *, AArch64CC::CondCode>

  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  unsigned Size) const;
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
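  // The selectAddrModeUnscaled<N> wrappers above differ only in the byte size
  // they forward to selectAddrModeUnscaled; as a sketch, each instantiation is
  // equivalent to
  //
  //   ComplexRendererFns selectAddrModeUnscaledN(MachineOperand &Root) const {
  //     return selectAddrModeUnscaled(Root, N); // N in {1, 2, 4, 8, 16} bytes
  //   }
  //
  // which corresponds to the LDUR/STUR family of unscaled 9-bit signed offsets.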
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

  unsigned Size) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
  bool IsAddrOperand) const;
  unsigned SizeInBytes) const;
  bool WantsExt) const;

  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

  unsigned SizeInBytes) const;

  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  unsigned SizeInBytes) const;

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;

  bool tryOptSelect(GSelect &Sel);

  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),

#include "AArch64GenGlobalISel.inc"

#include "AArch64GenGlobalISel.inc"
                                      bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
      return &AArch64::XSeqPairsClassRegClass;

  if (RB.getID() == AArch64::FPRRegBankID) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
                                      bool GetAllRegSet = false) {
           "Expected FPR regbank for scalable type size");
    return &AArch64::ZPRRegClass;

  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
  switch (TRI.getRegSizeInBits(*RC)) {

    if (RC != &AArch64::FPR32RegClass)

      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:
                           const unsigned RegClassIDs[],

  unsigned NumRegs = Regs.size();

  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");

  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);

      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  return RegSequence.getReg(0);

  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
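// Usage sketch (assumed, not taken from this file): packing two Q registers
// into a QQ tuple for a structured-store intrinsic would look roughly like
//
//   Register Tuple = createQTuple({Src0, Src1}, MIB);
//   MIB.buildInstr(AArch64::ST2Twov16b, {}, {Tuple, PtrReg}); // illustrative opcode
//
// The REG_SEQUENCE built by createTuple is what lets the register allocator
// place the values in consecutive Q registers, as ST2/ST3/ST4 require.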
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
      Immed = ValAndVReg->Value.getSExtValue();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");

    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
  case AArch64::FPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
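  // Example of the mapping above: a 32-bit scalar load on the GPR bank becomes
  // LDRWui ("ldr w0, [x1, #imm]"), while the same width on the FPR bank becomes
  // LDRSui ("ldr s0, [x1, #imm]"); the *ui forms take an unsigned, scaled
  // 12-bit offset.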
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");

    RegOp.setReg(SubRegCopy.getReg(0));

    if (!I.getOperand(0).getReg().isPhysical())

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))

    if (Reg.isPhysical())
    LLT Ty = MRI.getType(Reg);
      RC = getRegClassForTypeOnBank(Ty, RB);
      dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");

  const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
  const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);

    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
    Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
            TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
    RegOp.setReg(PromoteReg);

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  I.setDesc(TII.get(AArch64::COPY));
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWSr;
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXSri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXSri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUWDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUWDr;
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUWDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUWDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXSr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXSr;
  switch (GenericOpc) {
  case TargetOpcode::G_SITOFP:
    return AArch64::SCVTFUXDri;
  case TargetOpcode::G_UITOFP:
    return AArch64::UCVTFUXDri;
  case TargetOpcode::G_FPTOSI:
    return AArch64::FCVTZSUXDr;
  case TargetOpcode::G_FPTOUI:
    return AArch64::FCVTZUUXDr;
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;

  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
        Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

      int64_t T = TrueCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

      int64_t F = FalseCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

  Optimized |= TryFoldBinOpIntoSelect(False, True, false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, true);
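  // The constant cases above lean on the standard CSEL-family identities:
  //
  //   csinc Xd, XZR, XZR, cc   ; Xd = cc ? 0 : 1
  //   csinv Xd, XZR, XZR, cc   ; Xd = cc ? 0 : -1
  //   csneg Xd, Xn,  Xm,  cc   ; Xd = cc ? Xn : -Xm
  //
  // so a select between 0/1 or 0/-1 needs no materialized constants at all.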
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))

    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)

      Register NextReg = MI->getOperand(1).getReg();
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))

    std::optional<uint64_t> C;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
        C = VRegAndVal->Value.getZExtValue();
        C = VRegAndVal->Value.getSExtValue();
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
        C = VRegAndVal->Value.getSExtValue();

    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    case TargetOpcode::G_AND:
      if ((*C >> Bit) & 1)
    case TargetOpcode::G_SHL:
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
    case TargetOpcode::G_ASHR:
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
    case TargetOpcode::G_LSHR:
      if ((Bit + *C) < TestRegSize) {
    case TargetOpcode::G_XOR:
      if ((*C >> Bit) & 1)
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");

  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
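  // Example of the result: branching when bit 3 of w0 is set collapses to a
  // single instruction,
  //
  //   tbnz w0, #3, <DstMBB>
  //
  // TB(N)Z encodes a 6-bit bit number, so bit indices >= 32 force the
  // X-register form chosen via OpcTable above.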
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  int32_t Bit = MaybeBit->Value.exactLogBase2();

  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)

  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, true, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();

  if (VRegAndVal && VRegAndVal->Value == 0) {
    tryOptAndIntoCompareBranch(
    I.eraseFromParent();

  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, 0, true, I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
        .addMBB(I.getOperand(1).getMBB());
    I.eraseFromParent();

  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
    return std::nullopt;
  int64_t Imm = *ShiftImm;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
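// NEON has no vector right-shift-by-register instruction, so the code above
// negates the shift amount and uses SSHL/USHL, which shift left for positive
// amounts and right for negative ones. For a v4s32 arithmetic shift the
// emitted sequence is roughly:
//
//   neg  v1.4s, v1.4s
//   sshl v0.4s, v0.4s, v1.4s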
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2003 Register ListReg =
I.getOperand(0).getReg();
2005 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2016 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2024 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2031 I.eraseFromParent();
void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
                   : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
          GV, MovZ->getOperand(1).getOffset(), Flags));
          MovZ->getOperand(1).getOffset(), Flags));
  Register DstReg = BuildMovK(MovZ.getReg(0),
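// materializeLargeCMVal expands a 64-bit symbolic address under the large code
// model into a MOVZ of the low 16 bits followed by three MOVKs, one 16-bit
// chunk at a time (sketch of the emitted sequence):
//
//   movz x0, #chunk0
//   movk x0, #chunk1, lsl #16
//   movk x0, #chunk2, lsl #32
//   movk x0, #chunk3, lsl #48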
2072bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2077 switch (
I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(
I,
MRI);
2095 case TargetOpcode::G_LOAD: {
2100 Register DstReg =
I.getOperand(0).getReg();
2101 const LLT DstTy =
MRI.getType(DstReg);
2107 case AArch64::G_DUP: {
2109 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2113 MRI.setType(
I.getOperand(0).getReg(),
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2125 Register SrcReg =
I.getOperand(1).getReg();
2126 LLT SrcTy =
MRI.getType(SrcReg);
2127 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2131 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(
TII.get(AArch64::G_SITOF));
2135 I.setDesc(
TII.get(AArch64::G_UITOF));
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);

  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
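// Rewriting G_PTR_ADD into integer arithmetic lets the imported TableGen
// patterns (which match G_ADD/G_SUB) select it. Sketch of the transformation
// on gMIR:
//
//   %p:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
// becomes
//   %b:gpr(s64) = G_PTRTOINT %base(p0)
//   %p:gpr(s64) = G_ADD %b, %off
//
// and, when %off is a negated value, the G_ADD is flipped into a G_SUB so the
// negation folds away.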
2191bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2196 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2197 const auto &MO =
I.getOperand(2);
2202 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2209 if (!Imm1Fn || !Imm2Fn)
2213 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2216 for (
auto &RenderFn : *Imm1Fn)
2218 for (
auto &RenderFn : *Imm2Fn)
2221 I.eraseFromParent();
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2227 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2245 LLT DefDstTy =
MRI.getType(DefDstReg);
2246 Register StoreSrcReg =
I.getOperand(0).getReg();
2247 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2258 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2259 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2263 I.getOperand(0).setReg(DefDstReg);
2267bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2268 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2269 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2275 switch (
I.getOpcode()) {
2276 case AArch64::G_DUP: {
2279 Register Src =
I.getOperand(1).getReg();
2284 Register Dst =
I.getOperand(0).getReg();
2286 MRI.getType(Dst).getNumElements(),
2289 ValAndVReg->Value.trunc(
MRI.getType(Dst).getScalarSizeInBits())));
2290 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2292 I.eraseFromParent();
2295 case TargetOpcode::G_SEXT:
2298 if (selectUSMovFromExtend(
I,
MRI))
2301 case TargetOpcode::G_BR:
2303 case TargetOpcode::G_SHL:
2304 return earlySelectSHL(
I,
MRI);
2305 case TargetOpcode::G_CONSTANT: {
2306 bool IsZero =
false;
2307 if (
I.getOperand(1).isCImm())
2308 IsZero =
I.getOperand(1).getCImm()->isZero();
2309 else if (
I.getOperand(1).isImm())
2310 IsZero =
I.getOperand(1).getImm() == 0;
2315 Register DefReg =
I.getOperand(0).getReg();
2316 LLT Ty =
MRI.getType(DefReg);
2318 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2319 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2321 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2322 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2326 I.setDesc(
TII.get(TargetOpcode::COPY));
2330 case TargetOpcode::G_ADD: {
2339 Register AddDst =
I.getOperand(0).getReg();
2340 Register AddLHS =
I.getOperand(1).getReg();
2341 Register AddRHS =
I.getOperand(2).getReg();
2343 LLT Ty =
MRI.getType(AddLHS);
2352 if (!
MRI.hasOneNonDBGUse(Reg))
2366 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2376 Cmp = MatchCmp(AddRHS);
2380 auto &PredOp =
Cmp->getOperand(1);
2385 emitIntegerCompare(
Cmp->getOperand(2),
2386 Cmp->getOperand(3), PredOp, MIB);
2387 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2388 I.eraseFromParent();
2391 case TargetOpcode::G_OR: {
2395 Register Dst =
I.getOperand(0).getReg();
2396 LLT Ty =
MRI.getType(Dst);
2415 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2418 int64_t Immr =
Size - ShiftImm;
2419 int64_t Imms =
Size - ShiftImm - 1;
2420 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2421 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2422 I.eraseFromParent();
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
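    // The two immediates chosen above are DMB barrier operands: 0x9 is "ishld"
    // (load-only barrier, used when the fence ordering is 4, i.e. acquire) and
    // 0xb is "ish" (full inner-shareable barrier), so the selected code is
    // roughly "dmb ishld" vs. "dmb ish".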
2440 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2441 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2448 if (Subtarget->requiresStrictAlign()) {
2450 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2456 unsigned Opcode =
I.getOpcode();
2458 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2461 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2464 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2465 const Register DefReg =
I.getOperand(0).getReg();
2466 const LLT DefTy =
MRI.getType(DefReg);
2469 MRI.getRegClassOrRegBank(DefReg);
2479 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2486 I.setDesc(
TII.get(TargetOpcode::PHI));
2488 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2494 if (
I.isDebugInstr())
2501 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2503 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2510 if (preISelLower(
I)) {
2511 Opcode =
I.getOpcode();
2522 if (selectImpl(
I, *CoverageInfo))
2526 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2529 case TargetOpcode::G_SBFX:
2530 case TargetOpcode::G_UBFX: {
2531 static const unsigned OpcTable[2][2] = {
2532 {AArch64::UBFMWri, AArch64::UBFMXri},
2533 {AArch64::SBFMWri, AArch64::SBFMXri}};
2534 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2536 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2539 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2542 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2543 auto LSB = Cst1->Value.getZExtValue();
2544 auto Width = Cst2->Value.getZExtValue();
2546 MIB.
buildInstr(Opc, {
I.getOperand(0)}, {
I.getOperand(1)})
2548 .
addImm(LSB + Width - 1);
2549 I.eraseFromParent();
2552 case TargetOpcode::G_BRCOND:
2553 return selectCompareBranch(
I, MF,
MRI);
2555 case TargetOpcode::G_BRINDIRECT: {
2557 if (std::optional<uint16_t> BADisc =
2558 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2562 MI.addReg(AArch64::XZR);
2563 I.eraseFromParent();
2566 I.setDesc(
TII.get(AArch64::BR));
2570 case TargetOpcode::G_BRJT:
2571 return selectBrJT(
I,
MRI);
2573 case AArch64::G_ADD_LOW: {
2579 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2580 I.setDesc(
TII.get(AArch64::ADDXri));
2585 "Expected small code model");
2587 auto Op2 =
I.getOperand(2);
2588 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2589 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2590 Op1.getTargetFlags())
2592 Op2.getTargetFlags());
2593 I.eraseFromParent();
2597 case TargetOpcode::G_FCONSTANT:
2598 case TargetOpcode::G_CONSTANT: {
2599 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2608 const Register DefReg =
I.getOperand(0).getReg();
2609 const LLT DefTy =
MRI.getType(DefReg);
2615 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2617 <<
" constant, expected: " << s16 <<
" or " << s32
2618 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2622 if (RB.
getID() != AArch64::FPRRegBankID) {
2624 <<
" constant on bank: " << RB
2625 <<
", expected: FPR\n");
2633 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2637 if (Ty != p0 && Ty != s8 && Ty != s16) {
2639 <<
" constant, expected: " << s32 <<
", " << s64
2640 <<
", or " << p0 <<
'\n');
2644 if (RB.
getID() != AArch64::GPRRegBankID) {
2646 <<
" constant on bank: " << RB
2647 <<
", expected: GPR\n");
2664 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2671 auto *FPImm =
I.getOperand(1).getFPImm();
2674 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2678 I.eraseFromParent();
2679 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2683 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2685 const Register DefGPRReg =
MRI.createVirtualRegister(
2686 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2692 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2693 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2701 }
else if (
I.getOperand(1).isCImm()) {
2702 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2703 I.getOperand(1).ChangeToImmediate(Val);
2704 }
else if (
I.getOperand(1).isImm()) {
2705 uint64_t Val =
I.getOperand(1).getImm();
2706 I.getOperand(1).ChangeToImmediate(Val);
2709 const unsigned MovOpc =
2710 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2711 I.setDesc(
TII.get(MovOpc));
2715 case TargetOpcode::G_EXTRACT: {
2716 Register DstReg =
I.getOperand(0).getReg();
2717 Register SrcReg =
I.getOperand(1).getReg();
2718 LLT SrcTy =
MRI.getType(SrcReg);
2719 LLT DstTy =
MRI.getType(DstReg);
2731 unsigned Offset =
I.getOperand(2).getImm();
2740 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2742 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2744 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2746 AArch64::GPR64RegClass, NewI->getOperand(0));
2747 I.eraseFromParent();
2753 unsigned LaneIdx =
Offset / 64;
2755 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2758 I.eraseFromParent();
2762 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2768 "unexpected G_EXTRACT types");
2775 .addReg(DstReg, 0, AArch64::sub_32);
2776 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2777 AArch64::GPR32RegClass,
MRI);
2778 I.getOperand(0).setReg(DstReg);
2783 case TargetOpcode::G_INSERT: {
2784 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2785 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2792 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2793 unsigned LSB =
I.getOperand(3).getImm();
2794 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2795 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2800 "unexpected G_INSERT types");
2806 TII.get(AArch64::SUBREG_TO_REG))
2809 .
addUse(
I.getOperand(2).getReg())
2810 .
addImm(AArch64::sub_32);
2811 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2812 AArch64::GPR32RegClass,
MRI);
2813 I.getOperand(2).setReg(SrcReg);
2817 case TargetOpcode::G_FRAME_INDEX: {
2824 I.setDesc(
TII.get(AArch64::ADDXri));
2833 case TargetOpcode::G_GLOBAL_VALUE: {
2836 if (
I.getOperand(1).isSymbol()) {
2837 OpFlags =
I.getOperand(1).getTargetFlags();
2841 GV =
I.getOperand(1).getGlobal();
2843 return selectTLSGlobalValue(
I,
MRI);
2844 OpFlags = STI.ClassifyGlobalReference(GV,
TM);
2848 I.setDesc(
TII.get(AArch64::LOADgot));
2849 I.getOperand(1).setTargetFlags(OpFlags);
2851 !
TM.isPositionIndependent()) {
2853 materializeLargeCMVal(
I, GV, OpFlags);
2854 I.eraseFromParent();
2857 I.setDesc(
TII.get(AArch64::ADR));
2858 I.getOperand(1).setTargetFlags(OpFlags);
2860 I.setDesc(
TII.get(AArch64::MOVaddr));
2863 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2869 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2870 return selectPtrAuthGlobalValue(
I,
MRI);
2872 case TargetOpcode::G_ZEXTLOAD:
2873 case TargetOpcode::G_LOAD:
2874 case TargetOpcode::G_STORE: {
2876 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2890 if (Order != AtomicOrdering::NotAtomic &&
2891 Order != AtomicOrdering::Unordered &&
2892 Order != AtomicOrdering::Monotonic) {
2893 assert(!isa<GZExtLoad>(LdSt));
2894 assert(MemSizeInBytes <= 8 &&
2895 "128-bit atomics should already be custom-legalized");
2897 if (isa<GLoad>(LdSt)) {
2898 static constexpr unsigned LDAPROpcodes[] = {
2899 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2900 static constexpr unsigned LDAROpcodes[] = {
2901 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2903 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2906 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2908 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2909 AArch64::STLRW, AArch64::STLRX};
2911 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2913 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2914 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2915 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2916 I.getOperand(0).setReg(NewVal);
2918 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2929 "Load/Store pointer operand isn't a GPR");
2930 assert(
MRI.getType(PtrReg).isPointer() &&
2931 "Load/Store pointer operand isn't a pointer");
2935 const LLT ValTy =
MRI.getType(ValReg);
2940 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2943 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2949 .addReg(ValReg, 0,
SubReg)
2951 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2953 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2956 if (RB.
getID() == AArch64::FPRRegBankID) {
2959 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2966 MRI.setRegBank(NewDst, RB);
2969 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2973 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2974 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2981 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2982 bool IsStore = isa<GStore>(
I);
2983 const unsigned NewOpc =
2985 if (NewOpc ==
I.getOpcode())
2989 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2992 I.setDesc(
TII.get(NewOpc));
2998 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2999 Register CurValReg =
I.getOperand(0).getReg();
3000 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3001 NewInst.cloneMemRefs(
I);
3002 for (
auto &Fn : *AddrModeFns)
3004 I.eraseFromParent();
3013 if (Opcode == TargetOpcode::G_STORE) {
3016 if (CVal && CVal->Value == 0) {
3018 case AArch64::STRWui:
3019 case AArch64::STRHHui:
3020 case AArch64::STRBBui:
3021 LoadStore->getOperand(0).setReg(AArch64::WZR);
3023 case AArch64::STRXui:
3024 LoadStore->getOperand(0).setReg(AArch64::XZR);
    if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
                       ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)

      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);

      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(AArch64::sub_32);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
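    // A zero-extending 32-bit load is free on AArch64: "ldr w8, [x0]" already
    // clears bits 63:32 of x8, so widening to s64 only needs the SUBREG_TO_REG
    // above to re-declare the value at the wider width; no extra instruction
    // is emitted. Sketch of the resulting MIR:
    //
    //   %ld:gpr32 = LDRWui %ptr, 0
    //   %dst:gpr64all = SUBREG_TO_REG 0, %ld, %subreg.sub_32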
3054 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3055 case TargetOpcode::G_INDEXED_SEXTLOAD:
3056 return selectIndexedExtLoad(
I,
MRI);
3057 case TargetOpcode::G_INDEXED_LOAD:
3058 return selectIndexedLoad(
I,
MRI);
3059 case TargetOpcode::G_INDEXED_STORE:
3060 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3062 case TargetOpcode::G_LSHR:
3063 case TargetOpcode::G_ASHR:
3064 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3065 return selectVectorAshrLshr(
I,
MRI);
3067 case TargetOpcode::G_SHL:
3068 if (Opcode == TargetOpcode::G_SHL &&
3069 MRI.getType(
I.getOperand(0).getReg()).isVector())
3070 return selectVectorSHL(
I,
MRI);
3077 Register SrcReg =
I.getOperand(1).getReg();
3078 Register ShiftReg =
I.getOperand(2).getReg();
3079 const LLT ShiftTy =
MRI.getType(ShiftReg);
3080 const LLT SrcTy =
MRI.getType(SrcReg);
3085 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3086 .addReg(ShiftReg, 0, AArch64::sub_32);
3087 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3088 I.getOperand(2).setReg(Trunc.getReg(0));
3092 case TargetOpcode::G_OR: {
3099 const Register DefReg =
I.getOperand(0).getReg();
3103 if (NewOpc ==
I.getOpcode())
3106 I.setDesc(
TII.get(NewOpc));
3114 case TargetOpcode::G_PTR_ADD: {
3115 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3116 I.eraseFromParent();
3120 case TargetOpcode::G_SADDE:
3121 case TargetOpcode::G_UADDE:
3122 case TargetOpcode::G_SSUBE:
3123 case TargetOpcode::G_USUBE:
3124 case TargetOpcode::G_SADDO:
3125 case TargetOpcode::G_UADDO:
3126 case TargetOpcode::G_SSUBO:
3127 case TargetOpcode::G_USUBO:
3128 return selectOverflowOp(
I,
MRI);
3130 case TargetOpcode::G_PTRMASK: {
3131 Register MaskReg =
I.getOperand(2).getReg();
3138 I.setDesc(
TII.get(AArch64::ANDXri));
3139 I.getOperand(2).ChangeToImmediate(
3144 case TargetOpcode::G_PTRTOINT:
3145 case TargetOpcode::G_TRUNC: {
3146 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3147 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3149 const Register DstReg =
I.getOperand(0).getReg();
3150 const Register SrcReg =
I.getOperand(1).getReg();
3157 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3161 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3170 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3171 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3172 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3176 if (DstRC == SrcRC) {
3178 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3182 }
else if (DstRC == &AArch64::GPR32RegClass &&
3183 SrcRC == &AArch64::GPR64RegClass) {
3184 I.getOperand(1).setSubReg(AArch64::sub_32);
3187 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3191 I.setDesc(
TII.get(TargetOpcode::COPY));
3193 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3196 I.setDesc(
TII.get(AArch64::XTNv4i16));
3206 I.eraseFromParent();
3211 if (Opcode == TargetOpcode::G_PTRTOINT) {
3212 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3213 I.setDesc(
TII.get(TargetOpcode::COPY));
3221 case TargetOpcode::G_ANYEXT: {
3222 if (selectUSMovFromExtend(
I,
MRI))
3225 const Register DstReg =
I.getOperand(0).getReg();
3226 const Register SrcReg =
I.getOperand(1).getReg();
3229 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3231 <<
", expected: GPR\n");
3236 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3238 <<
", expected: GPR\n");
3242 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3245 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3249 if (DstSize != 64 && DstSize > 32) {
3251 <<
", expected: 32 or 64\n");
3257 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3262 .
addImm(AArch64::sub_32);
3263 I.getOperand(1).setReg(ExtSrc);
3268 case TargetOpcode::G_ZEXT:
3269 case TargetOpcode::G_SEXT_INREG:
3270 case TargetOpcode::G_SEXT: {
3271 if (selectUSMovFromExtend(
I,
MRI))
3274 unsigned Opcode =
I.getOpcode();
3275 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3276 const Register DefReg =
I.getOperand(0).getReg();
3277 Register SrcReg =
I.getOperand(1).getReg();
3278 const LLT DstTy =
MRI.getType(DefReg);
3279 const LLT SrcTy =
MRI.getType(SrcReg);
3285 if (Opcode == TargetOpcode::G_SEXT_INREG)
3286 SrcSize =
I.getOperand(2).getImm();
3292 AArch64::GPRRegBankID &&
3293 "Unexpected ext regbank");
3306 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3307 if (LoadMI && IsGPR) {
3309 unsigned BytesLoaded =
MemOp->getSize().getValue();
3316 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3318 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3319 const Register ZReg = AArch64::WZR;
3320 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3323 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3326 .
addImm(AArch64::sub_32);
3328 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3330 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3334 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3340 I.eraseFromParent();
    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,

        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,

    I.eraseFromParent();
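    // SBFM/UBFM with immr = 0 and imms = SrcSize - 1 is how AArch64 spells
    // sign/zero extension, e.g. "sxtb w0, w1" is an alias of
    // "sbfm w0, w1, #0, #7". For a 64-bit destination the 32-bit source is
    // first wrapped in SUBREG_TO_REG so the X-form SBFMXri/UBFMXri can be used.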
3380 case TargetOpcode::G_SITOFP:
3381 case TargetOpcode::G_UITOFP:
3382 case TargetOpcode::G_FPTOSI:
3383 case TargetOpcode::G_FPTOUI: {
3384 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3385 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3387 if (NewOpc == Opcode)
3390 I.setDesc(
TII.get(NewOpc));
3397 case TargetOpcode::G_FREEZE:
3400 case TargetOpcode::G_INTTOPTR:
3405 case TargetOpcode::G_BITCAST:
3413 case TargetOpcode::G_SELECT: {
3414 auto &Sel = cast<GSelect>(
I);
3415 const Register CondReg = Sel.getCondReg();
3416 const Register TReg = Sel.getTrueReg();
3417 const Register FReg = Sel.getFalseReg();
3419 if (tryOptSelect(Sel))
3424 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3425 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3428 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3430 Sel.eraseFromParent();
3433 case TargetOpcode::G_ICMP: {
3446 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3447 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3448 AArch64::WZR, InvCC, MIB);
3449 I.eraseFromParent();
3453 case TargetOpcode::G_FCMP: {
3456 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3458 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3460 I.eraseFromParent();
3463 case TargetOpcode::G_VASTART:
3464 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3465 : selectVaStartAAPCS(
I, MF,
MRI);
3466 case TargetOpcode::G_INTRINSIC:
3467 return selectIntrinsic(
I,
MRI);
3468 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3469 return selectIntrinsicWithSideEffects(
I,
MRI);
3470 case TargetOpcode::G_IMPLICIT_DEF: {
3471 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3472 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3473 const Register DstReg =
I.getOperand(0).getReg();
3476 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3479 case TargetOpcode::G_BLOCK_ADDR: {
3480 Function *BAFn =
I.getOperand(1).getBlockAddress()->getFunction();
3481 if (std::optional<uint16_t> BADisc =
3482 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3483 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3484 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3492 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
3493 AArch64::GPR64RegClass,
MRI);
3494 I.eraseFromParent();
3498 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3499 I.eraseFromParent();
3502 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3503 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3504 I.getOperand(0).getReg())
3508 I.getOperand(1).getBlockAddress(), 0,
3510 I.eraseFromParent();
3514 case AArch64::G_DUP: {
3520 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3521 AArch64::GPRRegBankID)
3523 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3525 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3527 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3529 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3531 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3536 case TargetOpcode::G_BUILD_VECTOR:
3537 return selectBuildVector(
I,
MRI);
3538 case TargetOpcode::G_MERGE_VALUES:
3540 case TargetOpcode::G_UNMERGE_VALUES:
3542 case TargetOpcode::G_SHUFFLE_VECTOR:
3543 return selectShuffleVector(
I,
MRI);
3544 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3545 return selectExtractElt(
I,
MRI);
3546 case TargetOpcode::G_CONCAT_VECTORS:
3547 return selectConcatVectors(
I,
MRI);
3548 case TargetOpcode::G_JUMP_TABLE:
3549 return selectJumpTable(
I,
MRI);
3550 case TargetOpcode::G_MEMCPY:
3551 case TargetOpcode::G_MEMCPY_INLINE:
3552 case TargetOpcode::G_MEMMOVE:
3553 case TargetOpcode::G_MEMSET:
3554 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3555 return selectMOPS(
I,
MRI);
3561bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3568bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3572 case TargetOpcode::G_MEMCPY:
3573 case TargetOpcode::G_MEMCPY_INLINE:
3574 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3576 case TargetOpcode::G_MEMMOVE:
3577 Mopcode = AArch64::MOPSMemoryMovePseudo;
3579 case TargetOpcode::G_MEMSET:
3581 Mopcode = AArch64::MOPSMemorySetPseudo;
3590 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3591 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3594 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3595 const auto &SrcValRegClass =
3596 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3599 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3600 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3601 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3611 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3612 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3614 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3615 {DstPtrCopy, SizeCopy, SrcValCopy});
3617 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3618 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3619 {DstPtrCopy, SrcValCopy, SizeCopy});
3628 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3629 Register JTAddr =
I.getOperand(0).getReg();
3630 unsigned JTI =
I.getOperand(1).getIndex();
3639 if (STI.isTargetMachO()) {
3644 assert(STI.isTargetELF() &&
3645 "jump table hardening only supported on MachO/ELF");
3653 I.eraseFromParent();
3657 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3658 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3660 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3661 {TargetReg, ScratchReg}, {JTAddr,
Index})
3662 .addJumpTableIndex(JTI);
3664 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3665 {
static_cast<int64_t
>(JTI)});
3667 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3668 I.eraseFromParent();
3672bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3674 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3675 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3677 Register DstReg =
I.getOperand(0).getReg();
3678 unsigned JTI =
I.getOperand(1).getIndex();
3681 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3684 I.eraseFromParent();
3688bool AArch64InstructionSelector::selectTLSGlobalValue(
3690 if (!STI.isTargetMachO())
3695 const auto &GlobalOp =
I.getOperand(1);
3696 assert(GlobalOp.getOffset() == 0 &&
3697 "Shouldn't have an offset on TLS globals!");
3701 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3704 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3705 {LoadGOT.getReg(0)})
3716 assert(Opcode == AArch64::BLR);
3717 Opcode = AArch64::BLRAAZ;
3726 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3728 I.eraseFromParent();
3732MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3735 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3737 auto BuildFn = [&](
unsigned SubregIndex) {
3741 .addImm(SubregIndex);
3749 return BuildFn(AArch64::bsub);
3751 return BuildFn(AArch64::hsub);
3753 return BuildFn(AArch64::ssub);
3755 return BuildFn(AArch64::dsub);
3762AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3765 LLT DstTy =
MRI.getType(DstReg);
3767 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3768 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3775 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3781 .addReg(SrcReg, 0,
SubReg);
3782 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3786bool AArch64InstructionSelector::selectMergeValues(
3788 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3789 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3790 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3794 if (
I.getNumOperands() != 3)
3801 Register DstReg =
I.getOperand(0).getReg();
3802 Register Src1Reg =
I.getOperand(1).getReg();
3803 Register Src2Reg =
I.getOperand(2).getReg();
3804 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3805 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3810 Src2Reg, 1, RB, MIB);
3815 I.eraseFromParent();
3819 if (RB.
getID() != AArch64::GPRRegBankID)
3825 auto *DstRC = &AArch64::GPR64RegClass;
3826 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3828 TII.get(TargetOpcode::SUBREG_TO_REG))
3831 .
addUse(
I.getOperand(1).getReg())
3832 .
addImm(AArch64::sub_32);
3833 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3836 TII.get(TargetOpcode::SUBREG_TO_REG))
3839 .
addUse(
I.getOperand(2).getReg())
3840 .
addImm(AArch64::sub_32);
3842 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3843 .
addDef(
I.getOperand(0).getReg())
3851 I.eraseFromParent();
3856 const unsigned EltSize) {
3861 CopyOpc = AArch64::DUPi8;
3862 ExtractSubReg = AArch64::bsub;
3865 CopyOpc = AArch64::DUPi16;
3866 ExtractSubReg = AArch64::hsub;
3869 CopyOpc = AArch64::DUPi32;
3870 ExtractSubReg = AArch64::ssub;
3873 CopyOpc = AArch64::DUPi64;
3874 ExtractSubReg = AArch64::dsub;
3878 LLVM_DEBUG(
dbgs() <<
"Elt size '" << EltSize <<
"' unsupported.\n");
3884MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3885 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3888 unsigned CopyOpc = 0;
3889 unsigned ExtractSubReg = 0;
3892 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3897 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3899 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3904 const LLT &VecTy =
MRI.getType(VecReg);
3906 getRegClassForTypeOnBank(VecTy, VecRB,
true);
3908 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3915 DstReg =
MRI.createVirtualRegister(DstRC);
3918 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3919 .addReg(VecReg, 0, ExtractSubReg);
3920 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3929 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3930 if (!ScalarToVector)
3936 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3940 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3944bool AArch64InstructionSelector::selectExtractElt(
3946 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3947 "unexpected opcode!");
3948 Register DstReg =
I.getOperand(0).getReg();
3949 const LLT NarrowTy =
MRI.getType(DstReg);
3950 const Register SrcReg =
I.getOperand(1).getReg();
3951 const LLT WideTy =
MRI.getType(SrcReg);
3954 "source register size too small!");
3955 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
3959 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
3961 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
3970 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3974 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3979 I.eraseFromParent();
3983bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3985 unsigned NumElts =
I.getNumOperands() - 1;
3986 Register SrcReg =
I.getOperand(NumElts).getReg();
3987 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
3988 const LLT SrcTy =
MRI.getType(SrcReg);
3990 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
3992 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
3999 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4000 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4001 Register Dst =
I.getOperand(OpIdx).getReg();
4003 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4007 I.eraseFromParent();
4011bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4013 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4014 "unexpected opcode");
4017 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4018 AArch64::FPRRegBankID ||
4019 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4020 AArch64::FPRRegBankID) {
4021 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4022 "currently unsupported.\n");
4028 unsigned NumElts =
I.getNumOperands() - 1;
4029 Register SrcReg =
I.getOperand(NumElts).getReg();
4030 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4031 const LLT WideTy =
MRI.getType(SrcReg);
4034 "can only unmerge from vector or s128 types!");
4036 "source register size too small!");
4039 return selectSplitVectorUnmerge(
I,
MRI);
4043 unsigned CopyOpc = 0;
4044 unsigned ExtractSubReg = 0;
4055 unsigned NumInsertRegs = NumElts - 1;
4067 *RBI.getRegBank(SrcReg,
MRI,
TRI));
  assert(Found && "expected to find last operand's subreg idx");
4072 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4073 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4075 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4079 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4082 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4099 Register CopyTo =
I.getOperand(0).getReg();
4100 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4101 .addReg(InsertRegs[0], 0, ExtractSubReg);
4105 unsigned LaneIdx = 1;
4106 for (
Register InsReg : InsertRegs) {
4107 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4120 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4126 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4127 I.eraseFromParent();
4131bool AArch64InstructionSelector::selectConcatVectors(
4133 assert(
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4134 "Unexpected opcode");
4135 Register Dst =
I.getOperand(0).getReg();
4136 Register Op1 =
I.getOperand(1).getReg();
4137 Register Op2 =
I.getOperand(2).getReg();
4138 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4141 I.eraseFromParent();
4146AArch64InstructionSelector::emitConstantPoolEntry(
const Constant *CPVal,
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for the tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(CPIdx, 0,
                                         AArch64II::MO_PAGEOFF |
                                             AArch64II::MO_NC);
  }
  // ...
  return LoadMI;
}
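// Return the lane-insert opcode and matching subregister index for an element
// of the given size, depending on whether the value comes from a GPR or an
// FPR register bank.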
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
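// Build an already-selected instruction, apply any complex renderer functions
// to it, and constrain its register operands.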
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
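// Shared ADD/SUB emission helper. Table rows: [0] arithmetic immediate,
// [1] shifted register, [2] plain register-register, [3] negated immediate
// (the opposite opcode's immediate form), [4] extended register. The column
// index Is32Bit picks the X (64-bit) or W (32-bit) variant.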
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
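// Emit an ADD into DefReg, letting emitAddSub pick the best operand form
// (SUB*ri is used when the immediate is easier to represent negated).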
MachineInstr *AArch64InstructionSelector::emitADD(Register DefReg,
                                                  MachineOperand &LHS,
                                                  MachineOperand &RHS,
                                                  MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}
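// Flag-setting add (ADDS), used by emitCMN and the overflow operations.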
MachineInstr *AArch64InstructionSelector::emitADDS(Register Dst,
                                                   MachineOperand &LHS,
                                                   MachineOperand &RHS,
                                                   MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
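// Flag-setting subtract (SUBS); a negated RHS immediate falls back to the
// ADDS immediate form.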
MachineInstr *AArch64InstructionSelector::emitSUBS(Register Dst,
                                                   MachineOperand &LHS,
                                                   MachineOperand &RHS,
                                                   MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
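// Flag-setting add-with-carry (ADCS).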
MachineInstr *AArch64InstructionSelector::emitADCS(Register Dst,
                                                   MachineOperand &LHS,
                                                   MachineOperand &RHS,
                                                   MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}
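// Flag-setting subtract-with-carry (SBCS).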
MachineInstr *AArch64InstructionSelector::emitSBCS(Register Dst,
                                                   MachineOperand &LHS,
                                                   MachineOperand &RHS,
                                                   MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}
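// Emit a CMN: an ADDS whose result goes to a scratch virtual register so that
// only the flags are observable.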
MachineInstr *AArch64InstructionSelector::emitCMN(MachineOperand &LHS,
                                                  MachineOperand &RHS,
                                                  MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
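// Emit a TST (flag-setting AND). Prefer the logical-immediate form, then a
// shifted register, then the plain register-register ANDS.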
MachineInstr *AArch64InstructionSelector::emitTST(MachineOperand &LHS,
                                                  MachineOperand &RHS,
                                                  MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};

  // ANDS needs a logical immediate for its immediate form; fold one in when
  // the RHS constant allows it.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
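// Emit an integer compare: try to fold it away first, otherwise emit a SUBS
// into a scratch register cloned from the LHS.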
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  unsigned Size = CmpTy.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
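// Materialize the boolean result of a floating-point compare into Dst with
// CSINC against WZR; predicates that need two condition codes OR the two
// CSINC results together.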
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");

  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);

  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
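// Emit an FCMP, using the immediate form (compare against +0.0) when
// possible; for equality predicates the operands may be commuted to expose
// it. The opcode is chosen from the operand size (f16/f32/f64).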
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // ... (decide whether the immediate form, which compares against +0.0, can
  //      be used)
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // ...
    ShouldUseImm = true;
    std::swap(LHS, RHS);
  }

  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
  // ...
}
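// Concatenate two identical sub-128-bit vector types: widen each operand into
// a full vector register, then insert the second operand into the upper part
// of the result.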
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // ... (compute ScalarTy, the destination register class DstRC, and FPRBank
  //      for the operands)
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  // ...
}
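// Emit a CSINC. The destination may carry a register class instead of an LLT,
// so the operation size is taken from whichever is available.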
MachineInstr *AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                                     Register Src2,
                                                     AArch64CC::CondCode Pred,
                                                     MachineIRBuilder &MIRBuilder) const {
  auto &MRI = *MIRBuilder.getMRI();
  const TargetRegisterClass *RC = MRI.getRegClassOrNull(Dst);
  unsigned Size;
  if (RC)
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}
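// Produce the carry flag expected by a carry-consuming G_*ADDE/G_*SUBE
// instruction, either by reusing the flags set by the previous instruction or
// by recomputing them from the carry vreg.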
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need the negated carry, because borrowing
  // is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction already produces the required carry, reuse its
  // flags instead of materializing the carry again.
  MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(*SrcMI))
        return nullptr;
    }
  }

  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1.
    Register ZReg = AArch64::WZR;
    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0.
  auto Fns = select12BitValueWithLeftShift(1);
  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
}
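// Select the G_[SU]ADDO, G_[SU]SUBO, G_[SU]ADDE and G_[SU]SUBE operations.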
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  auto &CarryMI = cast<GAddSubCarryOut>(I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
    // Set the NZCV carry flag according to the carry-in vreg.
    emitCarryIn(I, CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert the carry-out into a vreg if it is never used.
  if (!MRI.use_nodbg_empty(CarryOutReg)) {
    Register ZReg = AArch64::WZR;
    emitCSINC(CarryOutReg, ZReg, ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
  }

  I.eraseFromParent();
  return true;
}
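// Map an overflow opcode to the flag-setting instruction that implements it
// and to the condition code that signals overflow (VS for signed overflow,
// HS/LO for unsigned carry/borrow).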
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                           MachineOperand &LHS,
                                           MachineOperand &RHS,
                                           MachineIRBuilder &MIRBuilder) const {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  case TargetOpcode::G_SADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDE:
    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBE:
    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  }
}
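// Returns true if Val is a tree of compares and logical operations that can
// be lowered as a conjunction using conditional compares.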
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;