#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
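// The GET_GLOBALISEL_* guard macros (here and again below for the predicate
// and temporary declarations and the selector implementation) each splice a
// different TableGen-generated section out of AArch64GenGlobalISel.inc.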
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);

  ProduceNonFlagSettingCondBr =
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB);

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                     MachineInstr &I);
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
                                  unsigned Opc);
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                                      unsigned Opc);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;

                std::optional<CmpInst::Predicate> = std::nullopt) const;

  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;

      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,

  std::pair<MachineInstr *, AArch64CC::CondCode>

  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

                                          unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
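// Each selectAddrModeUnscaledN wrapper above fixes the access size in bytes
// (1, 2, 4, 8, or 16) for the shared selectAddrModeUnscaled complex
// renderer; these presumably feed the unscaled-offset (LDUR/STUR-style)
// addressing forms, which take a signed 9-bit byte offset instead of a
// scaled unsigned immediate.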
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

                                          unsigned Size) const;

  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

                                          unsigned SizeInBytes) const;

                                          bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

                                      unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

                                      unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, /*AllowROR=*/true);
  }

                            bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;

  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
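// Expanding GET_GLOBALISEL_IMPL defines the TableGen-generated selectImpl()
// that select() tries first, before falling back to the manual C++ cases
// below.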
AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),

#include "AArch64GenGlobalISel.inc"

#include "AArch64GenGlobalISel.inc"

                                            bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    switch (Ty.getSizeInBits()) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

                                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  switch (TRI.getRegSizeInBits(*RC)) {
    if (RC != &AArch64::FPR32RegClass)
  LLVM_DEBUG(
      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:

                            const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between 2 and 4 registers in a tuple!");
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);

  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
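// createDTuple/createQTuple above wrap createTuple for the D-register
// (DD..DDDD) and Q-register (QQ..QQQQ) tuple classes, choosing the class by
// register count (RegClassIDs[NumRegs - 2]) and tagging each element with
// its dsubN/qsubN subregister index. A rough sketch of the resulting MIR
// for two Q registers:
//
//   %tuple:qq = REG_SEQUENCE %v0:fpr128, %subreg.qsub0,
//                            %v1:fpr128, %subreg.qsub1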
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");

  case AArch64::GPRRegBankID:
    switch (GenericOpc) {
    case TargetOpcode::G_SHL:
      return AArch64::LSLVWr;
    case TargetOpcode::G_LSHR:
      return AArch64::LSRVWr;
    case TargetOpcode::G_ASHR:
      return AArch64::ASRVWr;
    }
  } else if (OpSize == 64) {
    switch (GenericOpc) {
    case TargetOpcode::G_PTR_ADD:
      return AArch64::ADDXrr;
    case TargetOpcode::G_SHL:
      return AArch64::LSLVXr;
    case TargetOpcode::G_LSHR:
      return AArch64::LSRVXr;
    case TargetOpcode::G_ASHR:
      return AArch64::ASRVXr;
    }

  case AArch64::FPRRegBankID:
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDSrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBSrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULSrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVSrr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_FADD:
      return AArch64::FADDDrr;
    case TargetOpcode::G_FSUB:
      return AArch64::FSUBDrr;
    case TargetOpcode::G_FMUL:
      return AArch64::FMULDrr;
    case TargetOpcode::G_FDIV:
      return AArch64::FDIVDrr;
    case TargetOpcode::G_OR:
      return AArch64::ORRv8i8;
    }

  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
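// Opcode table for the unsigned-scaled-immediate ("ui") addressing forms:
// the GPR bank uses the integer load/store opcodes (LDRBBui..LDRXui) by
// access size, the FPR bank uses the scalar FP/vector forms
// (LDRBui..LDRQui); only the FPR side has a 128-bit Q variant.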
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
    RegOp.setReg(SubRegCopy.getReg(0));
  if (!I.getOperand(0).getReg().isPhysical())

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  if (Reg.isPhysical())
  LLT Ty = MRI.getType(Reg);
    RC = getRegClassForTypeOnBank(Ty, RB);
    LLVM_DEBUG(
        dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
    Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
            TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
    RegOp.setReg(PromoteReg);

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  I.setDesc(TII.get(AArch64::COPY));

    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWSr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWDr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXSr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXDr;
    }

             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)
    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();
      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t T = TrueCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t F = FalseCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert=*/false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert=*/true);
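// The conditional-select folds above rest on the identities
//   CSINC Rd, Rn, Rm, cc  ==  cc ? Rn : Rm + 1
//   CSINV Rd, Rn, Rm, cc  ==  cc ? Rn : ~Rm
//   CSNEG Rd, Rn, Rm, cc  ==  cc ? Rn : -Rm
// so a (cc ? 0 : 1) select becomes CSINC with both sources wired to the
// zero register, and (cc ? 0 : -1) becomes CSINV the same way.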
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
    unsigned Opc = MI->getOpcode();
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
      Register NextReg = MI->getOperand(1).getReg();
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
    std::optional<uint64_t> C;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
        C = VRegAndVal->Value.getZExtValue();
        C = VRegAndVal->Value.getSExtValue();
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
        C = VRegAndVal->Value.getSExtValue();
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    case TargetOpcode::G_AND:
      if ((*C >> Bit) & 1)
    case TargetOpcode::G_SHL:
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
    case TargetOpcode::G_ASHR:
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
    case TargetOpcode::G_LSHR:
      if ((Bit + *C) < TestRegSize) {
    case TargetOpcode::G_XOR:
      if ((*C >> Bit) & 1)

  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
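// TBZ/TBNZ test one bit and branch. The W-register forms only encode bit
// numbers 0-31, hence UseWReg above; testing a higher bit requires the X
// forms, and the tested register is first moved into a register class of
// matching width when its size disagrees with the chosen form.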
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);

bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, /*IsNegative=*/true, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, /*IsNegative=*/false, DestMBB, MIB);
      I.eraseFromParent();
  if (VRegAndVal && VRegAndVal->Value == 0) {
    tryOptAndIntoCompareBranch(
    I.eraseFromParent();
  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);
  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, /*Bit=*/0, /*IsNegative=*/true,
                I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();

  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
    return std::nullopt;
  int64_t Imm = *ShiftImm;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;

bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
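// Vector shift-left above: a splat-immediate shift amount (ImmVal) selects
// the SHLvNiM_shift immediate forms; otherwise the amount stays in a vector
// register and the USHL register forms are used instead.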
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC =
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
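// NEON has no right-shift-by-register instruction, so a variable
// G_ASHR/G_LSHR is emitted above as a NEG of the shift vector followed by
// SSHL (arithmetic) or USHL (logical), which shift right when the
// per-element shift amount is negative.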
bool AArch64InstructionSelector::selectVaStartAAPCS(

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();
  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  I.eraseFromParent();

void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
        : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
          GV, MovZ->getOperand(1).getOffset(), Flags));
          MovZ->getOperand(1).getOffset(), Flags));
  Register DstReg = BuildMovK(MovZ.getReg(0),

bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
  case AArch64::G_DUP: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    MRI.setType(I.getOperand(0).getReg(),
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
        I.setDesc(TII.get(AArch64::G_UITOF));

bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));

bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();
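// earlySelectSHL above encodes a constant G_SHL as UBFM, the instruction
// behind the LSL alias: for a 64-bit left shift by s, immr = (64 - s) % 64
// and imms = 63 - s, which is presumably what the selectShiftA/selectShiftB
// renderers fetched above produce. For example, "lsl x0, x1, #3" is
// "ubfm x0, x1, #61, #60".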
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
  I.getOperand(0).setReg(DefDstReg);

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
                        MRI.getType(Dst).getNumElements(),
                        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
    I.eraseFromParent();
  case TargetOpcode::G_SEXT:
    if (selectUSMovFromExtend(I, MRI))
  case TargetOpcode::G_BR:
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->isZero();
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;
    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ADD: {
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    LLT Ty = MRI.getType(AddLHS);
      if (!MRI.hasOneNonDBGUse(Reg))
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
      Cmp = MatchCmp(AddRHS);
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_OR: {
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
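// The G_OR case above recognizes a bitfield insert: an OR of a shifted
// value with a value masked to exactly the low ShiftImm bits
// ((1 << ShiftImm) - 1 == MaskImm). It is emitted as BFM with
// immr = Size - ShiftImm and imms = Size - ShiftImm - 1, i.e. the BFI
// alias.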
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
        .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();

  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  if (Subtarget->requiresStrictAlign()) {
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);
          MRI.getRegClassOrRegBank(DefReg);
          DefRC = getRegClassForTypeOnBank(DefTy, RB);
      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    if (I.isDebugInstr())

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");

  if (preISelLower(I)) {
    Opcode = I.getOpcode();

  if (selectImpl(I, *CoverageInfo))

      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    assert(Cst1 && "Should have gotten a constant for src 1?");
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
        .addImm(LSB)
        .addImm(LSB + Width - 1);
    I.eraseFromParent();
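// G_SBFX/G_UBFX above map directly onto SBFM/UBFM: immr is the field's LSB
// and imms its MSB (LSB + Width - 1). For example, extracting 8 bits
// starting at bit 4 of a 32-bit value is "ubfm w0, w1, #4, #11", the UBFX
// alias.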
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
           "Expected small code model");
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                                         Op2.getTargetFlags());
    I.eraseFromParent();

  case TargetOpcode::G_BSWAP: {
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
                : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
    I.setDesc(TII.get(Opc));

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
                   << " constant, expected: " << s16 << " or " << s32
                   << " or " << s64 << " or " << s128 << '\n');
      if (RB.getID() != AArch64::FPRRegBankID) {
                   << " constant on bank: " << RB
                   << ", expected: FPR\n");
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
      if (Ty != p0 && Ty != s8 && Ty != s16) {
                   << " constant, expected: " << s32 << ", " << s64
                   << ", or " << p0 << '\n');
      if (RB.getID() != AArch64::GPRRegBankID) {
                   << " constant on bank: " << RB
                   << ", expected: GPR\n');
      if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
        auto *FPImm = I.getOperand(1).getFPImm();
        auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
        I.eraseFromParent();
        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
      const Register DefGPRReg = MRI.createVirtualRegister(
          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);
    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));

  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    unsigned Offset = I.getOperand(2).getImm();
      if (SrcRB.getID() == AArch64::GPRRegBankID) {
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                        Offset == 0 ? AArch64::sube64 : AArch64::subo64);
                                     AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();
      unsigned LaneIdx = Offset / 64;
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      I.eraseFromParent();
    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
           "unexpected G_EXTRACT types");
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
           "unexpected G_INSERT types");
            TII.get(AArch64::SUBREG_TO_REG))
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

  case TargetOpcode::G_FRAME_INDEX: {
    I.setDesc(TII.get(AArch64::ADDXri));

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal())
      return selectTLSGlobalValue(I, MRI);
    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
      I.setDesc(TII.get(AArch64::MOVaddr));
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),

  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      if (MemSizeInBytes > 64)
      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
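// Atomic orderings stronger than monotonic bypass the normal
// addressing-mode selection above: loads use LDAPR* (RCPC, when available
// and the ordering is weaker than seq_cst) or LDAR*, stores use STLR*, with
// the opcode indexed by Log2 of the access size in bytes.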
2902 "Load/Store pointer operand isn't a GPR");
2903 assert(
MRI.getType(PtrReg).isPointer() &&
2904 "Load/Store pointer operand isn't a pointer");
2908 const LLT ValTy =
MRI.getType(ValReg);
2913 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2916 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2922 .addReg(ValReg, 0,
SubReg)
2924 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2926 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2929 if (RB.
getID() == AArch64::FPRRegBankID) {
2932 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2939 MRI.setRegBank(NewDst, RB);
2942 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2946 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2947 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2954 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2955 bool IsStore = isa<GStore>(
I);
2956 const unsigned NewOpc =
2958 if (NewOpc ==
I.getOpcode())
2962 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2965 I.setDesc(
TII.get(NewOpc));
2971 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2972 Register CurValReg =
I.getOperand(0).getReg();
2973 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2974 NewInst.cloneMemRefs(
I);
2975 for (
auto &Fn : *AddrModeFns)
2977 I.eraseFromParent();
2986 if (Opcode == TargetOpcode::G_STORE) {
2989 if (CVal && CVal->Value == 0) {
2991 case AArch64::STRWui:
2992 case AArch64::STRHHui:
2993 case AArch64::STRBBui:
2994 LoadStore->getOperand(0).setReg(AArch64::WZR);
2996 case AArch64::STRXui:
2997 LoadStore->getOperand(0).setReg(AArch64::XZR);
3006 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3010 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3015 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3018 .
addImm(AArch64::sub_32);
3020 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3026 case TargetOpcode::G_SMULH:
3027 case TargetOpcode::G_UMULH: {
3032 const Register DefReg =
I.getOperand(0).getReg();
3035 if (RB.
getID() != AArch64::GPRRegBankID) {
3036 LLVM_DEBUG(
dbgs() <<
"G_[SU]MULH on bank: " << RB <<
", expected: GPR\n");
3046 unsigned NewOpc =
I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
3048 I.setDesc(
TII.get(NewOpc));
3054 case TargetOpcode::G_LSHR:
3055 case TargetOpcode::G_ASHR:
3056 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3057 return selectVectorAshrLshr(
I,
MRI);
3059 case TargetOpcode::G_SHL:
3060 if (Opcode == TargetOpcode::G_SHL &&
3061 MRI.getType(
I.getOperand(0).getReg()).isVector())
3062 return selectVectorSHL(
I,
MRI);
3069 Register SrcReg =
I.getOperand(1).getReg();
3070 Register ShiftReg =
I.getOperand(2).getReg();
3071 const LLT ShiftTy =
MRI.getType(ShiftReg);
3072 const LLT SrcTy =
MRI.getType(SrcReg);
3077 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3078 .addReg(ShiftReg, 0, AArch64::sub_32);
3079 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3080 I.getOperand(2).setReg(Trunc.getReg(0));
3084 case TargetOpcode::G_OR: {
3091 const Register DefReg =
I.getOperand(0).getReg();
3095 if (NewOpc ==
I.getOpcode())
3098 I.setDesc(
TII.get(NewOpc));
3106 case TargetOpcode::G_PTR_ADD: {
3107 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3108 I.eraseFromParent();
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
    return selectOverflowOp(I, MRI);

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(

  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
      LLVM_DEBUG(
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
    if (DstRB.getID() == AArch64::GPRRegBankID) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
      if (DstRC == SrcRC) {
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
        LLVM_DEBUG(
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
      I.setDesc(TII.get(TargetOpcode::COPY));
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        I.eraseFromParent();
      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));

  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    if (RBDst.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
    if (DstSize != 64 && DstSize > 32) {
                        << ", expected: 32 or 64\n");
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");
        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      unsigned BytesLoaded = MemOp->getSize();
    if (IsGPR && SrcSize == 32 && DstSize == 64) {
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      const Register ZReg = AArch64::WZR;
      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);
      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
      I.eraseFromParent();
    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
    I.eraseFromParent();
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    if (NewOpc == Opcode)
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_FREEZE:

  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();
    if (tryOptSelect(Sel))
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
    Sel.eraseFromParent();

  case TargetOpcode::G_ICMP: {
      return selectVectorICmp(I, MRI);
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR,
              AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FCMP: {
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
    I.eraseFromParent();

  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

  case TargetOpcode::G_BLOCK_ADDR: {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                  I.getOperand(0).getReg())
                  I.getOperand(1).getBlockAddress(), 0,
      I.eraseFromParent();

  case AArch64::G_DUP: {
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
  Register VecReg = I.getOperand(1).getReg();
  LLT VecTy = MRI.getType(VecReg);
  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
    Register DstReg = I.getOperand(0).getReg();
    auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
        .addReg(AddP.getReg(0), 0, AArch64::ssub)
    RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
    I.eraseFromParent();
      Opc = AArch64::ADDVv16i8v;
      Opc = AArch64::ADDVv8i16v;
      Opc = AArch64::ADDVv4i32v;
      Opc = AArch64::ADDPv2i64p;
    I.setDesc(TII.get(Opc));

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;

  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
    Register DefDstPtr =
        MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
    Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});

  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
                 {static_cast<int64_t>(JTI)});
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();

bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())
  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
  I.eraseFromParent();

bool AArch64InstructionSelector::selectVectorICmp(
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned PredIdx = 0;
  bool SwapOperands = false;
    SwapOperands = true;
    SwapOperands = true;
    SwapOperands = true;
    SwapOperands = true;

  static const unsigned OpcTable[4][4][9] = {
      {
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8},
      },
      {
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
      },
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
      },
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
      },
  };
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
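// OpcTable above is indexed [Log2(EltBytes)][Log2(NumElts / 2)][PredIdx].
// NEON has no separate "less-than" compare opcodes, so the
// PredIdx/SwapOperands logic above maps predicates like ULT/ULE/SLT/SLE
// onto CMHI/CMHS/CMGT/CMGE with swapped operands, and NE is handled as EQ
// followed by NOT (see NotOpc below).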
      getRegClassForTypeOnBank(SrcTy, VecRB, /*GetAllRegSet=*/true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();

MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);
    return BuildFn(AArch64::bsub);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);

AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
  LLT DstTy = MRI.getType(DstReg);
      getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
  if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
  if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      .addReg(SrcReg, 0, SubReg);
  RBI.constrainGenericRegister(DstReg, *RC, MRI);

bool AArch64InstructionSelector::selectMergeValues(
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  if (I.getNumOperands() != 3)
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,

                                         Src2Reg, 1, RB, MIB);
    I.eraseFromParent();
  if (RB.getID() != AArch64::GPRRegBankID)
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(1).getReg())
      .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(2).getReg())
      .addImm(AArch64::sub_32);
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
  I.eraseFromParent();
                                 const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");

MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, /*GetAllRegSet=*/true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
  const LLT &VecTy = MRI.getType(VecReg);
      getRegClassForTypeOnBank(VecTy, VecRB, /*GetAllRegSet=*/true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    DstReg = MRI.createVirtualRegister(DstRC);
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  I.eraseFromParent();
4134bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4136 unsigned NumElts =
I.getNumOperands() - 1;
4137 Register SrcReg =
I.getOperand(NumElts).getReg();
4138 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4139 const LLT SrcTy =
MRI.getType(SrcReg);
4141 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4143 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4150 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4151 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4152 Register Dst =
I.getOperand(OpIdx).getReg();
4154 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4158 I.eraseFromParent();
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand
  // is a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  // Choose a lane copy opcode and subregister based off the element size.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the
  // source directly. Otherwise, we need to do a bit of setup with some
  // subregister inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
        LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()),
        *RBI.getRegBank(SrcReg, MRI, TRI));
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subreg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI = *BuildMI(MBB, I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::INSERT_SUBREG),
                                     InsertReg)
                                 .addUse(ImpDefReg)
                                 .addUse(SrcReg)
                                 .addImm(SubReg);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. We can't guarantee
  // that constraining through the COPY succeeded, so do it ourselves using
  // the second operand's register class.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
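// Example of the scalar unmerge path above (illustrative):
//   %a(s32), %b(s32), %c(s32), %d(s32) = G_UNMERGE_VALUES %v(<4 x s32>)
// The elements fill exactly 128 bits, so every copy reads %v directly:
// %a via an ssub subregister COPY, and %b..%d via DUPi32 lane copies 1..3.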
bool AArch64InstructionSelector::selectConcatVectors(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}
unsigned AArch64InstructionSelector::emitConstantPoolEntry(
    const Constant *CPVal, MachineFunction &MF) const {
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPVal->getType());
  return MF.getConstantPool()->getConstantPoolIndex(CPVal, Alignment);
}
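// emitConstantPoolEntry only creates the constant pool entry and returns its
// index; emitLoadFromConstantPool below turns that index into an actual load,
// choosing the opcode from the constant's store size and the code model.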
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  }
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
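// For the non-tiny code models the result is the usual page/pageoff pair,
// e.g. (illustrative MIR, not from a test):
//   %page:gpr64 = ADRP target-flags(aarch64-page) %const.0
//   %val:fpr128 = LDRQui %page,
//                 target-flags(aarch64-pageoff, aarch64-nc) %const.0
// while the tiny code model reaches the pool with a single PC-relative
// literal load (LDRQl/LDRDl/LDRSl).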
/// Return an <Opc, SubregIdx> pair for doing a vector element insert of a
/// given size and register bank.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8gpr;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
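// Example: getInsertVecEltOpInfo(FPRBank, 32) returns
// {AArch64::INSvi32lane, AArch64::ssub} (an element-from-element INS),
// whereas a GPR source bank gives INSvi32gpr, which inserts from a general
// purpose register instead.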
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
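// Typical emitInstr use (this mirrors emitAddSub below): the renderer
// functions of a ComplexRendererFns append the trailing operands, e.g.
//   if (auto Fns = selectArithImmed(RHS))
//     return emitInstr(AArch64::ADDWri, {Dst}, {LHS}, MIRBuilder, Fns);
// adds the immediate and shift operands after Dst and LHS.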
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
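// The 5-row opcode tables passed to emitAddSub are indexed as
// [addressing mode][is 32 bit]: row 0 is ri (arithmetic immediate), row 1 rs
// (shifted register), row 2 rr (plain register), row 3 ri with the negated
// immediate (hence the opposite ADD/SUB opcode), and row 4 rx (extended
// register).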
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
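// Illustrative example: emitTST with an RHS defined as the constant 0xff
// emits
//   ANDSWri %lhs, <encoded 0xff>
// whose register result is left unused; only the NZCV flags it sets are
// consumed by the following conditional instruction.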
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a cmn or tst if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
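// An integer compare is thus a flag-setting subtract with a dead register
// result: G_ICMP intpred(eq), %a(s64), %b typically becomes SUBSXrr with only
// NZCV live, unless tryFoldIntegerCompare can use a CMN or TST instead.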
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RC);
  Register Def2Reg = MRI.createVirtualRegister(RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
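// Example: a predicate such as FCMP_ONE maps to two AArch64 condition codes,
// so the code above materializes each with an inverted CSINC (i.e. a CSET)
// and ORs the results together; predicates that map to a single condition
// collapse to one CSINC.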
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];

  // Only add a use of RHS when comparing against a register; the immediate
  // form compares against +0.0 implicitly.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
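// Example: G_FCMP oeq, %x(s32), 0.0 selects to FCMPSri %x, comparing against
// +0.0 implicitly and avoiding a constant materialization; for equality
// predicates the operands may be commuted above so a 0.0 on the LHS gets the
// same form.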
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Using scalar_to_vector to insert the lower vector into the larger dest
  // 2. Inserting the upper vector into the destination's upper element
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
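// Illustrative example: concatenating two <2 x s32> values into a <4 x s32>
// widens both operands to 128-bit registers, then INSvi64lane inserts the
// second operand's low 64 bits into lane 1 of the first.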
MachineInstr *
AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
                                      Register Src2, AArch64CC::CondCode Pred,
                                      MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned Size;
  // If we used a register class, then this won't necessarily have an LLT.
  // Compute the size based off whether or not we have a class or bank.
  if (auto *RC = MRI.getRegClassOrNull(Dst))
    Size = TRI.getRegSizeInBits(*RC);
  else
    Size = MRI.getType(Dst).getSizeInBits();
  // Some opcodes use s1.
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  return &*CSINC;
}