#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
  ProduceNonFlagSettingCondBr =

  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,

  unsigned emitConstantPoolEntry(const Constant *CPVal,

                          std::optional<CmpInst::Predicate> = std::nullopt) const;

  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            const ComplexRendererFns &RenderFns = std::nullopt) const;

      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,

  std::pair<MachineInstr *, AArch64CC::CondCode>

  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

                                        unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

                                       unsigned Size) const;
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

                                              unsigned SizeInBytes) const;
                                       bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
                                   unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

                                   unsigned SizeInBytes) const;
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

                                        bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;
                      int OpIdx = -1) const;

  bool tryOptSelect(GSelect &Sel);
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
#include "AArch64GenGlobalISel.inc"
#include "AArch64GenGlobalISel.inc"
                       bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
      return &AArch64::XSeqPairsClassRegClass;

  if (RB.getID() == AArch64::FPRRegBankID) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;

                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;

  switch (TRI.getRegSizeInBits(*RC)) {
    if (RC != &AArch64::FPR32RegClass)
      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:
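// Note on the helper that follows: it packs 2-4 registers into a single tuple
// register by emitting a REG_SEQUENCE, using the register-class IDs and
// subregister indices its callers pass in (see the D/Q wrappers further down).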
                            const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
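// The two wrappers below handle 64-bit (D) and 128-bit (Q) vector tuples:
// they supply the DD/DDD/DDDD or QQ/QQQ/QQQQ register-class IDs and the
// matching dsubN/qsubN subregister indices, then defer to createTuple.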
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();

  LLT Ty = MRI.getType(I.getOperand(0).getReg());

  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
    if (!MO.getReg().isVirtual()) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      }
    else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      }

  case AArch64::FPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      }
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      }

  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
    RegOp.setReg(SubRegCopy.getReg(0));
    if (!I.getOperand(0).getReg().isPhysical())

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

    if (Reg.isPhysical())
    LLT Ty = MRI.getType(Reg);
      RC = getRegClassForTypeOnBank(Ty, RB);
        dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
  unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
    Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
            TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
    RegOp.setReg(PromoteReg);

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  I.setDesc(TII.get(AArch64::COPY));
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWSr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWDr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXSr;
    }
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXDr;
    }
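// The select-emission code that follows picks FCSEL for FPR operands and CSEL
// for GPR operands, then tries to fold a feeding binop or constant true/false
// values into CSINC/CSINV/CSNEG so the generic G_SELECT collapses into a
// single conditional instruction where possible.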
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;

  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)
    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();
      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

      int64_t T = TrueCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

      int64_t F = FalseCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;

  Optimized |= TryFoldBinOpIntoSelect(False, True, false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, true);
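// The walk below starts from the register a TB(N)Z candidate would test and
// looks through single-use extends, truncs, shifts, G_AND and G_XOR to find a
// narrower register plus an adjusted bit index that test the same condition.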
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
    unsigned Opc = MI->getOpcode();
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))

    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
      Register NextReg = MI->getOperand(1).getReg();
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))

    std::optional<uint64_t> C;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
          C = VRegAndVal->Value.getZExtValue();
        C = VRegAndVal->Value.getSExtValue();
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
        C = VRegAndVal->Value.getSExtValue();

    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    case TargetOpcode::G_AND:
      if ((*C >> Bit) & 1)
    case TargetOpcode::G_SHL:
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
    case TargetOpcode::G_ASHR:
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
    case TargetOpcode::G_LSHR:
      if ((Bit + *C) < TestRegSize) {
    case TargetOpcode::G_XOR:
      if ((*C >> Bit) & 1)
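// emitTestBit: chooses the W or X form of TBZ/TBNZ from the bit index and, if
// the tested value is not already in the matching register class, moves it
// into a plain GPR32/GPR64 first.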
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  LLT Ty = MRI.getType(TestReg);
  assert(Bit < 64 && "Bit is too large!");
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
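// tryOptAndIntoCompareBranch folds a G_AND that feeds a compare-with-zero
// branch into a single TBZ/TBNZ when the mask is a power of two, so the AND
// itself never needs to be materialised.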
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)

  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, true, DestMBB, MIB);
      I.eraseFromParent();
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();

  if (VRegAndVal && VRegAndVal->Value == 0) {
    tryOptAndIntoCompareBranch(
    I.eraseFromParent();

  auto LHSTy = MRI.getType(LHS);
  if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
    I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, 0, true, I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
        .addMBB(I.getOperand(1).getMBB());
    I.eraseFromParent();

  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
    return std::nullopt;
  int64_t Imm = *ShiftImm;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
    return std::nullopt;
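// selectVectorSHL picks an immediate SHL*_shift or a register USHL per
// element type; a constant-splat shift amount, when present, is encoded
// directly as the immediate form.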
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
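// selectVectorAshrLshr has no immediate form to lean on here: arithmetic and
// logical right shifts are emitted as SSHL/USHL fed by a NEG of the shift
// amount, since the variable shift instructions shift left by a signed value.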
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectVaStartAAPCS(

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();
  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  I.eraseFromParent();

void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
                       : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
          GV, MovZ->getOperand(1).getOffset(), Flags));
          MovZ->getOperand(1).getOffset(), Flags));
  Register DstReg = BuildMovK(MovZ.getReg(0),
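// preISelLower massages a few generic opcodes before selection proper:
// contracting cross-bank copies into stores, rewriting G_PTR_ADD as an
// integer add, widening the scalar source of G_DUP, and switching
// G_SITOFP/G_UITOFP to the FPR-source G_SITOF/G_UITOF pseudos when the input
// is already on the FPR bank.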
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
  case AArch64::G_DUP: {
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    MRI.setType(I.getOperand(0).getReg(),
    MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
        I.setDesc(TII.get(AArch64::G_UITOF));
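// convertPtrAddToAdd rewrites G_PTR_ADD as an integer G_ADD over a
// G_PTRTOINT of the base pointer (or a G_SUB when the offset is a negated
// constant), exposing the ordinary integer-add patterns to selection.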
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  const LLT CastPtrTy =
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
      I.getOperand(2).setReg(NegatedReg);
      I.setDesc(TII.get(TargetOpcode::G_SUB));
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();
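// contractCrossBankCopyIntoStore: when the value being stored is just a
// cross-bank copy of another virtual register of the same size, store that
// original register directly instead of keeping the copy alive.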
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
  I.getOperand(0).setReg(DefDstReg);
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
        MRI.getType(Dst).getNumElements(),
        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
    I.eraseFromParent();

  case TargetOpcode::G_SEXT:
    if (selectUSMovFromExtend(I, MRI))
  case TargetOpcode::G_BR:
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    I.setDesc(TII.get(TargetOpcode::COPY));

  case TargetOpcode::G_ADD: {
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
    LLT Ty = MRI.getType(AddLHS);
      if (!MRI.hasOneNonDBGUse(Reg))
          MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
      Cmp = MatchCmp(AddRHS);
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(Cmp->getOperand(2),
                       Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_OR: {
    Register Dst = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(Dst);
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  if (Subtarget->requiresStrictAlign()) {
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);
          MRI.getRegClassOrRegBank(DefReg);
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);

    if (I.isDebugInstr())

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    dbgs() << "Generic instruction has unexpected implicit operands\n");

  if (preISelLower(I)) {
    Opcode = I.getOpcode();

  if (selectImpl(I, *CoverageInfo))

      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    assert(Cst1 && "Should have gotten a constant for src 1?");
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
        .addImm(LSB + Width - 1);
    I.eraseFromParent();
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
           "Expected small code model");
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                                         Op2.getTargetFlags());
    I.eraseFromParent();

  case TargetOpcode::G_BSWAP: {
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
    I.setDesc(TII.get(Opc));

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);

      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
                   << " constant, expected: " << s16 << " or " << s32
                   << " or " << s64 << " or " << s128 << '\n');
      if (RB.getID() != AArch64::FPRRegBankID) {
                   << " constant on bank: " << RB << ", expected: FPR\n");
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
      if (Ty != p0 && Ty != s8 && Ty != s16) {
                   << " constant, expected: " << s32 << ", " << s64
                   << ", or " << p0 << '\n');
      if (RB.getID() != AArch64::GPRRegBankID) {
                   << " constant on bank: " << RB << ", expected: GPR\n");

      auto *FPImm = I.getOperand(1).getFPImm();
      auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
        LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
             "Expected constant pool loads for all sizes other than 32!");
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);

    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    LLT DstTy = MRI.getType(DstReg);
    unsigned Offset = I.getOperand(2).getImm();
      if (SrcRB.getID() == AArch64::GPRRegBankID) {
            MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
                        Offset == 0 ? AArch64::sube64 : AArch64::subo64);
                                     AArch64::GPR64RegClass, NewI->getOperand(0));
        I.eraseFromParent();

      unsigned LaneIdx = Offset / 64;
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      I.eraseFromParent();

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
           "unexpected G_EXTRACT types");
            .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
           "unexpected G_INSERT types");
            TII.get(AArch64::SUBREG_TO_REG))
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

  case TargetOpcode::G_FRAME_INDEX: {
    I.setDesc(TII.get(AArch64::ADDXri));
  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal())
      return selectTLSGlobalValue(I, MRI);
    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::ADR));
      I.getOperand(1).setTargetFlags(OpFlags);
      I.setDesc(TII.get(AArch64::MOVaddr));
      MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;

    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Unordered &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      if (MemSizeInBytes > 64)

      if (isa<GLoad>(LdSt)) {
        static constexpr unsigned LDAPROpcodes[] = {
            AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
        static constexpr unsigned LDAROpcodes[] = {
            AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
            STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
        static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                               AArch64::STLRW, AArch64::STLRX};
        if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
          Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));

           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");

    const LLT ValTy = MRI.getType(ValReg);

    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
                      .addReg(ValReg, 0, SubReg)
      RBI.constrainGenericRegister(Copy, *RC, MRI);
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      if (RB.getID() == AArch64::FPRRegBankID) {
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        MRI.setRegBank(NewDst, RB);
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
      if (NewOpc == I.getOpcode())
          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
        I.setDesc(TII.get(NewOpc));
      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
      I.eraseFromParent();

    if (Opcode == TargetOpcode::G_STORE) {
      if (CVal && CVal->Value == 0) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);

      if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(AArch64::sub_32);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    const Register DefReg = I.getOperand(0).getReg();
    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB
                        << ", expected: GPR\n");
    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    if (MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorAshrLshr(I, MRI);
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);

    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));

  case TargetOpcode::G_OR: {
    const Register DefReg = I.getOperand(0).getReg();
    if (NewOpc == I.getOpcode())
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();

  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO: {
    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
                                  I.getOperand(2), I.getOperand(3), MIB);
    emitCSINC(I.getOperand(1).getReg(), ZReg, ZReg,
              getInvertedCondCode(OpAndCC.second), MIB);
    I.eraseFromParent();

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");

      if (DstRC == SrcRC) {
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");

      I.setDesc(TII.get(TargetOpcode::COPY));
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
        I.eraseFromParent();

      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    if (RBDst.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
    if (DstSize != 64 && DstSize > 32) {
                        << ", expected: 32 or 64\n");
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");
        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      unsigned BytesLoaded = MemOp->getSize();

    if (IsGPR && SrcSize == 32 && DstSize == 64) {
          MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      const Register ZReg = AArch64::WZR;
      MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);
      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
      I.eraseFromParent();

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
    I.eraseFromParent();
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    if (NewOpc == Opcode)
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();
    if (tryOptSelect(Sel))
    Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
    Sel.eraseFromParent();
  case TargetOpcode::G_ICMP: {
      return selectVectorICmp(I, MRI);
    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR,
              AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FCMP: {
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
    I.eraseFromParent();

  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

  case TargetOpcode::G_BLOCK_ADDR: {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI =
          BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                  I.getOperand(0).getReg())
                  I.getOperand(1).getBlockAddress(), 0,
      I.eraseFromParent();

  case AArch64::G_DUP: {
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
    LLT VecTy = MRI.getType(I.getOperand(0).getReg());
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
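// selectReduction lowers G_VECREDUCE_ADD/FADD to ADDV/ADDP/FADDP forms picked
// by element count and size; the 2 x s32 integer case goes through ADDPv2i32
// plus an ssub copy rather than an across-vector ADDV.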
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
  Register VecReg = I.getOperand(1).getReg();
  LLT VecTy = MRI.getType(VecReg);
  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
      Register DstReg = I.getOperand(0).getReg();
      auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
                       .addReg(AddP.getReg(0), 0, AArch64::ssub)
      RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
      I.eraseFromParent();

      Opc = AArch64::ADDVv16i8v;
      Opc = AArch64::ADDVv8i16v;
      Opc = AArch64::ADDVv4i32v;
      Opc = AArch64::ADDPv2i64p;
    I.setDesc(TII.get(Opc));

  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
      Opc = AArch64::FADDPv2i32p;
      Opc = AArch64::FADDPv2i64p;
    I.setDesc(TII.get(Opc));
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;

  const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

    Register DefDstPtr =
        MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
    Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();
bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())
  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
  I.eraseFromParent();
bool AArch64InstructionSelector::selectIntrinsicTrunc(
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
    Opc = AArch64::FRINTZHr;
    Opc = AArch64::FRINTZSr;
    Opc = AArch64::FRINTZDr;
      Opc = AArch64::FRINTZv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTZv8f16;
      Opc = AArch64::FRINTZv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTZv4f32;
      Opc = AArch64::FRINTZv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
  I.setDesc(TII.get(Opc));
bool AArch64InstructionSelector::selectIntrinsicRound(
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
    Opc = AArch64::FRINTAHr;
    Opc = AArch64::FRINTASr;
    Opc = AArch64::FRINTADr;
      Opc = AArch64::FRINTAv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTAv8f16;
      Opc = AArch64::FRINTAv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTAv4f32;
      Opc = AArch64::FRINTAv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
  I.setDesc(TII.get(Opc));
bool AArch64InstructionSelector::selectVectorICmp(
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned PredIdx = 0;
  bool SwapOperands = false;
    SwapOperands = true;
    SwapOperands = true;
    SwapOperands = true;
    SwapOperands = true;

  static const unsigned OpcTable[4][4][9] = {
      {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
       AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
       AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
      {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
       AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
       AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
       AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
       AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
      {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
       AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
       AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
      {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
       AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
       AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
      {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
       AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
       AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
      {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
       AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
       AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
  };
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
      getRegClassForTypeOnBank(SrcTy, VecRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);
    return BuildFn(AArch64::bsub);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);
bool AArch64InstructionSelector::selectMergeValues(
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  if (I.getNumOperands() != 3)

    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                  Src2Reg, 1, RB, MIB);
    I.eraseFromParent();

  if (RB.getID() != AArch64::GPRRegBankID)
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
              TII.get(TargetOpcode::SUBREG_TO_REG))
          .addUse(I.getOperand(1).getReg())
          .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
              TII.get(TargetOpcode::SUBREG_TO_REG))
          .addUse(I.getOperand(2).getReg())
          .addImm(AArch64::sub_32);
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
  I.eraseFromParent();
                              const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");

  const LLT &VecTy = MRI.getType(VecReg);
      getRegClassForTypeOnBank(VecTy, VecRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

    DstReg = MRI.createVirtualRegister(DstRC);
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const LLT NarrowTy = MRI.getType(DstReg);
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT WideTy = MRI.getType(SrcReg);
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  I.eraseFromParent();
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  if (SrcTy.getSizeInBits() > 128) {
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
    return false;
  }

  // The destinations are sub-vectors of the source; extract each one with a
  // lane copy of the appropriate width.
  const RegisterBank &DstRB =
      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    MachineInstr *Extract =
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
    if (!Extract)
      return false;
  }
  I.eraseFromParent();
  return true;
}
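
// Select G_UNMERGE_VALUES. FPR vector unmerges become a subregister copy for
// element 0 plus lane copies for the remaining elements.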
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand
  // is a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from, reusing the first one twice.
  SmallVector<Register, 4> InsertRegs;
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy lanes from
  // the source register directly; otherwise widen each copy source into an
  // FPR128 register with an IMPLICIT_DEF + INSERT_SUBREG.
  if ((WideTy.getSizeInBits() * NumInsertRegs) % 128 == 0) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
        WideTy, *RBI.getRegBank(SrcReg, MRI, TRI));
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subeg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI = *BuildMI(MBB, I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::INSERT_SUBREG),
                                     InsertReg)
                                 .addUse(ImpDefReg)
                                 .addUse(SrcReg)
                                 .addImm(SubReg);
      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
      InsertRegs.push_back(InsertReg);
    }
  }

  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Constrain the first copy's destination using the register class of the
  // second operand, since the plain COPY above cannot be constrained through
  // constrainSelectedInstRegOperands.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC)
    return false;

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}
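
// Select G_CONCAT_VECTORS by delegating to emitVectorConcat.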
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}
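
// Constant-pool materialization: emitConstantPoolEntry adds the constant to
// the function's constant pool, and emitLoadFromConstantPool loads it back
// with ADRP + LDR (or a literal load under the tiny code model).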
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  const DataLayout &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CPVal->getType());
  return MF.getConstantPool()->getConstantPoolIndex(CPVal, Alignment);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
  } else {
    auto Adrp =
        MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
            .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
    LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
                   .addConstantPoolIndex(
                       CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  }
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}
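
// Return the INSvi* opcode and subregister index to use when inserting an
// element of the given size, depending on whether the element value lives in
// a GPR or an FPR.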
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}
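
// Build an already-selected instruction, apply any complex renderer functions
// to it, and constrain its register operands.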
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  assert(!isPreISelGenericOpcode(Opcode) &&
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
  constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  return &*MI;
}
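
// Common helper for the ADD/ADDS/SUBS emitters below: pick the best form for
// the RHS (immediate, negated immediate, extended register, shifted register,
// or plain register) and emit the corresponding opcode from the table.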
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
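
// emitADD, emitADDS and emitSUBS are thin wrappers that supply the right
// opcode table to emitAddSub.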
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
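
// CMN is an ADDS whose result is discarded, so emit an ADDS into a scratch
// register of the appropriate width.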
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
}
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};

  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();
    if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
      TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
}
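
// Emit an integer compare, folding it into CMN or TST when profitable and
// otherwise emitting a SUBS into a scratch register.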
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT CmpTy = MRI.getType(LHS.getReg());
  assert(!CmpTy.isVector() && "Expected scalar or pointer");
  unsigned Size = CmpTy.getSizeInBits();
  (void)Size;
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  // Fold the compare into a CMN or TST if possible.
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
}
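
// Materialize the boolean result of a floating-point compare. Predicates that
// map to a single AArch64 condition code need one CSINC; predicates that need
// two condition codes emit two CSINCs ORed together.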
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  if (CC2 == AArch64CC::AL)
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  // Otherwise materialize both conditions and OR them together.
  Register Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  return &*OrMI;
}
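
// Emit an FCMP, using the compare-against-zero immediate form when the RHS is
// +0.0 and commuting equality compares to expose that form.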
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(LHS);
  unsigned OpSize = Ty.getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return nullptr;

  // A compare against +0.0 can use the immediate form of FCMP; equality
  // compares are symmetric, so the operands may be swapped to expose it.
  const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  bool ShouldUseImm = FPImm && FPImm->isZero() && !FPImm->isNegative();
  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
    if (LHSImm && LHSImm->isZero() && !LHSImm->isNegative()) {
      ShouldUseImm = true;
      std::swap(LHS, RHS);
    }
  }

  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
  // Only add the RHS use when not comparing against the immediate zero.
  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  if (!ShouldUseImm)
    CmpMI.addUse(RHS);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
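
// Concatenate two sub-128-bit vectors by widening both operands to the
// destination register class and inserting the second operand into the upper
// half of the result.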
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    std::optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);