#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
    ProduceNonFlagSettingCondBr =

  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,

  unsigned emitConstantPoolEntry(const Constant *CPVal,

                         std::initializer_list<llvm::DstOp> DstOps,
                         std::initializer_list<llvm::SrcOp> SrcOps,
                         const ComplexRendererFns &RenderFns = None) const;

      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

  std::pair<MachineInstr *, AArch64CC::CondCode>
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  unsigned SizeInBytes) const;

  bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }

  bool IsLoadStore = false) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;
  int OpIdx = -1) const;

  bool tryOptSelect(GSelect &Sel);

  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
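// Constructor: caches the subtarget's instruction and register info and pulls
// in the TableGen-erated predicate/temporary initializers from
// AArch64GenGlobalISel.inc.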
AArch64InstructionSelector::AArch64InstructionSelector(
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
#include "AArch64GenGlobalISel.inc"
#include "AArch64GenGlobalISel.inc"
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
      return &AArch64::XSeqPairsClassRegClass;

  if (RB.getID() == AArch64::FPRRegBankID) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
  if (RC != &AArch64::FPR32RegClass)

  LLVM_DEBUG(
      dbgs() << "Couldn't find appropriate subregister for register class.");

  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
  case AArch64::FPRRegBankID:
                            const unsigned RegClassIDs[],
  unsigned NumRegs = Regs.size();
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {

  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);

  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
  auto &MBB = *MI.getParent();
  auto &MRI = MF.getRegInfo();

  else if (Root.isReg()) {
    Immed = ValAndVReg->Value.getSExtValue();

  for (auto &MO : I.operands()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");

    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  case AArch64::GPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
  case AArch64::FPRRegBankID:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
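// Select the unsigned-immediate load/store opcode for a given access size on
// the given register bank (e.g. LDRWui/STRWui for a 32-bit GPR access).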
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  case AArch64::GPRRegBankID:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
  case AArch64::FPRRegBankID:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
      return isStore ? AArch64::STRQui : AArch64::LDRQui;
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");

    RegOp.setReg(SubRegCopy.getReg(0));

    if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

    LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");

    auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  } else if (SrcSize > DstSize) {
  } else if (DstSize > SrcSize) {
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)

  if (Register::isPhysicalRegister(DstReg))

  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    I.setDesc(TII.get(AArch64::COPY));
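// Pick the AArch64 FP<->integer conversion instruction (SCVTF/UCVTF/FCVTZS/
// FCVTZU) matching the generic opcode and the source/destination sizes.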
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWSr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXSri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXSri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUWDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUWDr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUWDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUWDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXSr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXSr;
    switch (GenericOpc) {
    case TargetOpcode::G_SITOFP:
      return AArch64::SCVTFUXDri;
    case TargetOpcode::G_UITOFP:
      return AArch64::UCVTFUXDri;
    case TargetOpcode::G_FPTOSI:
      return AArch64::FCVTZSUXDr;
    case TargetOpcode::G_FPTOUI:
      return AArch64::FCVTZUUXDr;
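// emitSelect: produce the best fitting CSEL/CSINC/CSINV/CSNEG (or FCSEL for
// FPR operands), folding constants and simple ops into the conditional form.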
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);

  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
        Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
    if (!TrueCst && !FalseCst)
    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();
      if (T == 0 && F == 1) {
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      if (T == 0 && F == -1) {
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t T = TrueCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      int64_t F = FalseCst->Value.getSExtValue();
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  Optimized |= TryFoldBinOpIntoSelect(False, True, false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, true);
  Optimized |= TryOptSelectCst();
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:

  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
  case CmpInst::FCMP_UNE:

  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||

    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)

      Register NextReg = MI->getOperand(1).getReg();

    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
          C = VRegAndVal->Value.getZExtValue();
          C = VRegAndVal->Value.getSExtValue();
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
        C = VRegAndVal->Value.getSExtValue();

    case TargetOpcode::G_AND:
      if ((*C >> Bit) & 1)
    case TargetOpcode::G_SHL:
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
    case TargetOpcode::G_ASHR:
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
    case TargetOpcode::G_LSHR:
      if ((Bit + *C) < TestRegSize) {
    case TargetOpcode::G_XOR:
      if ((*C >> Bit) & 1)

  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  assert(Bit < 64 && "Bit is too large!");

  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
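// Try to fold a G_AND feeding a conditional branch into a single TB(N)Z on
// the tested bit of the AND's input.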
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");

  int32_t Bit = MaybeBit->Value.exactLogBase2();

  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
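// Lower a G_BRCOND whose condition is produced by a compare: an FCMP feeds a
// B.cc, while an ICMP is first matched against the cheaper TB(N)Z/CB(N)Z forms.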
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)

  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();

    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      emitTestBit(LHS, Bit, true, DestMBB, MIB);
      I.eraseFromParent();

  if (ICmpInst::isEquality(Pred)) {
    if (VRegAndVal && VRegAndVal->Value == 0) {
          tryOptAndIntoCompareBranch(
              *AndInst, Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();

      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(LHS, Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, 0, true, I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();

  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();

  int64_t Imm = *ShiftImm;
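// Select a vector G_SHL: use the immediate SHL form when the shift amount is
// a constant splat, otherwise fall back to USHL with a register operand.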
bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
  if (Ty == LLT::fixed_vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::fixed_vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::fixed_vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::fixed_vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::fixed_vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::fixed_vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::fixed_vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
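// G_VASTART lowering. The Darwin va_list is a single pointer, so it is
// materialized with an ADDXri of the va-arg frame index and stored with an
// STRXui into the list object.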
bool AArch64InstructionSelector::selectVaStartAAPCS(

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))

  I.eraseFromParent();
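// Materialize a global or block address under the large code model with a
// MOVZ followed by MOVKs covering the remaining 16-bit chunks of the address.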
void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));

  Register DstReg = BuildMovK(MovZ.getReg(0),
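// preISelLower: massage a generic instruction before the imported TableGen
// patterns run, e.g. rewrite G_PTR_ADD as G_ADD or fix up operand register
// banks for stores, loads, G_DUP and int-to-fp conversions.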
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
  case AArch64::G_DUP: {
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(AArch64::G_SITOF));
        I.setDesc(TII.get(AArch64::G_UITOF));
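// Rewrite G_PTR_ADD as a plain G_ADD (or G_SUB when the offset is a negated
// constant) so the integer-add patterns can select it; the pointer operand is
// first converted with a G_PTRTOINT.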
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();

  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

    I.getOperand(2).setReg(NegatedReg);
    I.setDesc(TII.get(TargetOpcode::G_SUB));
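// earlySelectSHL: select a shift-by-constant directly as UBFMWri/UBFMXri,
// rendering the two bitfield immediates with the shiftA/shiftB helpers.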
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);

  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)

      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();

bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  Register StoreSrcReg = I.getOperand(0).getReg();
  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
  I.getOperand(0).setReg(DefDstReg);
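// earlySelect: opcodes worth selecting by hand before the TableGen-imported
// patterns run (splatted-constant G_DUP, zero G_CONSTANT, add+compare folds,
// bitfield-insert G_OR patterns, fences, ...).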
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
    auto *CV = ConstantDataVector::getSplat(
        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIB, MRI))
    I.eraseFromParent();
  case TargetOpcode::G_SEXT:
    if (selectUSMovFromExtend(I, MRI))
  case TargetOpcode::G_BR:
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    Register DefReg = I.getOperand(0).getReg();
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);

    I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ADD: {
    Register AddDst = I.getOperand(0).getReg();
    Register AddLHS = I.getOperand(1).getReg();
    Register AddRHS = I.getOperand(2).getReg();
      Cmp = MatchCmp(AddRHS);
    auto &PredOp = Cmp->getOperand(1);
    emitIntegerCompare(Cmp->getOperand(2),
                       Cmp->getOperand(3), PredOp, MIB);
    emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_OR: {
    Register Dst = I.getOperand(0).getReg();
    if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
    int64_t Immr = Size - ShiftImm;
    int64_t Imms = Size - ShiftImm - 1;
    unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
    emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
    I.eraseFromParent();
  case TargetOpcode::G_FENCE: {
    if (I.getOperand(1).getImm() == 0)
          .addImm(I.getOperand(0).getImm());
          .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
    I.eraseFromParent();
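// select(): the main entry point. Handle PHIs/copies and non-generic opcodes
// first, run preISelLower/earlySelect, then fall through to the imported
// TableGen patterns and the large switch over generic opcodes below.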
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  if (Subtarget->requiresStrictAlign()) {
    LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

  unsigned Opcode = I.getOpcode();
  if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
        DefRC = getRegClassForTypeOnBank(DefTy, RB);

      I.setDesc(TII.get(TargetOpcode::PHI));
      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");

  if (preISelLower(I)) {
    Opcode = I.getOpcode();

      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    assert(Cst1 && "Should have gotten a constant for src 1?");
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
    I.eraseFromParent();
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);
  case TargetOpcode::G_BRINDIRECT: {
  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);
  case AArch64::G_ADD_LOW: {
    I.setDesc(TII.get(AArch64::ADDXri));
    I.addOperand(MachineOperand::CreateImm(0));
           "Expected small code model");
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                                         Op2.getTargetFlags());
    I.eraseFromParent();
  case TargetOpcode::G_BSWAP: {
    Register DstReg = I.getOperand(0).getReg();
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
    I.setDesc(TII.get(Opc));
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT s128 = LLT::scalar(128);
    const Register DefReg = I.getOperand(0).getReg();

      if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
                          << " constant, expected: " << s16 << " or " << s32
                          << " or " << s64 << " or " << s128 << '\n');
      if (RB.getID() != AArch64::FPRRegBankID) {
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
      if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
      if (Ty != p0 && Ty != s8 && Ty != s16) {
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
      if (RB.getID() != AArch64::GPRRegBankID) {
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");

        auto *FPImm = I.getOperand(1).getFPImm();
        auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
          LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
        I.eraseFromParent();
        return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
             "Expected constant pool loads for all sizes other than 32!");
      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
    } else if (I.getOperand(1).isCImm()) {
      uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
      I.getOperand(1).ChangeToImmediate(Val);
    } else if (I.getOperand(1).isImm()) {
      uint64_t Val = I.getOperand(1).getImm();
      I.getOperand(1).ChangeToImmediate(Val);

    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    I.setDesc(TII.get(MovOpc));
  case TargetOpcode::G_EXTRACT: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    unsigned Offset = I.getOperand(2).getImm();
      if (SrcRB.getID() == AArch64::GPRRegBankID) {
        MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
            .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
        I.eraseFromParent();

      unsigned LaneIdx = Offset / 64;
          DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
      I.eraseFromParent();

    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
           "unexpected G_EXTRACT types");
        .addReg(DstReg, 0, AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(0).setReg(DstReg);

  case TargetOpcode::G_INSERT: {
    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
           "unexpected G_INSERT types");
            TII.get(AArch64::SUBREG_TO_REG))
        .addUse(I.getOperand(2).getReg())
        .addImm(AArch64::sub_32);
    RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                 AArch64::GPR32RegClass, MRI);
    I.getOperand(2).setReg(SrcReg);

  case TargetOpcode::G_FRAME_INDEX: {
    I.setDesc(TII.get(AArch64::ADDXri));
    I.addOperand(MachineOperand::CreateImm(0));
    I.addOperand(MachineOperand::CreateImm(0));

  case TargetOpcode::G_GLOBAL_VALUE: {
    auto GV = I.getOperand(1).getGlobal();
    if (GV->isThreadLocal())
      return selectTLSGlobalValue(I, MRI);

    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
      I.getOperand(1).setTargetFlags(OpFlags);
      materializeLargeCMVal(I, GV, OpFlags);
      I.eraseFromParent();
      I.getOperand(1).setTargetFlags(OpFlags);
    I.setDesc(TII.get(AArch64::MOVaddr));
    MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE: {
    bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;

    if (Order != AtomicOrdering::NotAtomic &&
        Order != AtomicOrdering::Monotonic) {
      assert(!isa<GZExtLoad>(LdSt));
      if (MemSizeInBytes > 64)

      if (isa<GLoad>(LdSt)) {
        static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
                                     AArch64::LDARW, AArch64::LDARX};
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
        static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
                                     AArch64::STLRW, AArch64::STLRX};
          MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
              .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
          I.getOperand(0).setReg(NewVal);
        I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));

           "Load/Store pointer operand isn't a GPR");
           "Load/Store pointer operand isn't a pointer");

    if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      auto *RC = getRegClassForTypeOnBank(MemTy, RB);
          .addReg(ValReg, 0, SubReg)
      RBI.constrainGenericRegister(Copy, *RC, MRI);
    } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
      if (RB.getID() == AArch64::FPRRegBankID) {
        auto *RC = getRegClassForTypeOnBank(MemTy, RB);
        MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
        auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
        RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);

    auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
      bool IsStore = isa<GStore>(I);
      const unsigned NewOpc =
      if (NewOpc == I.getOpcode())
          selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
        I.setDesc(TII.get(NewOpc));
        I.addOperand(MachineOperand::CreateImm(0));
      auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
      Register CurValReg = I.getOperand(0).getReg();
      IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
      NewInst.cloneMemRefs(I);
      for (auto &Fn : *AddrModeFns)
      I.eraseFromParent();

    if (Opcode == TargetOpcode::G_STORE) {
      if (CVal && CVal->Value == 0) {
        case AArch64::STRWui:
        case AArch64::STRHHui:
        case AArch64::STRBBui:
          LoadStore->getOperand(0).setReg(AArch64::WZR);
        case AArch64::STRXui:
          LoadStore->getOperand(0).setReg(AArch64::XZR);

      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(AArch64::sub_32);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    const Register DefReg = I.getOperand(0).getReg();
    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
    if (Ty != LLT::scalar(64)) {
                        << ", expected: " << LLT::scalar(64) << '\n');

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
      return selectVectorAshrLshr(I, MRI);
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
      return selectVectorSHL(I, MRI);

    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));

  case TargetOpcode::G_OR: {
    const Register DefReg = I.getOperand(0).getReg();
    if (NewOpc == I.getOpcode())
    I.setDesc(TII.get(NewOpc));

  case TargetOpcode::G_PTR_ADD: {
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
    I.eraseFromParent();

  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO: {
    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
                                  I.getOperand(2), I.getOperand(3), MIB);
    emitCSINC(I.getOperand(1).getReg(), ZReg, ZReg,
    I.eraseFromParent();

  case TargetOpcode::G_PTRMASK: {
    Register MaskReg = I.getOperand(2).getReg();
    I.setDesc(TII.get(AArch64::ANDXri));
    I.getOperand(2).ChangeToImmediate(
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC: {
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
          dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");

    if (DstRB.getID() == AArch64::GPRRegBankID) {
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
          !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");

      if (DstRC == SrcRC) {
      } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
                 SrcTy == LLT::scalar(64)) {
      } else if (DstRC == &AArch64::GPR32RegClass &&
                 SrcRC == &AArch64::GPR64RegClass) {
        I.getOperand(1).setSubReg(AArch64::sub_32);
            dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");

      I.setDesc(TII.get(TargetOpcode::COPY));
    } else if (DstRB.getID() == AArch64::FPRRegBankID) {
      if (DstTy == LLT::fixed_vector(4, 16) &&
          SrcTy == LLT::fixed_vector(4, 32)) {
        I.setDesc(TII.get(AArch64::XTNv4i16));
            DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
        I.eraseFromParent();

      if (Opcode == TargetOpcode::G_PTRTOINT) {
        assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
        I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ANYEXT: {
    if (selectUSMovFromExtend(I, MRI))
    const Register DstReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    if (RBDst.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
    if (RBSrc.getID() != AArch64::GPRRegBankID) {
                        << ", expected: GPR\n");
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
    if (DstSize != 64 && DstSize > 32) {
                        << ", expected: 32 or 64\n");
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_SEXT: {
    if (selectUSMovFromExtend(I, MRI))
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
    const Register DefReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(1).getReg();
    if (Opcode == TargetOpcode::G_SEXT_INREG)
      SrcSize = I.getOperand(2).getImm();
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");
        RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
    if (LoadMI && IsGPR) {
      unsigned BytesLoaded = MemOp->getSize();
    if (IsGPR && SrcSize == 32 && DstSize == 64) {
      if (!Def || !isDef32(*Def)) {
        const Register ZReg = AArch64::WZR;
        MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
        SubregToRegSrc = OrDst;

      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);
      if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
      I.eraseFromParent();

    if (DstSize == 64) {
      if (Opcode != TargetOpcode::G_SEXT_INREG) {
        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
        SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                                {&AArch64::GPR64RegClass}, {})
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
    I.eraseFromParent();
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    if (NewOpc == Opcode)
    I.setDesc(TII.get(NewOpc));
    I.setFlags(MachineInstr::NoFPExcept);

  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SELECT: {
    auto &Sel = cast<GSelect>(I);
    if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
                        << ", expected: " << LLT::scalar(1) << '\n');

    const Register CondReg = Sel.getCondReg();
    const Register TReg = Sel.getTrueReg();
    const Register FReg = Sel.getFalseReg();

    if (tryOptSelect(Sel))

    auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
    if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
    Sel.eraseFromParent();

  case TargetOpcode::G_ICMP: {
      return selectVectorICmp(I, MRI);
    if (Ty != LLT::scalar(32)) {
                        << ", expected: " << LLT::scalar(32) << '\n');

    emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
    emitCSINC(I.getOperand(0).getReg(), AArch64::WZR,
              AArch64::WZR, InvCC, MIB);
    I.eraseFromParent();

  case TargetOpcode::G_FCMP: {
    if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
        !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
    I.eraseFromParent();

  case TargetOpcode::G_VASTART:
    return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);

  case TargetOpcode::G_BLOCK_ADDR: {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      I.setDesc(TII.get(AArch64::MOVaddrBA));
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                        I.getOperand(1).getBlockAddress(), 0,
      I.eraseFromParent();

  case AArch64::G_DUP: {
    if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
        AArch64::GPRRegBankID)
    if (VecTy == LLT::fixed_vector(8, 8))
      I.setDesc(TII.get(AArch64::DUPv8i8gpr));
    else if (VecTy == LLT::fixed_vector(16, 8))
      I.setDesc(TII.get(AArch64::DUPv16i8gpr));
    else if (VecTy == LLT::fixed_vector(4, 16))
      I.setDesc(TII.get(AArch64::DUPv4i16gpr));
    else if (VecTy == LLT::fixed_vector(8, 16))
      I.setDesc(TII.get(AArch64::DUPv8i16gpr));

  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_UNMERGE_VALUES:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
    return selectMOPS(I, MRI);
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
  Register VecReg = I.getOperand(1).getReg();
  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
    if (VecTy == LLT::fixed_vector(2, 32)) {
      Register DstReg = I.getOperand(0).getReg();
      auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
          .addReg(AddP.getReg(0), 0, AArch64::ssub)
      RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
      I.eraseFromParent();

    if (VecTy == LLT::fixed_vector(16, 8))
      Opc = AArch64::ADDVv16i8v;
    else if (VecTy == LLT::fixed_vector(8, 16))
      Opc = AArch64::ADDVv8i16v;
    else if (VecTy == LLT::fixed_vector(4, 32))
      Opc = AArch64::ADDVv4i32v;
    else if (VecTy == LLT::fixed_vector(2, 64))
      Opc = AArch64::ADDPv2i64p;
    I.setDesc(TII.get(Opc));

  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
    if (VecTy == LLT::fixed_vector(2, 32))
      Opc = AArch64::FADDPv2i32p;
    else if (VecTy == LLT::fixed_vector(2, 64))
      Opc = AArch64::FADDPv2i64p;
    I.setDesc(TII.get(Opc));
bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
    Mopcode = AArch64::MOPSMemoryCopyPseudo;
  case TargetOpcode::G_MEMMOVE:
    Mopcode = AArch64::MOPSMemoryMovePseudo;
  case TargetOpcode::G_MEMSET:
    Mopcode = AArch64::MOPSMemorySetPseudo;

  const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  const auto &SrcValRegClass =
      IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;

  RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);

    MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
                   {DstPtrCopy, SizeCopy, SrcValCopy});
    MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
                   {DstPtrCopy, SrcValCopy, SizeCopy});
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();

  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  I.eraseFromParent();

bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();
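// Select a thread-local G_GLOBAL_VALUE. Only the MachO TLV scheme is handled
// here: the descriptor is loaded through the GOT and its accessor is invoked.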
bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())

  const auto &GlobalOp = I.getOperand(1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})

  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
  I.eraseFromParent();
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    Opc = AArch64::FRINTZHr;
    Opc = AArch64::FRINTZSr;
    Opc = AArch64::FRINTZDr;
      Opc = AArch64::FRINTZv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTZv8f16;
      Opc = AArch64::FRINTZv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTZv4f32;
      Opc = AArch64::FRINTZv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
  I.setDesc(TII.get(Opc));

bool AArch64InstructionSelector::selectIntrinsicRound(
    Opc = AArch64::FRINTAHr;
    Opc = AArch64::FRINTASr;
    Opc = AArch64::FRINTADr;
      Opc = AArch64::FRINTAv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTAv8f16;
      Opc = AArch64::FRINTAv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTAv4f32;
      Opc = AArch64::FRINTAv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
  I.setDesc(TII.get(Opc));
bool AArch64InstructionSelector::selectVectorICmp(
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();

  unsigned PredIdx = 0;
  bool SwapOperands = false;
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
    SwapOperands = true;
  case CmpInst::ICMP_ULE:
    SwapOperands = true;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
  case CmpInst::ICMP_SLT:
    SwapOperands = true;
  case CmpInst::ICMP_SLE:
    SwapOperands = true;

  static const unsigned OpcTable[4][4][9] = {
      {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
       AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
       AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
      {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
       AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
       AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
       AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
       AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
      {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
       AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
       AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
      {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
       AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
       AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
      {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
       AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
       AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
      {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
       AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
       AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},

  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
      getRegClassForTypeOnBank(SrcTy, VecRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;

  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
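// Insert a scalar value into the low lane of an IMPLICIT_DEF vector of the
// requested register class, returning the widened vector instruction.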
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);

  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  if (I.getNumOperands() != 3)
  if (DstTy == LLT::scalar(128)) {
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, 0, RB, MIB);
                       Src2Reg, 1, RB, MIB);
    I.eraseFromParent();

  if (RB.getID() != AArch64::GPRRegBankID)

  auto *DstRC = &AArch64::GPR64RegClass;
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(1).getReg())
      .addImm(AArch64::sub_32);
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addUse(I.getOperand(2).getReg())
      .addImm(AArch64::sub_32);
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
  I.eraseFromParent();
                              const unsigned EltSize) {
    CopyOpc = AArch64::DUPi8;
    ExtractSubReg = AArch64::bsub;
    CopyOpc = AArch64::DUPi16;
    ExtractSubReg = AArch64::hsub;
    CopyOpc = AArch64::DUPi32;
    ExtractSubReg = AArch64::ssub;
    CopyOpc = AArch64::DUPi64;
    ExtractSubReg = AArch64::dsub;
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");

MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
      getRegClassForTypeOnBank(VecTy, VecRB, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);

        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)

  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);

  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();

  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  I.eraseFromParent();

bool AArch64InstructionSelector::selectSplitVectorUnmerge(
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");

      *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
        emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
  I.eraseFromParent();
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");

  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
         "can only unmerge from vector or s128 types!");
         "source register size too small!");
    return selectSplitVectorUnmerge(I, MRI);

  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  unsigned NumInsertRegs = NumElts - 1;
                               *RBI.getRegBank(SrcReg, MRI, TRI));
  assert(Found && "expected to find last operand's subreg idx");
  for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
            TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
    InsertRegs.push_back(InsertReg);

  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);

  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  I.eraseFromParent();
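// Constant-pool helpers: emitConstantPoolEntry creates the pool entry and
// emitLoadFromConstantPool materializes it with an ADRP plus a LDR whose
// width matches the constant's type.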
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
              .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
              .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                                      MachineMemOperand::MOLoad,
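// Map (register bank, element size) to the INSv* opcode used for a vector
// element insert and the matching subregister index (getInsertVecEltOpInfo
// in upstream LLVM).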
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    }
  }
  return std::make_pair(Opc, SubregIdx);
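// emitInstr: build an already-selected instruction from an opcode and
// DstOps/SrcOps, apply any complex renderer functions, and constrain its
// register operands.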
MachineInstr *AArch64InstructionSelector::emitInstr(
    unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
    std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
    const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
         "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  if (RenderFns)
    for (auto &Fn : *RenderFns)
      Fn(MI);
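// emitAddSub: shared emission helper for the ADD/SUB families. Each opcode
// table row is indexed by [addressing mode][Is32Bit]:
//   [0] immediate (ri), [1] shifted register (rs), [2] register (rr),
//   [3] negated immediate (ri of the inverse op), [4] extended register (rx).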
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
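// Opcode tables for the plain ADD, flag-setting ADDS, and flag-setting SUBS
// forms (presumably emitADD, emitADDS and emitSUBS); each follows the
// emitAddSub row layout described above.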
  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDXri, AArch64::ADDWri},
       {AArch64::ADDXrs, AArch64::ADDWrs},
       {AArch64::ADDXrr, AArch64::ADDWrr},
       {AArch64::SUBXri, AArch64::SUBWri},
       {AArch64::ADDXrx, AArch64::ADDWrx}}};
  return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);

  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::ADDSXrs, AArch64::ADDSWrs},
       {AArch64::ADDSXrr, AArch64::ADDSWrr},
       {AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);

  const std::array<std::array<unsigned, 2>, 5> OpcTable{
      {{AArch64::SUBSXri, AArch64::SUBSWri},
       {AArch64::SUBSXrs, AArch64::SUBSWrs},
       {AArch64::SUBSXrr, AArch64::SUBSWrr},
       {AArch64::ADDSXri, AArch64::ADDSWri},
       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
  auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  bool Is32Bit = (RegSize == 32);
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
    int64_t Imm = ValAndVReg->Value.getSExtValue();
      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
  if (auto Fns = selectLogicalShiftedRegister(RHS))
    return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
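// emitIntegerCompare: try to fold the compare (e.g. into a CMN or TST) via
// tryFoldIntegerCompare first, otherwise fall back to a flag-setting SUBS.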
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
    return FoldCmp;
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);
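// emitCSetForFCmp: materialize the boolean result of a floating-point
// compare. Simple conditions need a single CSINC against WZR; conditions
// that map to two AArch64 condition codes need two CSINCs combined with an
// ORRWrr.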
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
         "Expected a 32-bit scalar register?");
  const Register ZReg = AArch64::WZR;
    return emitCSINC(Dst, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
  emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  if (OpSize != 32 && OpSize != 64)
    return nullptr;
  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
      ShouldUseImm = true;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
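// emitVectorConcat: concatenate two half-width vectors by widening each
// operand to 128 bits and combining them with a vector lane insert.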
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
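// emitCSINC: Dst = Src1 if the condition holds, otherwise Src2 + 1. A CSET
// (as used by emitCSetForFCmp above) is the WZR/WZR special case with the
// inverted condition.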
  assert(Size <= 64 && "Expected 64 bits or less only!");
  static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  unsigned Opc = OpcTable[Size == 64];
  auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
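// emitOverflowOp: emit the flag-setting add/sub for G_[SU]ADDO / G_[SU]SUBO
// and return it together with the AArch64 condition code that indicates
// overflow for that operation.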
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_USUBO:
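// The remaining fragments implement CCMP-based selection of boolean
// conjunctions/disjunctions (canEmitConjunction, emitConditionalComparison
// and emitConjunctionRec in upstream LLVM): decide whether a G_AND/G_OR tree
// of compares can become a compare + conditional-compare chain, pick the
// CCMP/FCCMP opcode, and walk the tree emitting that chain.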
                               unsigned Depth = 0) {
  if (Opcode == TargetOpcode::G_TRUNC) {
  if (isa<GAnyCmp>(ValDef)) {
    MustBeFirst = false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    if (MustBeFirstL && MustBeFirstR)
      return false;
    if (!CanNegateL && !CanNegateR)
      return false;
    if (IsOR) {
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
  if (CmpInst::isIntPredicate(CC)) {
    CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    switch (OpTy.getSizeInBits()) {
    case 16: CCmpOpc = AArch64::FCCMPHrr; break;
    case 32: CCmpOpc = AArch64::FCCMPSrr; break;
    case 64: CCmpOpc = AArch64::FCCMPDrr; break;
    }
  }
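// Leaf case of the conjunction walk: a lone compare becomes a SUBS (integer)
// or an FCMP (floating point); when the leaf is negated, its predicate is
// inverted first.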
  if (Opcode == TargetOpcode::G_TRUNC) {
  if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
      CC = CmpInst::getInversePredicate(CC);
    if (isa<GICmp>(Cmp)) {
        ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
    if (isa<GICmp>(Cmp))
      return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
    return emitFPCompare(Cmp->getOperand(2).getReg(),
                         Cmp->getOperand(3).getReg(), MIB);
  bool IsOR = Opcode == TargetOpcode::G_OR;
  assert(ValidL && "Valid conjunction/disjunction tree");
  assert(ValidR && "Valid conjunction/disjunction tree");
  assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      NegateAfterR = true;
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");
    NegateAfterR = false;
    NegateAfterAll = false;
  }
  bool DummyCanNegate;
  bool DummyMustBeFirst;