#include "llvm/IR/IntrinsicsAArch64.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
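// The tablegen-erated AArch64GenGlobalISel.inc is included several times in
// this file, each time gated by a different GET_GLOBALISEL_* macro: the
// predicate bitset here, predicate/temporary declarations inside the class,
// and the generated selector implementation further down.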
ProduceNonFlagSettingCondBr =
bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,

unsigned emitConstantPoolEntry(const Constant *CPVal,

std::initializer_list<llvm::DstOp> DstOps,
std::initializer_list<llvm::SrcOp> SrcOps,
const ComplexRendererFns &RenderFns = None) const;
const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,

Register SrcReg = AArch64::WZR) const;

std::pair<MachineInstr *, AArch64CC::CondCode>

ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

unsigned Size) const;
ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
  return selectAddrModeUnscaled(Root, 1);
}
ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
  return selectAddrModeUnscaled(Root, 2);
}
ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
  return selectAddrModeUnscaled(Root, 4);
}
ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
  return selectAddrModeUnscaled(Root, 8);
}
ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
  return selectAddrModeUnscaled(Root, 16);
}
ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,

unsigned Size) const;
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
  return selectAddrModeIndexed(Root, Width / 8);
}

unsigned SizeInBytes) const;
bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
unsigned SizeInBytes) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
  return selectAddrModeXRO(Root, Width / 8);
}

unsigned SizeInBytes) const;
ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
  return selectAddrModeWRO(Root, Width / 8);
}

ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
  return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
  return selectShiftedRegister(Root);
}
bool IsLoadStore = false) const;

ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

int OpIdx = -1) const;
int OpIdx = -1) const;
int OpIdx = -1) const;
int OpIdx = -1) const;
int OpIdx = -1) const;
int OpIdx = -1) const;

unsigned OpFlags) const;

bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

bool ProduceNonFlagSettingCondBr = false;
#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#include "AArch64GenGlobalISel.inc"

#include "AArch64GenGlobalISel.inc"
bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    return GetAllRegSet ? &AArch64::GPR32allRegClass
                        : &AArch64::GPR32RegClass;
    return GetAllRegSet ? &AArch64::GPR64allRegClass
                        : &AArch64::GPR64RegClass;

  if (RB.getID() == AArch64::FPRRegBankID) {
    return &AArch64::FPR16RegClass;
    return &AArch64::FPR32RegClass;
    return &AArch64::FPR64RegClass;
    return &AArch64::FPR128RegClass;
bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
      return &AArch64::FPR8RegClass;
      return &AArch64::FPR16RegClass;
      return &AArch64::FPR32RegClass;
      return &AArch64::FPR64RegClass;
      return &AArch64::FPR128RegClass;
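// Both helpers above map a (register bank, size) pair onto a target register
// class: the GPR bank picks GPR32/GPR64 (optionally the "all" variants that
// include WSP/SP), while the FPR bank picks FPR8..FPR128 by bit width.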
if (RC != &AArch64::FPR32RegClass)

dbgs() << "Couldn't find appropriate subregister for register class.");

switch (RB.getID()) {
case AArch64::GPRRegBankID:
case AArch64::FPRRegBankID:

auto &MBB = *MI.getParent();
auto &MRI = MF.getRegInfo();

else if (Root.isReg()) {
  Immed = ValAndVReg->Value.getSExtValue();

for (auto &MO : I.operands()) {
  LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
  if (!Register::isVirtualRegister(MO.getReg())) {
    LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
  if (PrevOpBank && OpBank != PrevOpBank) {
    LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
case AArch64::GPRRegBankID:
  switch (GenericOpc) {
  case TargetOpcode::G_SHL:
    return AArch64::LSLVWr;
  case TargetOpcode::G_LSHR:
    return AArch64::LSRVWr;
  case TargetOpcode::G_ASHR:
    return AArch64::ASRVWr;
  } else if (OpSize == 64) {
    switch (GenericOpc) {
    case TargetOpcode::G_PTR_ADD:
      return AArch64::ADDXrr;
    case TargetOpcode::G_SHL:
      return AArch64::LSLVXr;
    case TargetOpcode::G_LSHR:
      return AArch64::LSRVXr;
    case TargetOpcode::G_ASHR:
      return AArch64::ASRVXr;
case AArch64::FPRRegBankID:
  switch (GenericOpc) {
  case TargetOpcode::G_FADD:
    return AArch64::FADDSrr;
  case TargetOpcode::G_FSUB:
    return AArch64::FSUBSrr;
  case TargetOpcode::G_FMUL:
    return AArch64::FMULSrr;
  case TargetOpcode::G_FDIV:
    return AArch64::FDIVSrr;
  switch (GenericOpc) {
  case TargetOpcode::G_FADD:
    return AArch64::FADDDrr;
  case TargetOpcode::G_FSUB:
    return AArch64::FSUBDrr;
  case TargetOpcode::G_FMUL:
    return AArch64::FMULDrr;
  case TargetOpcode::G_FDIV:
    return AArch64::FDIVDrr;
  case TargetOpcode::G_OR:
    return AArch64::ORRv8i8;
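// The switch above maps generic binary opcodes to concrete AArch64
// instructions keyed on register bank and operand size; note that 64-bit
// G_OR on the FPR bank uses the vector ORRv8i8 encoding.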
const bool isStore = GenericOpc == TargetOpcode::G_STORE;
case AArch64::GPRRegBankID:
  return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
  return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
  return isStore ? AArch64::STRWui : AArch64::LDRWui;
  return isStore ? AArch64::STRXui : AArch64::LDRXui;
case AArch64::FPRRegBankID:
  return isStore ? AArch64::STRBui : AArch64::LDRBui;
  return isStore ? AArch64::STRHui : AArch64::LDRHui;
  return isStore ? AArch64::STRSui : AArch64::LDRSui;
  return isStore ? AArch64::STRDui : AArch64::LDRDui;
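// Unsigned-offset (ui) load/store opcodes, chosen by register bank and access
// size: BB/HH/W/X forms for the GPR bank, B/H/S/D forms for the FPR bank.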
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();

(DstSize == SrcSize ||
 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
"Copy with different width?!");
assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
       "GPRs cannot get more than 64-bit width values");

assert(SrcReg.isValid() && "Expected a valid source register?");
assert(To && "Destination register class cannot be null");

RegOp.setReg(SubRegCopy.getReg(0));

if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>

Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();

if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
  SrcSize = DstSize = 32;

Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();

bool KnownValid = false;

auto CheckCopy = [&]() {
  (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
   !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
  "No phys reg on generic operator!");
  bool ValidCopy = true;
  assert(ValidCopy && "Invalid copy.");

LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");

auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
} else if (SrcSize > DstSize) {
} else if (DstSize > SrcSize) {
TII.get(AArch64::SUBREG_TO_REG), PromoteReg)

if (Register::isPhysicalRegister(DstReg))
I.setDesc(TII.get(AArch64::COPY));
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
  return AArch64::SCVTFUWSri;
case TargetOpcode::G_UITOFP:
  return AArch64::UCVTFUWSri;
case TargetOpcode::G_FPTOSI:
  return AArch64::FCVTZSUWSr;
case TargetOpcode::G_FPTOUI:
  return AArch64::FCVTZUUWSr;
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
  return AArch64::SCVTFUXSri;
case TargetOpcode::G_UITOFP:
  return AArch64::UCVTFUXSri;
case TargetOpcode::G_FPTOSI:
  return AArch64::FCVTZSUWDr;
case TargetOpcode::G_FPTOUI:
  return AArch64::FCVTZUUWDr;
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
  return AArch64::SCVTFUWDri;
case TargetOpcode::G_UITOFP:
  return AArch64::UCVTFUWDri;
case TargetOpcode::G_FPTOSI:
  return AArch64::FCVTZSUXSr;
case TargetOpcode::G_FPTOUI:
  return AArch64::FCVTZUUXSr;
switch (GenericOpc) {
case TargetOpcode::G_SITOFP:
  return AArch64::SCVTFUXDri;
case TargetOpcode::G_UITOFP:
  return AArch64::UCVTFUXDri;
case TargetOpcode::G_FPTOSI:
  return AArch64::FCVTZSUXDr;
case TargetOpcode::G_FPTOUI:
  return AArch64::FCVTZUUXDr;
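// The four switches above cover int<->FP conversions for each combination of
// 32- and 64-bit source and destination; the opcode names encode the register
// forms involved (e.g. SCVTFUWSri converts a 32-bit GPR to a 32-bit FPR, and
// FCVTZUUXDr a 64-bit FPR to a 64-bit GPR).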
1042 "Expected both select operands to have the same regbank?");
1048 "Expected 32 bit or 64 bit select only?");
1049 const bool Is32Bit =
Size == 32;
1051 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1052 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1058 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1059 bool Optimized =
false;
1060 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &
MRI,
1075 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1092 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1111 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1127 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &
MRI,
1133 if (!TrueCst && !FalseCst)
1136 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1137 if (TrueCst && FalseCst) {
1138 int64_t
T = TrueCst->Value.getSExtValue();
1139 int64_t
F = FalseCst->Value.getSExtValue();
1141 if (
T == 0 &&
F == 1) {
1143 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1149 if (
T == 0 &&
F == -1) {
1151 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1159 int64_t
T = TrueCst->Value.getSExtValue();
1162 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1171 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1180 int64_t
F = FalseCst->Value.getSExtValue();
1183 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1190 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1198 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1199 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1200 Optimized |= TryOptSelectCst();
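// emitSelect uses FCSEL for FPR operands and CSEL for GPR operands, but first
// tries to fold work into the conditional select itself: CSNEG/CSINV/CSINC
// when one arm is a simple negate/invert/add-one of the other, or when the
// arms are the constants 0, 1 or -1 (handled by TryOptSelectCst above).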
case CmpInst::ICMP_NE:
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:

case CmpInst::FCMP_OEQ:
case CmpInst::FCMP_OGT:
case CmpInst::FCMP_OGE:
case CmpInst::FCMP_OLT:
case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ONE:
case CmpInst::FCMP_ORD:
case CmpInst::FCMP_UNO:
case CmpInst::FCMP_UEQ:
case CmpInst::FCMP_UGT:
case CmpInst::FCMP_UGE:
case CmpInst::FCMP_ULT:
case CmpInst::FCMP_ULE:
case CmpInst::FCMP_UNE:
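// These case lists translate IR integer and floating-point predicates into
// AArch64 condition codes. A few FP predicates cannot be expressed with a
// single condition code, which is why emitCSetForFCmp further down may emit
// two CSINCs and OR their results together.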
assert(Reg.isValid() && "Expected valid register!");
unsigned Opc = MI->getOpcode();
if (!MI->getOperand(0).isReg() ||

if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
    Opc == TargetOpcode::G_TRUNC) {
  Register NextReg = MI->getOperand(1).getReg();

case TargetOpcode::G_AND:
case TargetOpcode::G_XOR: {
  TestReg = MI->getOperand(1).getReg();
  Register ConstantReg = MI->getOperand(2).getReg();
  C = VRegAndVal->Value.getSExtValue();
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_SHL: {
  TestReg = MI->getOperand(1).getReg();
  C = VRegAndVal->Value.getSExtValue();

case TargetOpcode::G_AND:
  if ((*C >> Bit) & 1)
case TargetOpcode::G_SHL:
  if (*C <= Bit && (Bit - *C) < TestRegSize) {
case TargetOpcode::G_ASHR:
  if (Bit >= TestRegSize)
    Bit = TestRegSize - 1;
case TargetOpcode::G_LSHR:
  if ((Bit + *C) < TestRegSize) {
case TargetOpcode::G_XOR:
  if ((*C >> Bit) & 1)
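// This walk looks through copies, extends/truncs, ANDs, shifts and XORs that
// feed the tested value, folding their effect into the bit index so that the
// eventual TB(N)Z can test the original source register directly.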
assert(ProduceNonFlagSettingCondBr &&
       "Cannot emit TB(N)Z with speculation tracking!");
assert(Bit < 64 && "Bit is too large!");
bool UseWReg = Bit < 32;
unsigned NecessarySize = UseWReg ? 32 : 64;
if (Size != NecessarySize)
  TestReg = moveScalarRegClass(
      TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,

static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                        {AArch64::TBZW, AArch64::TBNZW}};
unsigned Opc = OpcTable[UseWReg][IsNegative];

bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);

assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
       AArch64::GPRRegBankID &&
       "Expected GPRs only?");
assert(Width <= 64 && "Expected width to be at most 64?");
static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                        {AArch64::CBNZW, AArch64::CBNZX}};
unsigned Opc = OpcTable[IsNegative][Width == 64];
auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
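// Note how the two opcode tables are indexed differently: the TB(N)Z table is
// OpcTable[UseWReg][IsNegative] (X-register row first), while the CB(N)Z
// table is OpcTable[IsNegative][Width == 64] (W-register column first).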
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  I.eraseFromParent();

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (!ProduceNonFlagSettingCondBr)

  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      emitTestBit(LHS, Bit, false, DestMBB, MIB);
      I.eraseFromParent();
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      emitTestBit(LHS, Bit, true, DestMBB, MIB);
      I.eraseFromParent();

  if (ICmpInst::isEquality(Pred)) {
    if (VRegAndVal && VRegAndVal->Value == 0) {
      tryOptAndIntoCompareBranch(
          *AndInst, Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
        I.eraseFromParent();
    if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
      emitCBZ(LHS, Pred == CmpInst::ICMP_NE, DestMBB, MIB);
      I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  I.eraseFromParent();

bool AArch64InstructionSelector::selectCompareBranch(
  Register CondReg = I.getOperand(0).getReg();
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, *CCMI, MIB);

  if (ProduceNonFlagSettingCondBr) {
    emitTestBit(CondReg, 0, true, I.getOperand(1).getMBB(), MIB);
    I.eraseFromParent();
  MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
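// selectCompareBranch peels a G_TRUNC feeding the G_BRCOND, dispatches to the
// FCMP/ICMP-fed paths above, and otherwise falls back to a test of bit 0 of
// the condition register (or ANDSWri #1 followed by a conditional branch when
// non-flag-setting branches are not allowed).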
assert(OpMI && "Expected to find a vreg def for vector shift operand");
int64_t Imm = *ShiftImm;

bool AArch64InstructionSelector::selectVectorSHL(
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else if (Ty == LLT::vector(4, 16)) {
    Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  } else if (Ty == LLT::vector(8, 16)) {
    Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  } else if (Ty == LLT::vector(16, 8)) {
    Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  } else if (Ty == LLT::vector(8, 8)) {
    Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;

  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  I.eraseFromParent();
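// Vector G_SHL uses the immediate-form SHL when the shift amount is a splat
// constant (ImmVal), and otherwise the register-form USHL, which takes the
// per-lane shift amounts in a vector register.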
bool AArch64InstructionSelector::selectVectorAshrLshr(
  assert(I.getOpcode() == TargetOpcode::G_ASHR ||
         I.getOpcode() == TargetOpcode::G_LSHR);
  Register DstReg = I.getOperand(0).getReg();
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;

  unsigned NegOpc = 0;
      getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
  if (Ty == LLT::vector(2, 64)) {
    Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
    NegOpc = AArch64::NEGv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
    NegOpc = AArch64::NEGv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
    NegOpc = AArch64::NEGv2i32;
  } else if (Ty == LLT::vector(4, 16)) {
    Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
    NegOpc = AArch64::NEGv4i16;
  } else if (Ty == LLT::vector(8, 16)) {
    Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
    NegOpc = AArch64::NEGv8i16;
  } else if (Ty == LLT::vector(16, 8)) {
    Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
    NegOpc = AArch64::NEGv16i8;
  } else if (Ty == LLT::vector(8, 8)) {
    Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
    NegOpc = AArch64::NEGv8i8;

  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  I.eraseFromParent();
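// AArch64 has no right-shift-by-register vector instruction, so vector
// G_ASHR/G_LSHR are emitted as SSHL/USHL with a negated shift-amount vector
// (the NEG of Src2Reg built just above).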
bool AArch64InstructionSelector::selectVaStartAAPCS(

bool AArch64InstructionSelector::selectVaStartDarwin(
  Register ListReg = I.getOperand(0).getReg();
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  I.eraseFromParent();

void AArch64InstructionSelector::materializeLargeCMVal(
  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
      MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                   MovZ->getOperand(1).getOffset(), Flags));
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
  Register DstReg = BuildMovK(MovZ.getReg(0),
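// materializeLargeCMVal builds a MOVZ plus a chain of MOVKs (via the
// BuildMovK lambda) so that a full 64-bit global or block address can be
// materialized under the large code model.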
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      I.getOperand(2).setReg(Trunc.getReg(0));
  case TargetOpcode::G_STORE: {
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    SrcOp.setReg(NewSrc);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    Register DstReg = I.getOperand(0).getReg();
  case AArch64::G_DUP: {
    auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
    I.getOperand(1).setReg(NewSrc.getReg(0));
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    Register SrcReg = I.getOperand(1).getReg();
    if (I.getOpcode() == TargetOpcode::G_SITOFP)
      I.setDesc(TII.get(AArch64::G_SITOF));
    else
      I.setDesc(TII.get(AArch64::G_UITOF));
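// preISelLower rewrites a few generic opcodes into a more selectable form
// before the main switch: narrowing 64-bit shift amounts via a sub_32 COPY,
// turning G_PTR_ADD into G_ADD (convertPtrAddToAdd), contracting cross-bank
// copies into G_STOREs, and retagging scalar G_SITOFP/G_UITOFP as the
// target-specific G_SITOF/G_UITOF opcodes.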
bool AArch64InstructionSelector::convertPtrAddToAdd(
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);

  I.setDesc(TII.get(TargetOpcode::G_ADD));
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");

  I.getOperand(2).setReg(NegatedReg);
  I.setDesc(TII.get(TargetOpcode::G_SUB));

bool AArch64InstructionSelector::earlySelectSHL(
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  if (!Imm1Fn || !Imm2Fn)

  MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  for (auto &RenderFn : *Imm1Fn)
  for (auto &RenderFn : *Imm2Fn)
  I.eraseFromParent();
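// earlySelectSHL folds a constant shift amount straight into UBFMWri/UBFMXri
// (the underlying encoding of LSL-by-immediate); the two renderer bundles
// supply UBFM's two immediate operands.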
bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  Register StoreSrcReg = I.getOperand(0).getReg();
  I.getOperand(0).setReg(DefDstReg);

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  switch (I.getOpcode()) {
  case AArch64::G_DUP: {
    Register Src = I.getOperand(1).getReg();
    Register Dst = I.getOperand(0).getReg();
    auto *CV = ConstantDataVector::getSplat(
        ValAndVReg->Value));
    if (!emitConstantVector(Dst, CV, MIRBuilder, MRI))
    I.eraseFromParent();
  case TargetOpcode::G_BR: {
    I.eraseFromParent();
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;
    Register DefReg = I.getOperand(0).getReg();
    I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
    I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
    I.setDesc(TII.get(TargetOpcode::COPY));
  case TargetOpcode::G_ADD: {
    Register CmpReg = I.getOperand(2).getReg();
    emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
                       Cmp->getOperand(1), MIRBuilder);
    emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder, X);
    I.eraseFromParent();
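// earlySelect handles a handful of opcodes ahead of the generated matcher:
// splat G_DUPs of constants become constant vectors, zero G_CONSTANTs become
// copies of WZR/XZR, constant G_SHLs go through earlySelectSHL, and a G_ADD
// fed by an ICMP can be folded into a compare followed by a CSet.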
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");

LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");

unsigned Opcode = I.getOpcode();
if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
  if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
  if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
    const Register DefReg = I.getOperand(0).getReg();
    DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
    I.setDesc(TII.get(TargetOpcode::PHI));

if (I.getNumOperands() != I.getNumExplicitOperands()) {
  dbgs() << "Generic instruction has unexpected implicit operands\n");

if (preISelLower(I)) {
  Opcode = I.getOpcode();

I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
case TargetOpcode::G_SBFX:
case TargetOpcode::G_UBFX: {
  static const unsigned OpcTable[2][2] = {
      {AArch64::UBFMWri, AArch64::UBFMXri},
      {AArch64::SBFMWri, AArch64::SBFMXri}};
  bool IsSigned = Opcode == TargetOpcode::G_SBFX;
  unsigned Opc = OpcTable[IsSigned][Size == 64];
  assert(Cst1 && "Should have gotten a constant for src 1?");
  assert(Cst2 && "Should have gotten a constant for src 2?");
  auto LSB = Cst1->Value.getZExtValue();
  auto Width = Cst2->Value.getZExtValue();
  MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
  I.eraseFromParent();

case TargetOpcode::G_BRCOND:
  return selectCompareBranch(I, MF, MRI);
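// G_SBFX/G_UBFX select directly to SBFM/UBFM, deriving the bitfield
// immediates from the constant LSB and Width operands.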
case TargetOpcode::G_BRINDIRECT: {

case TargetOpcode::G_BRJT:
  return selectBrJT(I, MRI);

case AArch64::G_ADD_LOW: {
  I.setDesc(TII.get(AArch64::ADDXri));
  I.addOperand(MachineOperand::CreateImm(0));
  "Expected small code model");
  auto Op2 = I.getOperand(2);
  auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                     .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                       Op1.getTargetFlags())
                     Op2.getTargetFlags());
  I.eraseFromParent();
case TargetOpcode::G_BSWAP: {
  Register DstReg = I.getOperand(0).getReg();
  LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
  if (NumElts != 4 && NumElts != 2) {
    LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
                      : AArch64::REV32v16i8;
  else if (EltSize == 64)
    Opc = AArch64::REV64v16i8;
  assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
  I.setDesc(TII.get(Opc));
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
  const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT p0 = LLT::pointer(0, 64);

  const Register DefReg = I.getOperand(0).getReg();

  if (Ty != s32 && Ty != s64 && Ty != s128) {
    << " constant, expected: " << s32 << " or " << s64
    << " or " << s128 << '\n');

  if (RB.getID() != AArch64::FPRRegBankID) {
    << " constant on bank: " << RB
    << ", expected: FPR\n");

  if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))

  if (Ty != p0 && Ty != s8 && Ty != s16) {
    << " constant, expected: " << s32 << ", " << s64
    << ", or " << p0 << '\n');

  if (RB.getID() != AArch64::GPRRegBankID) {
    << " constant on bank: " << RB
    << ", expected: GPR\n");

  const unsigned MovOpc =
      DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
      DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      DefSize == 32 ? AArch64::FPR32RegClass
                    : (DefSize == 64 ? AArch64::FPR64RegClass
                                     : AArch64::FPR128RegClass);

  if (DefSize == 64 || DefSize == 128 ||
    auto *FPImm = I.getOperand(1).getFPImm();
    auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
      LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
    I.eraseFromParent();

    LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
  } else if (I.getOperand(1).isCImm()) {
    uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    uint64_t Val = I.getOperand(1).getImm();
    I.getOperand(1).ChangeToImmediate(Val);

  I.setDesc(TII.get(MovOpc));
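// Integer constants become MOVi32imm/MOVi64imm pseudos (expanded after
// selection); FP constants other than +0.0, and 64/128-bit FP constants,
// are instead loaded from the constant pool.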
case TargetOpcode::G_EXTRACT: {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
         DstRB.getID() == AArch64::FPRRegBankID &&
         "Wrong extract regbank!");
  unsigned Offset = I.getOperand(2).getImm();
  unsigned LaneIdx = Offset / 64;
      DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
  I.eraseFromParent();

  I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
  "unexpected G_EXTRACT types");
      .addReg(DstReg, 0, AArch64::sub_32);
  AArch64::GPR32RegClass, MRI);
  I.getOperand(0).setReg(DstReg);

case TargetOpcode::G_INSERT: {
  I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
  unsigned LSB = I.getOperand(3).getImm();
  I.getOperand(3).setImm((DstSize - LSB) % DstSize);
  "unexpected G_INSERT types");
          TII.get(AArch64::SUBREG_TO_REG))
      .addUse(I.getOperand(2).getReg())
      .addImm(AArch64::sub_32);
  AArch64::GPR32RegClass, MRI);
  I.getOperand(2).setReg(SrcReg);

case TargetOpcode::G_FRAME_INDEX: {
  if (Ty != LLT::pointer(0, 64)) {
    << ", expected: " << LLT::pointer(0, 64) << '\n');
  I.setDesc(TII.get(AArch64::ADDXri));
  I.addOperand(MachineOperand::CreateImm(0));
  I.addOperand(MachineOperand::CreateImm(0));
case TargetOpcode::G_GLOBAL_VALUE: {
  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return selectTLSGlobalValue(I, MRI);

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
    I.getOperand(1).setTargetFlags(OpFlags);
    materializeLargeCMVal(I, GV, OpFlags);
    I.eraseFromParent();
    I.getOperand(1).setTargetFlags(OpFlags);
  I.setDesc(TII.get(AArch64::MOVaddr));
  MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
  bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;

  if (PtrTy != LLT::pointer(0, 64)) {
    << ", expected: " << LLT::pointer(0, 64) << '\n');

  auto &MemOp = **I.memoperands_begin();
  uint64_t MemSizeInBytes = MemOp.getSize();
  if (MemOp.isAtomic()) {
    if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
        MemSizeInBytes == 1) {
      I.setDesc(TII.get(AArch64::LDARB));
    LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
  unsigned MemSizeInBits = MemSizeInBytes * 8;

  const Register PtrReg = I.getOperand(1).getReg();
  "Load/Store pointer operand isn't a GPR");
  "Load/Store pointer operand isn't a pointer");
  const Register ValReg = I.getOperand(0).getReg();

  auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
    bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
    const unsigned NewOpc =
    if (NewOpc == I.getOpcode())
        selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
      I.setDesc(TII.get(NewOpc));
      I.addOperand(MachineOperand::CreateImm(0));
    auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
    IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
    for (auto &Fn : *AddrModeFns)
    I.eraseFromParent();

  if (Opcode == TargetOpcode::G_STORE) {
    if (CVal && CVal->Value == 0) {
      case AArch64::STRWui:
      case AArch64::STRHHui:
      case AArch64::STRBBui:
        LoadStore->getOperand(0).setReg(AArch64::WZR);
      case AArch64::STRXui:
        LoadStore->getOperand(0).setReg(AArch64::XZR);

  MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
      .addImm(AArch64::sub_32);
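// Stores of constant zero are rewritten to use WZR/XZR as the source
// register, and a G_ZEXTLOAD into a wider register is finished with a
// SUBREG_TO_REG of the sub_32 value rather than an explicit zero-extend.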
case TargetOpcode::G_SMULH:
case TargetOpcode::G_UMULH: {
  const Register DefReg = I.getOperand(0).getReg();
  if (RB.getID() != AArch64::GPRRegBankID) {
    LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
  if (Ty != LLT::scalar(64)) {
    << ", expected: " << LLT::scalar(64) << '\n');

  unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
  I.setDesc(TII.get(NewOpc));

case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
    return selectVectorAshrLshr(I, MRI);
case TargetOpcode::G_SHL:
  if (Opcode == TargetOpcode::G_SHL &&
    return selectVectorSHL(I, MRI);
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR: {
  const Register DefReg = I.getOperand(0).getReg();
  if (NewOpc == I.getOpcode())
  I.setDesc(TII.get(NewOpc));

case TargetOpcode::G_PTR_ADD: {
  emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
  I.eraseFromParent();

case TargetOpcode::G_SADDO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_SSUBO:
case TargetOpcode::G_USUBO: {
  auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
                                I.getOperand(2), I.getOperand(3), MIRBuilder);
  auto CsetMI = MIRBuilder
                    .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
  I.eraseFromParent();

case TargetOpcode::G_PTRMASK: {
  Register MaskReg = I.getOperand(2).getReg();
  uint64_t Mask = *MaskVal;
  I.setDesc(TII.get(AArch64::ANDXri));
  I.getOperand(2).ChangeToImmediate(
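// The overflow opcodes lower to flag-setting adds/subs via emitOverflowOp,
// with a CSINCWr materializing the boolean overflow result into the second
// destination register.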
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
    dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");

  if (DstRB.getID() == AArch64::GPRRegBankID) {
        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
        getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
      LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");

    if (DstRC == SrcRC) {
    } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
               SrcTy == LLT::scalar(64)) {
    } else if (DstRC == &AArch64::GPR32RegClass &&
               SrcRC == &AArch64::GPR64RegClass) {
      I.getOperand(1).setSubReg(AArch64::sub_32);
      dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
    I.setDesc(TII.get(TargetOpcode::COPY));
  } else if (DstRB.getID() == AArch64::FPRRegBankID) {
    if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
      I.setDesc(TII.get(AArch64::XTNv4i16));
        DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
      I.eraseFromParent();
    if (Opcode == TargetOpcode::G_PTRTOINT) {
      assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
      I.setDesc(TII.get(TargetOpcode::COPY));
case TargetOpcode::G_ANYEXT: {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  if (RBDst.getID() != AArch64::GPRRegBankID) {
    << ", expected: GPR\n");
  if (RBSrc.getID() != AArch64::GPRRegBankID) {
    << ", expected: GPR\n");
    LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
  if (DstSize != 64 && DstSize > 32) {
    << ", expected: 32 or 64\n");
      .addImm(AArch64::sub_32);
  I.getOperand(1).setReg(ExtSrc);
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
  unsigned Opcode = I.getOpcode();
  const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
  const Register DefReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  if (Opcode == TargetOpcode::G_SEXT_INREG)
    SrcSize = I.getOperand(2).getImm();
         AArch64::GPRRegBankID &&
         "Unexpected ext regbank");

  if (LoadMI && IsGPR) {
    unsigned BytesLoaded = MemOp->getSize();

  if (IsGPR && SrcSize == 32 && DstSize == 64) {
    if (Def && isDef32(*Def)) {
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
          .addImm(AArch64::sub_32);
        LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
      I.eraseFromParent();

  if (DstSize == 64) {
    if (Opcode != TargetOpcode::G_SEXT_INREG) {
      SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
                              {&AArch64::GPR64RegClass}, {})
    ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
  } else if (DstSize <= 32) {
    ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
  I.eraseFromParent();
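// Extensions become SBFM/UBFM bitfield moves. Zero-extending a 32-bit value
// whose def already writes all 32 bits (isDef32) needs only a SUBREG_TO_REG,
// since 32-bit register writes implicitly clear the upper half of the X
// register.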
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI: {
  if (NewOpc == Opcode)
  I.setDesc(TII.get(NewOpc));

case TargetOpcode::G_FREEZE:
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:

case TargetOpcode::G_SELECT: {
  if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
    << ", expected: " << LLT::scalar(1) << '\n');
  const Register CondReg = I.getOperand(1).getReg();
  const Register TReg = I.getOperand(2).getReg();
  const Register FReg = I.getOperand(3).getReg();
  if (tryOptSelect(I))

  auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
  if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
  I.eraseFromParent();
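// When tryOptSelect cannot fold the compare feeding the select, the condition
// bit is tested with ANDSWri #1 and the select is emitted as a conditional
// select on the NE condition.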
case TargetOpcode::G_ICMP: {
    return selectVectorICmp(I, MRI);
  if (Ty != LLT::scalar(32)) {
    << ", expected: " << LLT::scalar(32) << '\n');

  emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
  emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
  I.eraseFromParent();

case TargetOpcode::G_FCMP: {
  if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(),
                     MIRBuilder, Pred) ||
      !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder))
  I.eraseFromParent();

case TargetOpcode::G_VASTART:
  return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
                              : selectVaStartAAPCS(I, MF, MRI);
case TargetOpcode::G_INTRINSIC:
  return selectIntrinsic(I, MRI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  return selectIntrinsicWithSideEffects(I, MRI);

case TargetOpcode::G_IMPLICIT_DEF: {
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  const Register DstReg = I.getOperand(0).getReg();
      getRegClassForTypeOnBank(DstTy, DstRB, RBI);

case TargetOpcode::G_BLOCK_ADDR: {
    materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
    I.eraseFromParent();
    I.setDesc(TII.get(AArch64::MOVaddrBA));
    auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                         I.getOperand(0).getReg())
        I.getOperand(1).getBlockAddress(), 0,
    I.eraseFromParent();

case AArch64::G_DUP: {
      AArch64::GPRRegBankID)
  if (VecTy == LLT::vector(8, 8))
    I.setDesc(TII.get(AArch64::DUPv8i8gpr));
  else if (VecTy == LLT::vector(16, 8))
    I.setDesc(TII.get(AArch64::DUPv16i8gpr));
  else if (VecTy == LLT::vector(4, 16))
    I.setDesc(TII.get(AArch64::DUPv4i16gpr));
  else if (VecTy == LLT::vector(8, 16))
    I.setDesc(TII.get(AArch64::DUPv8i16gpr));
case TargetOpcode::G_INTRINSIC_TRUNC:
  return selectIntrinsicTrunc(I, MRI);
case TargetOpcode::G_INTRINSIC_ROUND:
  return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
  return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_SHUFFLE_VECTOR:
  return selectShuffleVector(I, MRI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  return selectExtractElt(I, MRI);
case TargetOpcode::G_INSERT_VECTOR_ELT:
  return selectInsertElt(I, MRI);
case TargetOpcode::G_CONCAT_VECTORS:
  return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
  return selectJumpTable(I, MRI);
case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
  return selectReduction(I, MRI);
bool AArch64InstructionSelector::selectReduction(
  Register VecReg = I.getOperand(1).getReg();
  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
    if (VecTy == LLT::vector(2, 32)) {
      Register DstReg = I.getOperand(0).getReg();
      auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
          .addReg(AddP.getReg(0), 0, AArch64::ssub)
      I.eraseFromParent();

    if (VecTy == LLT::vector(16, 8))
      Opc = AArch64::ADDVv16i8v;
    else if (VecTy == LLT::vector(8, 16))
      Opc = AArch64::ADDVv8i16v;
    else if (VecTy == LLT::vector(4, 32))
      Opc = AArch64::ADDVv4i32v;
    else if (VecTy == LLT::vector(2, 64))
      Opc = AArch64::ADDPv2i64p;
    I.setDesc(TII.get(Opc));

  if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
    if (VecTy == LLT::vector(2, 32))
      Opc = AArch64::FADDPv2i32p;
    else if (VecTy == LLT::vector(2, 64))
      Opc = AArch64::FADDPv2i64p;
    I.setDesc(TII.get(Opc));
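// Integer add reductions use the across-lanes ADDV (or pairwise ADDP for the
// two-lane cases); FP add reductions are only handled here for two-lane
// vectors, via the pairwise FADDP.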
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
                                      {TargetReg, ScratchReg}, {JTAddr, Index})
                           .addJumpTableIndex(JTI);
  I.eraseFromParent();

bool AArch64InstructionSelector::selectJumpTable(
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
         "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  I.eraseFromParent();

bool AArch64InstructionSelector::selectTLSGlobalValue(
  if (!STI.isTargetMachO())
  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {LoadGOT.getReg(0)})
  I.eraseFromParent();
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    Opc = AArch64::FRINTZHr;
    Opc = AArch64::FRINTZSr;
    Opc = AArch64::FRINTZDr;
      Opc = AArch64::FRINTZv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTZv8f16;
      Opc = AArch64::FRINTZv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTZv4f32;
      Opc = AArch64::FRINTZv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
  I.setDesc(TII.get(Opc));

bool AArch64InstructionSelector::selectIntrinsicRound(
    Opc = AArch64::FRINTAHr;
    Opc = AArch64::FRINTASr;
    Opc = AArch64::FRINTADr;
      Opc = AArch64::FRINTAv4f16;
    else if (NumElts == 8)
      Opc = AArch64::FRINTAv8f16;
      Opc = AArch64::FRINTAv2f32;
    else if (NumElts == 4)
      Opc = AArch64::FRINTAv4f32;
      Opc = AArch64::FRINTAv2f64;
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
  I.setDesc(TII.get(Opc));
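// G_INTRINSIC_TRUNC maps to FRINTZ (round toward zero) and G_INTRINSIC_ROUND
// to FRINTA (round to nearest, ties away from zero), with the scalar or
// vector variant chosen by element type and lane count.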
bool AArch64InstructionSelector::selectVectorICmp(
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();

  unsigned PredIdx = 0;
  bool SwapOperands = false;
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
    SwapOperands = true;
  case CmpInst::ICMP_ULE:
    SwapOperands = true;
  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
  case CmpInst::ICMP_SLT:
    SwapOperands = true;
  case CmpInst::ICMP_SLE:
    SwapOperands = true;
static const unsigned OpcTable[4][4][9] = {
    {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
     AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
     AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
    {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
     AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
     AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
    {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
     AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
     AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
    {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
     AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
     AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
    {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
     AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
     AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
    {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
     AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
     AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
    {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
     AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
     AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},

unsigned EltIdx = Log2_32(SrcEltSize / 8);
unsigned NumEltsIdx = Log2_32(NumElts / 2);
unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
    getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
I.eraseFromParent();
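// OpcTable is indexed as [log2(element bytes)][log2(lanes / 2)][PredIdx].
// There is no native "compare not equal", so ICMP_NE is emitted as CMEQ
// followed by a NOT of the result (NOTv8i8 or NOTv16i8 depending on width).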
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  auto BuildFn = [&](unsigned SubregIndex) {
        .addImm(SubregIndex);
    return BuildFn(AArch64::hsub);
    return BuildFn(AArch64::ssub);
    return BuildFn(AArch64::dsub);

assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
if (I.getNumOperands() != 3)

if (DstTy == LLT::scalar(128)) {
  Register DstReg = I.getOperand(0).getReg();
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
  auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
  emitLaneInsert(None, Tmp.getReg(0), Src1Reg, 0, RB, MIB);
                 Src2Reg, 1, RB, MIB);
  I.eraseFromParent();

if (RB.getID() != AArch64::GPRRegBankID)
auto *DstRC = &AArch64::GPR64RegClass;
        TII.get(TargetOpcode::SUBREG_TO_REG))
    .addUse(I.getOperand(1).getReg())
    .addImm(AArch64::sub_32);
        TII.get(TargetOpcode::SUBREG_TO_REG))
    .addUse(I.getOperand(2).getReg())
    .addImm(AArch64::sub_32);
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
     .addDef(I.getOperand(0).getReg())
I.eraseFromParent();
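// For a GPR-bank G_MERGE_VALUES of two 32-bit halves, each half is widened
// with SUBREG_TO_REG and the halves are combined with a single BFMXri; the
// s128 case instead performs two lane inserts into an IMPLICIT_DEF.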
const unsigned EltSize) {
  CopyOpc = AArch64::CPYi16;
  ExtractSubReg = AArch64::hsub;
  CopyOpc = AArch64::CPYi32;
  ExtractSubReg = AArch64::ssub;
  CopyOpc = AArch64::CPYi64;
  ExtractSubReg = AArch64::dsub;
  LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");

MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
    dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");

    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, 0, ExtractSubReg);
      VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
  if (!ScalarToVector)
  MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
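// Lane 0 extractions are plain subregister copies (hsub/ssub/dsub); other
// lanes use the CPYi* lane-copy instructions, after widening sub-128-bit
// sources to an FPR128 register.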
bool AArch64InstructionSelector::selectExtractElt(
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
                                               LaneIdx, MIRBuilder);
  I.eraseFromParent();

bool AArch64InstructionSelector::selectSplitVectorUnmerge(
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
    LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
  for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
    Register Dst = I.getOperand(OpIdx).getReg();
    emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
  I.eraseFromParent();
assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "unexpected opcode");
        AArch64::FPRRegBankID ||
        AArch64::FPRRegBankID) {
  LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                       "currently unsupported.\n");

unsigned NumElts = I.getNumOperands() - 1;
Register SrcReg = I.getOperand(NumElts).getReg();
"can only unmerge from vector or s128 types!");
"source register size too small!");
  return selectSplitVectorUnmerge(I, MRI);

unsigned CopyOpc = 0;
unsigned ExtractSubReg = 0;
unsigned NumInsertRegs = NumElts - 1;
for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
  *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
          TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
  InsertRegs.push_back(InsertReg);

Register CopyTo = I.getOperand(0).getReg();
auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                     .addReg(InsertRegs[0], 0, ExtractSubReg);
unsigned LaneIdx = 1;
for (Register InsReg : InsertRegs) {
  Register CopyTo = I.getOperand(LaneIdx).getReg();
I.eraseFromParent();

bool AArch64InstructionSelector::selectConcatVectors(
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  I.eraseFromParent();
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
      .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
      .addConstantPoolIndex(CPIdx, 0,
      .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
      .addConstantPoolIndex(CPIdx, 0,
      .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
      .addConstantPoolIndex(CPIdx, 0,
  LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
static std::pair<unsigned, unsigned>
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 16) {
      Opc = AArch64::INSvi16gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
  return std::make_pair(Opc, SubregIdx);
unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
const ComplexRendererFns &RenderFns) const {
  assert(Opcode && "Expected an opcode?");
  "Function should only be used to produce selected instructions!");
  auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  for (auto &Fn : *RenderFns)

const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
  assert((Size == 32 || Size == 64) &&
         "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
const std::array<std::array<unsigned, 2>, 5> OpcTable{
    {{AArch64::ADDXri, AArch64::ADDWri},
     {AArch64::ADDXrs, AArch64::ADDWrs},
     {AArch64::ADDXrr, AArch64::ADDWrr},
     {AArch64::SUBXri, AArch64::SUBWri},
     {AArch64::ADDXrx, AArch64::ADDWrx}}};
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);

const std::array<std::array<unsigned, 2>, 5> OpcTable{
    {{AArch64::ADDSXri, AArch64::ADDSWri},
     {AArch64::ADDSXrs, AArch64::ADDSWrs},
     {AArch64::ADDSXrr, AArch64::ADDSWrr},
     {AArch64::SUBSXri, AArch64::SUBSWri},
     {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);

const std::array<std::array<unsigned, 2>, 5> OpcTable{
    {{AArch64::SUBSXri, AArch64::SUBSWri},
     {AArch64::SUBSXrs, AArch64::SUBSWrs},
     {AArch64::SUBSXrr, AArch64::SUBSWrr},
     {AArch64::ADDSXri, AArch64::ADDSWri},
     {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);

auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

bool Is32Bit = (RegSize == 32);
const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                 {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  int64_t Imm = ValAndVReg->Value.getSExtValue();
  auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
if (auto Fns = selectLogicalShiftedRegister(RHS))
  return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
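// Each 5x2 table row corresponds to an addressing-mode variant tried by
// emitAddSub: [0] immediate, [1] shifted register, [2] register-register,
// [3] negated immediate (so ADD flips to SUB and vice versa), [4] extended
// register. Column 0 holds the 64-bit (X) opcode and column 1 the 32-bit (W)
// opcode. The ANDS table below follows the same immediate/shifted/register
// layout for TST emission.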
MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
  assert(LHS.isReg() && RHS.isReg() &&
         "Expected LHS and RHS to be registers!");
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
  return emitSUBS(Dst, LHS, RHS, MIRBuilder);

MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
  "Expected a 32-bit scalar register?");
  const Register ZeroReg = AArch64::WZR;
    MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
    return EmitCSet(Dst, CC1);
  EmitCSet(Def1Reg, CC1);
  EmitCSet(Def2Reg, CC2);
  auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});

if (OpSize != 32 && OpSize != 64)
  return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
         P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
  ShouldUseImm = true;
unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                            {AArch64::FCMPSri, AArch64::FCMPDri}};
unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
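// emitCSetForFCmp builds the boolean with CSINC of WZR; predicates that need
// two condition codes emit two CSINCs and OR them together. The FCMPSri and
// FCMPDri immediate forms, which compare against #0.0, are only considered
// for the equality-style predicates listed in IsEqualityPred.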
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =

AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
  auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})

std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
  case TargetOpcode::G_SADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_UADDO:
    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  case TargetOpcode::G_SSUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  case TargetOpcode::G_USUBO:
    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
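// Signed overflow is observed through the V flag (VS), unsigned add overflow
// through carry set (HS), and unsigned subtract overflow (borrow) through
// carry clear (LO).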
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  if (UI.getOpcode() != TargetOpcode::G_SELECT)
  if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
  if (Opc == TargetOpcode::COPY &&
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
  if (CondOpc == TargetOpcode::G_ICMP) {
  emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
             I.getOperand(3).getReg(), CondCode, MIB);
  I.eraseFromParent();

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
  "Unexpected MachineOperand");
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
  if (!CmpInst::isUnsigned(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    if (!ValAndVReg || ValAndVReg->Value != 0)
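// tryFoldIntegerCompare can replace the SUBS with a CMN when one side is
// defined by a subtraction from zero, or with a TST when a signed predicate
// compares a G_AND against zero.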
bool AArch64InstructionSelector::selectShuffleVector(
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");

  for (int Val : Mask) {
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {

  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
      emitScalarToVector(64, &AArch64::FPR128RegClass,
      AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
      .addReg(TBL1.getReg(0), 0, AArch64::dsub);
  I.eraseFromParent();

  auto RegSeq = MIRBuilder
                    {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
  auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
  I.eraseFromParent();
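// G_SHUFFLE_VECTOR is lowered as a table lookup: the shuffle mask is expanded
// to a per-byte index vector, loaded from the constant pool, and fed to
// TBLv16i8One (single source) or TBLv16i8Two with a QQ register sequence
// (two sources).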
MachineInstr *AArch64InstructionSelector::emitLaneInsert(
  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addUse(InsSub->getOperand(0).getReg())
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})

bool AArch64InstructionSelector::selectInsertElt(
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
  Register DstReg = I.getOperand(0).getReg();
  Register EltReg = I.getOperand(2).getReg();
  if (EltSize < 16 || EltSize > 64)
  Register IdxReg = I.getOperand(3).getReg();
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  Register SrcReg = I.getOperand(1).getReg();
  if (VecSize < 128) {
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
  if (VecSize < 128) {
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
  I.eraseFromParent();
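// Sub-128-bit vectors are widened to an FPR128 register before the INS lane
// insert and then demoted back by copying out the ssub/dsub subregister.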
if (DstSize == 128) {
    MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
if (DstSize == 64) {
      .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
      .addReg(Mov.getReg(0), 0, AArch64::dsub);

auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));

bool AArch64InstructionSelector::tryOptConstantBuildVec(
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  assert(DstSize <= 128 && "Unexpected build_vec type!");
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
        const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
                                  I.getOperand(Idx).getReg(), MRI)))
        const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
  I.eraseFromParent();
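// emitConstantVector special-cases the all-zeros vector with MOVIv2d_ns #0
// (taking the dsub half for 64-bit results); everything else is loaded from
// the constant pool. tryOptConstantBuildVec collects the G_CONSTANT and
// G_FCONSTANT operands of a G_BUILD_VECTOR so the same path can be used.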
bool AArch64InstructionSelector::selectBuildVector(
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  if (tryOptConstantBuildVec(I, DstTy, MRI))
  if (EltSize < 16 || EltSize > 64)
      I.getOperand(1).getReg(), MIRBuilder);
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,

  if (DstSize < 128) {
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
    Register DstReg = I.getOperand(0).getReg();
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
  assert(PrevMI && "PrevMI was null?");
  I.eraseFromParent();
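// A non-constant G_BUILD_VECTOR becomes a scalar-to-vector move of the first
// element followed by one lane insert per remaining element (the loop above),
// with a final subregister copy when the destination is narrower than 128
// bits.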
return Op.isIntrinsicID();
if (IntrinOp == I.operands_end())
return IntrinOp->getIntrinsicID();
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
  case Intrinsic::trap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
  case Intrinsic::debugtrap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
  case Intrinsic::ubsantrap:
        .addImm(I.getOperand(1).getImm() | ('U' << 8));
  I.eraseFromParent();

bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
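// trap, debugtrap and ubsantrap all lower to BRK, differing only in the
// immediate: #1, #0xF000, and the ubsantrap code OR'd with 'U' << 8
// respectively.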