44#include "llvm/IR/IntrinsicsAArch64.h"
52#define DEBUG_TYPE "aarch64-isel"
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
87 ProduceNonFlagSettingCondBr =
135 bool tryOptAndIntoCompareBranch(
MachineInstr &AndInst,
bool Invert,
214 bool selectVectorLoadIntrinsic(
unsigned Opc,
unsigned NumVecs,
216 bool selectVectorLoadLaneIntrinsic(
unsigned Opc,
unsigned NumVecs,
218 void selectVectorStoreIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
220 bool selectVectorStoreLaneIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
237 unsigned emitConstantPoolEntry(
const Constant *CPVal,
256 std::optional<CmpInst::Predicate> = std::nullopt)
const;
259 emitInstr(
unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
260 std::initializer_list<llvm::SrcOp> SrcOps,
262 const ComplexRendererFns &RenderFns = std::nullopt)
const;
297 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
318 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
340 std::pair<MachineInstr *, AArch64CC::CondCode>
375 ComplexRendererFns selectShiftA_32(
const MachineOperand &Root)
const;
376 ComplexRendererFns selectShiftB_32(
const MachineOperand &Root)
const;
377 ComplexRendererFns selectShiftA_64(
const MachineOperand &Root)
const;
378 ComplexRendererFns selectShiftB_64(
const MachineOperand &Root)
const;
380 ComplexRendererFns select12BitValueWithLeftShift(
uint64_t Immed)
const;
382 ComplexRendererFns selectNegArithImmed(
MachineOperand &Root)
const;
385 unsigned Size)
const;
387 ComplexRendererFns selectAddrModeUnscaled8(
MachineOperand &Root)
const {
388 return selectAddrModeUnscaled(Root, 1);
390 ComplexRendererFns selectAddrModeUnscaled16(
MachineOperand &Root)
const {
391 return selectAddrModeUnscaled(Root, 2);
393 ComplexRendererFns selectAddrModeUnscaled32(
MachineOperand &Root)
const {
394 return selectAddrModeUnscaled(Root, 4);
396 ComplexRendererFns selectAddrModeUnscaled64(
MachineOperand &Root)
const {
397 return selectAddrModeUnscaled(Root, 8);
399 ComplexRendererFns selectAddrModeUnscaled128(
MachineOperand &Root)
const {
400 return selectAddrModeUnscaled(Root, 16);
405 ComplexRendererFns tryFoldAddLowIntoImm(
MachineInstr &RootDef,
unsigned Size,
409 unsigned Size)
const;
411 ComplexRendererFns selectAddrModeIndexed(
MachineOperand &Root)
const {
412 return selectAddrModeIndexed(Root, Width / 8);
419 unsigned SizeInBytes)
const;
427 bool WantsExt)
const;
428 ComplexRendererFns selectAddrModeRegisterOffset(
MachineOperand &Root)
const;
430 unsigned SizeInBytes)
const;
432 ComplexRendererFns selectAddrModeXRO(
MachineOperand &Root)
const {
433 return selectAddrModeXRO(Root, Width / 8);
437 unsigned SizeInBytes)
const;
439 ComplexRendererFns selectAddrModeWRO(
MachineOperand &Root)
const {
440 return selectAddrModeWRO(Root, Width / 8);
444 bool AllowROR =
false)
const;
446 ComplexRendererFns selectArithShiftedRegister(
MachineOperand &Root)
const {
447 return selectShiftedRegister(Root);
450 ComplexRendererFns selectLogicalShiftedRegister(
MachineOperand &Root)
const {
451 return selectShiftedRegister(Root,
true);
461 bool IsLoadStore =
false)
const;
472 ComplexRendererFns selectArithExtendedRegister(
MachineOperand &Root)
const;
477 int OpIdx = -1)
const;
479 int OpIdx = -1)
const;
481 int OpIdx = -1)
const;
483 int OpIdx = -1)
const;
485 int OpIdx = -1)
const;
487 int OpIdx = -1)
const;
490 int OpIdx = -1)
const;
496 bool tryOptSelect(
GSelect &Sel);
503 bool isLoadStoreOfNumBytes(
const MachineInstr &
MI,
unsigned NumBytes)
const;
516 bool ProduceNonFlagSettingCondBr =
false;
525#define GET_GLOBALISEL_PREDICATES_DECL
526#include "AArch64GenGlobalISel.inc"
527#undef GET_GLOBALISEL_PREDICATES_DECL
531#define GET_GLOBALISEL_TEMPORARIES_DECL
532#include "AArch64GenGlobalISel.inc"
533#undef GET_GLOBALISEL_TEMPORARIES_DECL
538#define GET_GLOBALISEL_IMPL
539#include "AArch64GenGlobalISel.inc"
540#undef GET_GLOBALISEL_IMPL
542AArch64InstructionSelector::AArch64InstructionSelector(
545 :
TM(
TM), STI(STI),
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()),
548#include
"AArch64GenGlobalISel.inc"
551#include
"AArch64GenGlobalISel.inc"
563 bool GetAllRegSet =
false) {
564 if (RB.
getID() == AArch64::GPRRegBankID) {
566 return GetAllRegSet ? &AArch64::GPR32allRegClass
567 : &AArch64::GPR32RegClass;
569 return GetAllRegSet ? &AArch64::GPR64allRegClass
570 : &AArch64::GPR64RegClass;
572 return &AArch64::XSeqPairsClassRegClass;
576 if (RB.
getID() == AArch64::FPRRegBankID) {
579 return &AArch64::FPR8RegClass;
581 return &AArch64::FPR16RegClass;
583 return &AArch64::FPR32RegClass;
585 return &AArch64::FPR64RegClass;
587 return &AArch64::FPR128RegClass;
599 bool GetAllRegSet =
false) {
600 unsigned RegBankID = RB.
getID();
602 if (RegBankID == AArch64::GPRRegBankID) {
603 if (SizeInBits <= 32)
604 return GetAllRegSet ? &AArch64::GPR32allRegClass
605 : &AArch64::GPR32RegClass;
606 if (SizeInBits == 64)
607 return GetAllRegSet ? &AArch64::GPR64allRegClass
608 : &AArch64::GPR64RegClass;
609 if (SizeInBits == 128)
610 return &AArch64::XSeqPairsClassRegClass;
613 if (RegBankID == AArch64::FPRRegBankID) {
614 switch (SizeInBits) {
618 return &AArch64::FPR8RegClass;
620 return &AArch64::FPR16RegClass;
622 return &AArch64::FPR32RegClass;
624 return &AArch64::FPR64RegClass;
626 return &AArch64::FPR128RegClass;
636 switch (
TRI.getRegSizeInBits(*RC)) {
644 if (RC != &AArch64::FPR32RegClass)
654 dbgs() <<
"Couldn't find appropriate subregister for register class.");
663 switch (RB.
getID()) {
664 case AArch64::GPRRegBankID:
666 case AArch64::FPRRegBankID:
689 const unsigned RegClassIDs[],
691 unsigned NumRegs = Regs.
size();
694 assert(NumRegs >= 2 && NumRegs <= 4 &&
695 "Only support between two and 4 registers in a tuple!");
697 auto *DesiredClass =
TRI->getRegClass(RegClassIDs[NumRegs - 2]);
699 MIB.
buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
700 for (
unsigned I = 0,
E = Regs.
size();
I <
E; ++
I) {
701 RegSequence.addUse(Regs[
I]);
702 RegSequence.addImm(SubRegs[
I]);
704 return RegSequence.getReg(0);
709 static const unsigned RegClassIDs[] = {
710 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
711 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
712 AArch64::dsub2, AArch64::dsub3};
713 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
718 static const unsigned RegClassIDs[] = {
719 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
720 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
721 AArch64::qsub2, AArch64::qsub3};
722 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
727 auto &
MBB = *
MI.getParent();
729 auto &
MRI = MF.getRegInfo();
735 else if (Root.
isReg()) {
740 Immed = ValAndVReg->Value.getSExtValue();
756 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
763 for (
auto &MO :
I.operands()) {
766 LLVM_DEBUG(
dbgs() <<
"Generic inst non-reg operands are unsupported\n");
774 if (!MO.getReg().isVirtual()) {
775 LLVM_DEBUG(
dbgs() <<
"Generic inst has physical register operand\n");
785 if (PrevOpBank && OpBank != PrevOpBank) {
786 LLVM_DEBUG(
dbgs() <<
"Generic inst operands have different banks\n");
801 case AArch64::GPRRegBankID:
803 switch (GenericOpc) {
804 case TargetOpcode::G_SHL:
805 return AArch64::LSLVWr;
806 case TargetOpcode::G_LSHR:
807 return AArch64::LSRVWr;
808 case TargetOpcode::G_ASHR:
809 return AArch64::ASRVWr;
813 }
else if (OpSize == 64) {
814 switch (GenericOpc) {
815 case TargetOpcode::G_PTR_ADD:
816 return AArch64::ADDXrr;
817 case TargetOpcode::G_SHL:
818 return AArch64::LSLVXr;
819 case TargetOpcode::G_LSHR:
820 return AArch64::LSRVXr;
821 case TargetOpcode::G_ASHR:
822 return AArch64::ASRVXr;
828 case AArch64::FPRRegBankID:
831 switch (GenericOpc) {
832 case TargetOpcode::G_FADD:
833 return AArch64::FADDSrr;
834 case TargetOpcode::G_FSUB:
835 return AArch64::FSUBSrr;
836 case TargetOpcode::G_FMUL:
837 return AArch64::FMULSrr;
838 case TargetOpcode::G_FDIV:
839 return AArch64::FDIVSrr;
844 switch (GenericOpc) {
845 case TargetOpcode::G_FADD:
846 return AArch64::FADDDrr;
847 case TargetOpcode::G_FSUB:
848 return AArch64::FSUBDrr;
849 case TargetOpcode::G_FMUL:
850 return AArch64::FMULDrr;
851 case TargetOpcode::G_FDIV:
852 return AArch64::FDIVDrr;
853 case TargetOpcode::G_OR:
854 return AArch64::ORRv8i8;
871 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
873 case AArch64::GPRRegBankID:
876 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
878 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
880 return isStore ? AArch64::STRWui : AArch64::LDRWui;
882 return isStore ? AArch64::STRXui : AArch64::LDRXui;
885 case AArch64::FPRRegBankID:
888 return isStore ? AArch64::STRBui : AArch64::LDRBui;
890 return isStore ? AArch64::STRHui : AArch64::LDRHui;
892 return isStore ? AArch64::STRSui : AArch64::LDRSui;
894 return isStore ? AArch64::STRDui : AArch64::LDRDui;
896 return isStore ? AArch64::STRQui : AArch64::LDRQui;
910 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
911 assert(To &&
"Destination register class cannot be null");
918 RegOp.
setReg(SubRegCopy.getReg(0));
922 if (!
I.getOperand(0).getReg().isPhysical())
932static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
936 Register DstReg =
I.getOperand(0).getReg();
937 Register SrcReg =
I.getOperand(1).getReg();
951 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
952 SrcSize = DstSize = 32;
969 if (Reg.isPhysical())
971 LLT Ty =
MRI.getType(Reg);
977 RC = getRegClassForTypeOnBank(Ty, RB);
980 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
993 Register DstReg =
I.getOperand(0).getReg();
994 Register SrcReg =
I.getOperand(1).getReg();
1013 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1017 unsigned SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1018 unsigned DstSize =
TRI.getRegSizeInBits(*DstRC);
1029 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1031 }
else if (SrcSize > DstSize) {
1038 }
else if (DstSize > SrcSize) {
1045 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1047 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1052 RegOp.
setReg(PromoteReg);
1071 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1072 I.setDesc(
TII.get(AArch64::COPY));
1073 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1077 I.setDesc(
TII.get(AArch64::COPY));
1092 switch (GenericOpc) {
1093 case TargetOpcode::G_SITOFP:
1094 return AArch64::SCVTFUWSri;
1095 case TargetOpcode::G_UITOFP:
1096 return AArch64::UCVTFUWSri;
1097 case TargetOpcode::G_FPTOSI:
1098 return AArch64::FCVTZSUWSr;
1099 case TargetOpcode::G_FPTOUI:
1100 return AArch64::FCVTZUUWSr;
1105 switch (GenericOpc) {
1106 case TargetOpcode::G_SITOFP:
1107 return AArch64::SCVTFUXSri;
1108 case TargetOpcode::G_UITOFP:
1109 return AArch64::UCVTFUXSri;
1110 case TargetOpcode::G_FPTOSI:
1111 return AArch64::FCVTZSUWDr;
1112 case TargetOpcode::G_FPTOUI:
1113 return AArch64::FCVTZUUWDr;
1123 switch (GenericOpc) {
1124 case TargetOpcode::G_SITOFP:
1125 return AArch64::SCVTFUWDri;
1126 case TargetOpcode::G_UITOFP:
1127 return AArch64::UCVTFUWDri;
1128 case TargetOpcode::G_FPTOSI:
1129 return AArch64::FCVTZSUXSr;
1130 case TargetOpcode::G_FPTOUI:
1131 return AArch64::FCVTZUUXSr;
1136 switch (GenericOpc) {
1137 case TargetOpcode::G_SITOFP:
1138 return AArch64::SCVTFUXDri;
1139 case TargetOpcode::G_UITOFP:
1140 return AArch64::UCVTFUXDri;
1141 case TargetOpcode::G_FPTOSI:
1142 return AArch64::FCVTZSUXDr;
1143 case TargetOpcode::G_FPTOUI:
1144 return AArch64::FCVTZUUXDr;
1163 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1164 "Expected both select operands to have the same regbank?");
1165 LLT Ty =
MRI.getType(True);
1170 "Expected 32 bit or 64 bit select only?");
1171 const bool Is32Bit =
Size == 32;
1172 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1173 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1174 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(
CC);
1180 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1182 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &
CC, &
MRI,
1197 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1214 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1233 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1249 auto TryOptSelectCst = [&Opc, &True, &False, &
CC, Is32Bit, &
MRI,
1255 if (!TrueCst && !FalseCst)
1258 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1259 if (TrueCst && FalseCst) {
1260 int64_t
T = TrueCst->Value.getSExtValue();
1261 int64_t
F = FalseCst->Value.getSExtValue();
1263 if (
T == 0 &&
F == 1) {
1265 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1271 if (
T == 0 &&
F == -1) {
1273 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1281 int64_t
T = TrueCst->Value.getSExtValue();
1284 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1293 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1302 int64_t
F = FalseCst->Value.getSExtValue();
1305 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1312 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1320 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1321 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1442 assert(Reg.isValid() &&
"Expected valid register!");
1443 bool HasZext =
false;
1445 unsigned Opc =
MI->getOpcode();
1447 if (!
MI->getOperand(0).isReg() ||
1448 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1455 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1456 Opc == TargetOpcode::G_TRUNC) {
1457 if (Opc == TargetOpcode::G_ZEXT)
1460 Register NextReg =
MI->getOperand(1).getReg();
1462 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1471 std::optional<uint64_t>
C;
1476 case TargetOpcode::G_AND:
1477 case TargetOpcode::G_XOR: {
1478 TestReg =
MI->getOperand(1).getReg();
1479 Register ConstantReg =
MI->getOperand(2).getReg();
1490 C = VRegAndVal->Value.getZExtValue();
1492 C = VRegAndVal->Value.getSExtValue();
1496 case TargetOpcode::G_ASHR:
1497 case TargetOpcode::G_LSHR:
1498 case TargetOpcode::G_SHL: {
1499 TestReg =
MI->getOperand(1).getReg();
1503 C = VRegAndVal->Value.getSExtValue();
1515 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1519 case TargetOpcode::G_AND:
1521 if ((*
C >> Bit) & 1)
1524 case TargetOpcode::G_SHL:
1527 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1532 case TargetOpcode::G_ASHR:
1537 if (Bit >= TestRegSize)
1538 Bit = TestRegSize - 1;
1540 case TargetOpcode::G_LSHR:
1542 if ((Bit + *
C) < TestRegSize) {
1547 case TargetOpcode::G_XOR:
1556 if ((*
C >> Bit) & 1)
1575 assert(ProduceNonFlagSettingCondBr &&
1576 "Cannot emit TB(N)Z with speculation tracking!");
1581 LLT Ty =
MRI.getType(TestReg);
1584 assert(Bit < 64 &&
"Bit is too large!");
1588 bool UseWReg =
Bit < 32;
1589 unsigned NecessarySize = UseWReg ? 32 : 64;
1590 if (
Size != NecessarySize)
1591 TestReg = moveScalarRegClass(
1592 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1595 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1596 {AArch64::TBZW, AArch64::TBNZW}};
1597 unsigned Opc = OpcTable[UseWReg][IsNegative];
1604bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1607 assert(AndInst.
getOpcode() == TargetOpcode::G_AND &&
"Expected G_AND only?");
1634 int32_t
Bit = MaybeBit->Value.exactLogBase2();
1641 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1649 assert(ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!");
1651 assert(RBI.getRegBank(CompareReg,
MRI,
TRI)->getID() ==
1652 AArch64::GPRRegBankID &&
1653 "Expected GPRs only?");
1654 auto Ty =
MRI.getType(CompareReg);
1657 assert(Width <= 64 &&
"Expected width to be at most 64?");
1658 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1659 {AArch64::CBNZW, AArch64::CBNZX}};
1660 unsigned Opc = OpcTable[IsNegative][Width == 64];
1661 auto BranchMI = MIB.
buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1666bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1669 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1681 I.eraseFromParent();
1685bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1688 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1694 if (!ProduceNonFlagSettingCondBr)
1713 if (VRegAndVal && !AndInst) {
1714 int64_t
C = VRegAndVal->Value.getSExtValue();
1720 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1721 I.eraseFromParent();
1729 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1730 I.eraseFromParent();
1738 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1739 I.eraseFromParent();
1753 if (VRegAndVal && VRegAndVal->Value == 0) {
1761 tryOptAndIntoCompareBranch(
1763 I.eraseFromParent();
1768 auto LHSTy =
MRI.getType(LHS);
1769 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1771 I.eraseFromParent();
1780bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1783 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1784 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1794 I.eraseFromParent();
1798bool AArch64InstructionSelector::selectCompareBranch(
1800 Register CondReg =
I.getOperand(0).getReg();
1805 if (CCMIOpc == TargetOpcode::G_FCMP)
1806 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1807 if (CCMIOpc == TargetOpcode::G_ICMP)
1808 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1813 if (ProduceNonFlagSettingCondBr) {
1814 emitTestBit(CondReg, 0,
true,
1815 I.getOperand(1).getMBB(), MIB);
1816 I.eraseFromParent();
1826 .
addMBB(
I.getOperand(1).getMBB());
1827 I.eraseFromParent();
1835 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1846 return std::nullopt;
1848 int64_t Imm = *ShiftImm;
1850 return std::nullopt;
1854 return std::nullopt;
1857 return std::nullopt;
1861 return std::nullopt;
1865 return std::nullopt;
1869 return std::nullopt;
1875bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &
I,
1877 assert(
I.getOpcode() == TargetOpcode::G_SHL);
1878 Register DstReg =
I.getOperand(0).getReg();
1879 const LLT Ty =
MRI.getType(DstReg);
1880 Register Src1Reg =
I.getOperand(1).getReg();
1881 Register Src2Reg =
I.getOperand(2).getReg();
1892 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1894 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1896 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1898 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1900 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1902 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1904 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1910 auto Shl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg});
1916 I.eraseFromParent();
1920bool AArch64InstructionSelector::selectVectorAshrLshr(
1922 assert(
I.getOpcode() == TargetOpcode::G_ASHR ||
1923 I.getOpcode() == TargetOpcode::G_LSHR);
1924 Register DstReg =
I.getOperand(0).getReg();
1925 const LLT Ty =
MRI.getType(DstReg);
1926 Register Src1Reg =
I.getOperand(1).getReg();
1927 Register Src2Reg =
I.getOperand(2).getReg();
1932 bool IsASHR =
I.getOpcode() == TargetOpcode::G_ASHR;
1942 unsigned NegOpc = 0;
1944 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1946 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1947 NegOpc = AArch64::NEGv2i64;
1949 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1950 NegOpc = AArch64::NEGv4i32;
1952 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1953 NegOpc = AArch64::NEGv2i32;
1955 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1956 NegOpc = AArch64::NEGv4i16;
1958 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1959 NegOpc = AArch64::NEGv8i16;
1961 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1962 NegOpc = AArch64::NEGv16i8;
1964 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1965 NegOpc = AArch64::NEGv8i8;
1971 auto Neg = MIB.
buildInstr(NegOpc, {RC}, {Src2Reg});
1973 auto SShl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1975 I.eraseFromParent();
1979bool AArch64InstructionSelector::selectVaStartAAPCS(
1984bool AArch64InstructionSelector::selectVaStartDarwin(
1987 Register ListReg =
I.getOperand(0).getReg();
1989 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2000 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2008 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2015 I.eraseFromParent();
2019void AArch64InstructionSelector::materializeLargeCMVal(
2025 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2036 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2038 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2040 GV, MovZ->getOperand(1).getOffset(), Flags));
2044 MovZ->getOperand(1).getOffset(), Flags));
2050 Register DstReg = BuildMovK(MovZ.getReg(0),
2056bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2061 switch (
I.getOpcode()) {
2062 case TargetOpcode::G_STORE: {
2063 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2071 SrcOp.setReg(NewSrc);
2072 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2077 case TargetOpcode::G_PTR_ADD:
2078 return convertPtrAddToAdd(
I,
MRI);
2079 case TargetOpcode::G_LOAD: {
2084 Register DstReg =
I.getOperand(0).getReg();
2085 const LLT DstTy =
MRI.getType(DstReg);
2091 case AArch64::G_DUP: {
2093 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2097 MRI.setType(
I.getOperand(0).getReg(),
2099 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2100 I.getOperand(1).setReg(NewSrc.getReg(0));
2103 case TargetOpcode::G_UITOFP:
2104 case TargetOpcode::G_SITOFP: {
2109 Register SrcReg =
I.getOperand(1).getReg();
2110 LLT SrcTy =
MRI.getType(SrcReg);
2111 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2115 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2116 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2117 I.setDesc(
TII.get(AArch64::G_SITOF));
2119 I.setDesc(
TII.get(AArch64::G_UITOF));
2137bool AArch64InstructionSelector::convertPtrAddToAdd(
2139 assert(
I.getOpcode() == TargetOpcode::G_PTR_ADD &&
"Expected G_PTR_ADD");
2140 Register DstReg =
I.getOperand(0).getReg();
2141 Register AddOp1Reg =
I.getOperand(1).getReg();
2142 const LLT PtrTy =
MRI.getType(DstReg);
2146 const LLT CastPtrTy =
2151 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2153 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2157 I.setDesc(
TII.get(TargetOpcode::G_ADD));
2158 MRI.setType(DstReg, CastPtrTy);
2159 I.getOperand(1).setReg(PtrToInt.getReg(0));
2160 if (!select(*PtrToInt)) {
2161 LLVM_DEBUG(
dbgs() <<
"Failed to select G_PTRTOINT in convertPtrAddToAdd");
2170 I.getOperand(2).setReg(NegatedReg);
2171 I.setDesc(
TII.get(TargetOpcode::G_SUB));
2175bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2180 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2181 const auto &MO =
I.getOperand(2);
2186 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2190 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2191 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2193 if (!Imm1Fn || !Imm2Fn)
2197 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2200 for (
auto &RenderFn : *Imm1Fn)
2202 for (
auto &RenderFn : *Imm2Fn)
2205 I.eraseFromParent();
2209bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2211 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2229 LLT DefDstTy =
MRI.getType(DefDstReg);
2230 Register StoreSrcReg =
I.getOperand(0).getReg();
2231 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2242 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2243 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2247 I.getOperand(0).setReg(DefDstReg);
2251bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2252 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2253 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2259 switch (
I.getOpcode()) {
2260 case AArch64::G_DUP: {
2263 Register Src =
I.getOperand(1).getReg();
2268 Register Dst =
I.getOperand(0).getReg();
2270 MRI.getType(Dst).getNumElements(),
2272 ValAndVReg->Value));
2273 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2275 I.eraseFromParent();
2278 case TargetOpcode::G_SEXT:
2281 if (selectUSMovFromExtend(
I,
MRI))
2284 case TargetOpcode::G_BR:
2286 case TargetOpcode::G_SHL:
2287 return earlySelectSHL(
I,
MRI);
2288 case TargetOpcode::G_CONSTANT: {
2289 bool IsZero =
false;
2290 if (
I.getOperand(1).isCImm())
2291 IsZero =
I.getOperand(1).getCImm()->isZero();
2292 else if (
I.getOperand(1).isImm())
2293 IsZero =
I.getOperand(1).getImm() == 0;
2298 Register DefReg =
I.getOperand(0).getReg();
2299 LLT Ty =
MRI.getType(DefReg);
2301 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2302 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2304 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2305 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2309 I.setDesc(
TII.get(TargetOpcode::COPY));
2313 case TargetOpcode::G_ADD: {
2322 Register AddDst =
I.getOperand(0).getReg();
2323 Register AddLHS =
I.getOperand(1).getReg();
2324 Register AddRHS =
I.getOperand(2).getReg();
2326 LLT Ty =
MRI.getType(AddLHS);
2335 if (!
MRI.hasOneNonDBGUse(Reg))
2349 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2359 Cmp = MatchCmp(AddRHS);
2363 auto &PredOp =
Cmp->getOperand(1);
2368 emitIntegerCompare(
Cmp->getOperand(2),
2369 Cmp->getOperand(3), PredOp, MIB);
2370 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2371 I.eraseFromParent();
2374 case TargetOpcode::G_OR: {
2378 Register Dst =
I.getOperand(0).getReg();
2379 LLT Ty =
MRI.getType(Dst);
2398 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2401 int64_t Immr =
Size - ShiftImm;
2402 int64_t Imms =
Size - ShiftImm - 1;
2403 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2404 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2405 I.eraseFromParent();
2408 case TargetOpcode::G_FENCE: {
2409 if (
I.getOperand(1).getImm() == 0)
2413 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2414 I.eraseFromParent();
2423 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2424 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2431 if (Subtarget->requiresStrictAlign()) {
2433 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2439 unsigned Opcode =
I.getOpcode();
2441 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2444 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2447 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2448 const Register DefReg =
I.getOperand(0).getReg();
2449 const LLT DefTy =
MRI.getType(DefReg);
2452 MRI.getRegClassOrRegBank(DefReg);
2462 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2469 I.setDesc(
TII.get(TargetOpcode::PHI));
2471 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2477 if (
I.isDebugInstr())
2484 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2486 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2493 if (preISelLower(
I)) {
2494 Opcode =
I.getOpcode();
2505 if (selectImpl(
I, *CoverageInfo))
2509 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2512 case TargetOpcode::G_SBFX:
2513 case TargetOpcode::G_UBFX: {
2514 static const unsigned OpcTable[2][2] = {
2515 {AArch64::UBFMWri, AArch64::UBFMXri},
2516 {AArch64::SBFMWri, AArch64::SBFMXri}};
2517 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2519 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2522 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2525 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2526 auto LSB = Cst1->Value.getZExtValue();
2527 auto Width = Cst2->Value.getZExtValue();
2529 MIB.
buildInstr(Opc, {
I.getOperand(0)}, {
I.getOperand(1)})
2531 .
addImm(LSB + Width - 1);
2532 I.eraseFromParent();
2535 case TargetOpcode::G_BRCOND:
2536 return selectCompareBranch(
I, MF,
MRI);
2538 case TargetOpcode::G_BRINDIRECT: {
2539 I.setDesc(
TII.get(AArch64::BR));
2543 case TargetOpcode::G_BRJT:
2544 return selectBrJT(
I,
MRI);
2546 case AArch64::G_ADD_LOW: {
2552 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2553 I.setDesc(
TII.get(AArch64::ADDXri));
2558 "Expected small code model");
2560 auto Op2 =
I.getOperand(2);
2561 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2562 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2563 Op1.getTargetFlags())
2565 Op2.getTargetFlags());
2566 I.eraseFromParent();
2570 case TargetOpcode::G_FCONSTANT:
2571 case TargetOpcode::G_CONSTANT: {
2572 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2581 const Register DefReg =
I.getOperand(0).getReg();
2582 const LLT DefTy =
MRI.getType(DefReg);
2588 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2590 <<
" constant, expected: " << s16 <<
" or " << s32
2591 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2595 if (RB.
getID() != AArch64::FPRRegBankID) {
2597 <<
" constant on bank: " << RB
2598 <<
", expected: FPR\n");
2606 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2610 if (Ty != p0 && Ty != s8 && Ty != s16) {
2612 <<
" constant, expected: " << s32 <<
", " << s64
2613 <<
", or " << p0 <<
'\n');
2617 if (RB.
getID() != AArch64::GPRRegBankID) {
2619 <<
" constant on bank: " << RB
2620 <<
", expected: GPR\n");
2637 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2644 auto *FPImm =
I.getOperand(1).getFPImm();
2647 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2651 I.eraseFromParent();
2652 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2656 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2658 const Register DefGPRReg =
MRI.createVirtualRegister(
2659 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2665 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2666 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2674 }
else if (
I.getOperand(1).isCImm()) {
2675 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2676 I.getOperand(1).ChangeToImmediate(Val);
2677 }
else if (
I.getOperand(1).isImm()) {
2678 uint64_t Val =
I.getOperand(1).getImm();
2679 I.getOperand(1).ChangeToImmediate(Val);
2682 const unsigned MovOpc =
2683 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2684 I.setDesc(
TII.get(MovOpc));
2688 case TargetOpcode::G_EXTRACT: {
2689 Register DstReg =
I.getOperand(0).getReg();
2690 Register SrcReg =
I.getOperand(1).getReg();
2691 LLT SrcTy =
MRI.getType(SrcReg);
2692 LLT DstTy =
MRI.getType(DstReg);
2704 unsigned Offset =
I.getOperand(2).getImm();
2713 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2715 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2717 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2719 AArch64::GPR64RegClass, NewI->getOperand(0));
2720 I.eraseFromParent();
2726 unsigned LaneIdx =
Offset / 64;
2728 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2731 I.eraseFromParent();
2735 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2741 "unexpected G_EXTRACT types");
2748 .addReg(DstReg, 0, AArch64::sub_32);
2749 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2750 AArch64::GPR32RegClass,
MRI);
2751 I.getOperand(0).setReg(DstReg);
2756 case TargetOpcode::G_INSERT: {
2757 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2758 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2765 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2766 unsigned LSB =
I.getOperand(3).getImm();
2767 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2768 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2773 "unexpected G_INSERT types");
2779 TII.get(AArch64::SUBREG_TO_REG))
2782 .
addUse(
I.getOperand(2).getReg())
2783 .
addImm(AArch64::sub_32);
2784 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2785 AArch64::GPR32RegClass,
MRI);
2786 I.getOperand(2).setReg(SrcReg);
2790 case TargetOpcode::G_FRAME_INDEX: {
2797 I.setDesc(
TII.get(AArch64::ADDXri));
2806 case TargetOpcode::G_GLOBAL_VALUE: {
2809 if (
I.getOperand(1).isSymbol()) {
2810 OpFlags =
I.getOperand(1).getTargetFlags();
2814 GV =
I.getOperand(1).getGlobal();
2816 return selectTLSGlobalValue(
I,
MRI);
2817 OpFlags = STI.ClassifyGlobalReference(GV,
TM);
2821 I.setDesc(
TII.get(AArch64::LOADgot));
2822 I.getOperand(1).setTargetFlags(OpFlags);
2824 !
TM.isPositionIndependent()) {
2826 materializeLargeCMVal(
I, GV, OpFlags);
2827 I.eraseFromParent();
2830 I.setDesc(
TII.get(AArch64::ADR));
2831 I.getOperand(1).setTargetFlags(OpFlags);
2833 I.setDesc(
TII.get(AArch64::MOVaddr));
2836 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2842 case TargetOpcode::G_ZEXTLOAD:
2843 case TargetOpcode::G_LOAD:
2844 case TargetOpcode::G_STORE: {
2846 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2860 if (Order != AtomicOrdering::NotAtomic &&
2861 Order != AtomicOrdering::Unordered &&
2862 Order != AtomicOrdering::Monotonic) {
2863 assert(!isa<GZExtLoad>(LdSt));
2864 if (MemSizeInBytes > 64)
2867 if (isa<GLoad>(LdSt)) {
2868 static constexpr unsigned LDAPROpcodes[] = {
2869 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2870 static constexpr unsigned LDAROpcodes[] = {
2871 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2873 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2876 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2878 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2879 AArch64::STLRW, AArch64::STLRX};
2881 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2883 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2884 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2885 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2886 I.getOperand(0).setReg(NewVal);
2888 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2899 "Load/Store pointer operand isn't a GPR");
2900 assert(
MRI.getType(PtrReg).isPointer() &&
2901 "Load/Store pointer operand isn't a pointer");
2905 const LLT ValTy =
MRI.getType(ValReg);
2910 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2913 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2919 .addReg(ValReg, 0,
SubReg)
2921 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2923 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2926 if (RB.
getID() == AArch64::FPRRegBankID) {
2929 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2936 MRI.setRegBank(NewDst, RB);
2939 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2943 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2944 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2951 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2952 bool IsStore = isa<GStore>(
I);
2953 const unsigned NewOpc =
2955 if (NewOpc ==
I.getOpcode())
2959 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2962 I.setDesc(
TII.get(NewOpc));
2968 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2969 Register CurValReg =
I.getOperand(0).getReg();
2970 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2971 NewInst.cloneMemRefs(
I);
2972 for (
auto &Fn : *AddrModeFns)
2974 I.eraseFromParent();
2983 if (Opcode == TargetOpcode::G_STORE) {
2986 if (CVal && CVal->Value == 0) {
2988 case AArch64::STRWui:
2989 case AArch64::STRHHui:
2990 case AArch64::STRBBui:
2991 LoadStore->getOperand(0).setReg(AArch64::WZR);
2993 case AArch64::STRXui:
2994 LoadStore->getOperand(0).setReg(AArch64::XZR);
3000 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3001 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3004 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3008 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3013 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3016 .
addImm(AArch64::sub_32);
3018 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3024 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3025 case TargetOpcode::G_INDEXED_SEXTLOAD:
3026 return selectIndexedExtLoad(
I,
MRI);
3027 case TargetOpcode::G_INDEXED_LOAD:
3028 return selectIndexedLoad(
I,
MRI);
3029 case TargetOpcode::G_INDEXED_STORE:
3030 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3032 case TargetOpcode::G_LSHR:
3033 case TargetOpcode::G_ASHR:
3034 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3035 return selectVectorAshrLshr(
I,
MRI);
3037 case TargetOpcode::G_SHL:
3038 if (Opcode == TargetOpcode::G_SHL &&
3039 MRI.getType(
I.getOperand(0).getReg()).isVector())
3040 return selectVectorSHL(
I,
MRI);
3047 Register SrcReg =
I.getOperand(1).getReg();
3048 Register ShiftReg =
I.getOperand(2).getReg();
3049 const LLT ShiftTy =
MRI.getType(ShiftReg);
3050 const LLT SrcTy =
MRI.getType(SrcReg);
3055 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3056 .addReg(ShiftReg, 0, AArch64::sub_32);
3057 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3058 I.getOperand(2).setReg(Trunc.getReg(0));
3062 case TargetOpcode::G_OR: {
3069 const Register DefReg =
I.getOperand(0).getReg();
3073 if (NewOpc ==
I.getOpcode())
3076 I.setDesc(
TII.get(NewOpc));
3084 case TargetOpcode::G_PTR_ADD: {
3085 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3086 I.eraseFromParent();
3090 case TargetOpcode::G_SADDE:
3091 case TargetOpcode::G_UADDE:
3092 case TargetOpcode::G_SSUBE:
3093 case TargetOpcode::G_USUBE:
3094 case TargetOpcode::G_SADDO:
3095 case TargetOpcode::G_UADDO:
3096 case TargetOpcode::G_SSUBO:
3097 case TargetOpcode::G_USUBO:
3098 return selectOverflowOp(
I,
MRI);
3100 case TargetOpcode::G_PTRMASK: {
3101 Register MaskReg =
I.getOperand(2).getReg();
3108 I.setDesc(
TII.get(AArch64::ANDXri));
3109 I.getOperand(2).ChangeToImmediate(
3114 case TargetOpcode::G_PTRTOINT:
3115 case TargetOpcode::G_TRUNC: {
3116 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3117 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3119 const Register DstReg =
I.getOperand(0).getReg();
3120 const Register SrcReg =
I.getOperand(1).getReg();
3127 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3131 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3140 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3141 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3142 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3146 if (DstRC == SrcRC) {
3148 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3152 }
else if (DstRC == &AArch64::GPR32RegClass &&
3153 SrcRC == &AArch64::GPR64RegClass) {
3154 I.getOperand(1).setSubReg(AArch64::sub_32);
3157 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3161 I.setDesc(
TII.get(TargetOpcode::COPY));
3163 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3166 I.setDesc(
TII.get(AArch64::XTNv4i16));
3176 I.eraseFromParent();
3181 if (Opcode == TargetOpcode::G_PTRTOINT) {
3182 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3183 I.setDesc(
TII.get(TargetOpcode::COPY));
3191 case TargetOpcode::G_ANYEXT: {
3192 if (selectUSMovFromExtend(
I,
MRI))
3195 const Register DstReg =
I.getOperand(0).getReg();
3196 const Register SrcReg =
I.getOperand(1).getReg();
3199 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3201 <<
", expected: GPR\n");
3206 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3208 <<
", expected: GPR\n");
3212 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3215 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3219 if (DstSize != 64 && DstSize > 32) {
3221 <<
", expected: 32 or 64\n");
3227 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3232 .
addImm(AArch64::sub_32);
3233 I.getOperand(1).setReg(ExtSrc);
3238 case TargetOpcode::G_ZEXT:
3239 case TargetOpcode::G_SEXT_INREG:
3240 case TargetOpcode::G_SEXT: {
3241 if (selectUSMovFromExtend(
I,
MRI))
3244 unsigned Opcode =
I.getOpcode();
3245 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3246 const Register DefReg =
I.getOperand(0).getReg();
3247 Register SrcReg =
I.getOperand(1).getReg();
3248 const LLT DstTy =
MRI.getType(DefReg);
3249 const LLT SrcTy =
MRI.getType(SrcReg);
3255 if (Opcode == TargetOpcode::G_SEXT_INREG)
3256 SrcSize =
I.getOperand(2).getImm();
3262 AArch64::GPRRegBankID &&
3263 "Unexpected ext regbank");
3276 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3277 if (LoadMI && IsGPR) {
3279 unsigned BytesLoaded =
MemOp->getSize();
3286 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3288 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3289 const Register ZReg = AArch64::WZR;
3290 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3293 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3296 .
addImm(AArch64::sub_32);
3298 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3300 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3304 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3310 I.eraseFromParent();
3315 if (DstSize == 64) {
3316 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3318 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3324 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3325 {&AArch64::GPR64RegClass}, {})
3332 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3336 }
else if (DstSize <= 32) {
3337 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3346 I.eraseFromParent();
3350 case TargetOpcode::G_SITOFP:
3351 case TargetOpcode::G_UITOFP:
3352 case TargetOpcode::G_FPTOSI:
3353 case TargetOpcode::G_FPTOUI: {
3354 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3355 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3357 if (NewOpc == Opcode)
3360 I.setDesc(
TII.get(NewOpc));
3367 case TargetOpcode::G_FREEZE:
3370 case TargetOpcode::G_INTTOPTR:
3375 case TargetOpcode::G_BITCAST:
3383 case TargetOpcode::G_SELECT: {
3384 auto &Sel = cast<GSelect>(
I);
3385 const Register CondReg = Sel.getCondReg();
3386 const Register TReg = Sel.getTrueReg();
3387 const Register FReg = Sel.getFalseReg();
3389 if (tryOptSelect(Sel))
3394 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3395 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3398 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3400 Sel.eraseFromParent();
3403 case TargetOpcode::G_ICMP: {
3405 return selectVectorICmp(
I,
MRI);
3416 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3417 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3418 AArch64::WZR, InvCC, MIB);
3419 I.eraseFromParent();
3423 case TargetOpcode::G_FCMP: {
3426 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3428 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3430 I.eraseFromParent();
3433 case TargetOpcode::G_VASTART:
3434 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3435 : selectVaStartAAPCS(
I, MF,
MRI);
3436 case TargetOpcode::G_INTRINSIC:
3437 return selectIntrinsic(
I,
MRI);
3438 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3439 return selectIntrinsicWithSideEffects(
I,
MRI);
3440 case TargetOpcode::G_IMPLICIT_DEF: {
3441 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3442 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3443 const Register DstReg =
I.getOperand(0).getReg();
3446 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3449 case TargetOpcode::G_BLOCK_ADDR: {
3451 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3452 I.eraseFromParent();
3455 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3456 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3457 I.getOperand(0).getReg())
3461 I.getOperand(1).getBlockAddress(), 0,
3463 I.eraseFromParent();
3467 case AArch64::G_DUP: {
3473 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3474 AArch64::GPRRegBankID)
3476 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3478 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3480 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3482 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3484 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3489 case TargetOpcode::G_BUILD_VECTOR:
3490 return selectBuildVector(
I,
MRI);
3491 case TargetOpcode::G_MERGE_VALUES:
3493 case TargetOpcode::G_UNMERGE_VALUES:
3495 case TargetOpcode::G_SHUFFLE_VECTOR:
3496 return selectShuffleVector(
I,
MRI);
3497 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3498 return selectExtractElt(
I,
MRI);
3499 case TargetOpcode::G_INSERT_VECTOR_ELT:
3500 return selectInsertElt(
I,
MRI);
3501 case TargetOpcode::G_CONCAT_VECTORS:
3502 return selectConcatVectors(
I,
MRI);
3503 case TargetOpcode::G_JUMP_TABLE:
3504 return selectJumpTable(
I,
MRI);
3505 case TargetOpcode::G_MEMCPY:
3506 case TargetOpcode::G_MEMCPY_INLINE:
3507 case TargetOpcode::G_MEMMOVE:
3508 case TargetOpcode::G_MEMSET:
3509 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3510 return selectMOPS(
I,
MRI);
3516bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3523bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3527 case TargetOpcode::G_MEMCPY:
3528 case TargetOpcode::G_MEMCPY_INLINE:
3529 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3531 case TargetOpcode::G_MEMMOVE:
3532 Mopcode = AArch64::MOPSMemoryMovePseudo;
3534 case TargetOpcode::G_MEMSET:
3536 Mopcode = AArch64::MOPSMemorySetPseudo;
3545 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3546 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3549 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3550 const auto &SrcValRegClass =
3551 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3554 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3555 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3556 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3566 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3567 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3569 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3570 {DstPtrCopy, SizeCopy, SrcValCopy});
3572 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3573 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3574 {DstPtrCopy, SrcValCopy, SizeCopy});
3583 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3584 Register JTAddr =
I.getOperand(0).getReg();
3585 unsigned JTI =
I.getOperand(1).getIndex();
3588 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3589 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3592 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3593 {TargetReg, ScratchReg}, {JTAddr,
Index})
3594 .addJumpTableIndex(JTI);
3596 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3597 {
static_cast<int64_t
>(JTI)});
3599 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3600 I.eraseFromParent();
3604bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3606 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3607 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3609 Register DstReg =
I.getOperand(0).getReg();
3610 unsigned JTI =
I.getOperand(1).getIndex();
3613 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3616 I.eraseFromParent();
3620bool AArch64InstructionSelector::selectTLSGlobalValue(
3622 if (!STI.isTargetMachO())
3627 const auto &GlobalOp =
I.getOperand(1);
3628 assert(GlobalOp.getOffset() == 0 &&
3629 "Shouldn't have an offset on TLS globals!");
3633 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3636 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3637 {LoadGOT.getReg(0)})
3650 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3652 I.eraseFromParent();
3656bool AArch64InstructionSelector::selectVectorICmp(
3658 Register DstReg =
I.getOperand(0).getReg();
3659 LLT DstTy =
MRI.getType(DstReg);
3660 Register SrcReg =
I.getOperand(2).getReg();
3661 Register Src2Reg =
I.getOperand(3).getReg();
3662 LLT SrcTy =
MRI.getType(SrcReg);
3687 unsigned PredIdx = 0;
3688 bool SwapOperands =
false;
3703 SwapOperands =
true;
3707 SwapOperands =
true;
3717 SwapOperands =
true;
3721 SwapOperands =
true;
3731 static const unsigned OpcTable[4][4][9] = {
3739 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3740 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3741 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3742 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3743 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3744 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3750 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3751 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3752 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3753 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3754 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3755 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3761 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3762 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3763 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3764 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3765 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3766 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3775 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3776 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3777 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3789 unsigned EltIdx =
Log2_32(SrcEltSize / 8);
3790 unsigned NumEltsIdx =
Log2_32(NumElts / 2);
3791 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3799 getRegClassForTypeOnBank(SrcTy, VecRB,
true);
3801 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3805 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3807 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3822 RBI.constrainGenericRegister(DstReg, *SrcRC,
MRI);
3823 I.eraseFromParent();
3827MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3830 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3832 auto BuildFn = [&](
unsigned SubregIndex) {
3836 .addImm(SubregIndex);
3844 return BuildFn(AArch64::bsub);
3846 return BuildFn(AArch64::hsub);
3848 return BuildFn(AArch64::ssub);
3850 return BuildFn(AArch64::dsub);
3857AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3860 LLT DstTy =
MRI.getType(DstReg);
3862 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3863 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3870 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3876 .addReg(SrcReg, 0,
SubReg);
3877 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3881bool AArch64InstructionSelector::selectMergeValues(
3883 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3884 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3885 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3889 if (
I.getNumOperands() != 3)
3896 Register DstReg =
I.getOperand(0).getReg();
3897 Register Src1Reg =
I.getOperand(1).getReg();
3898 Register Src2Reg =
I.getOperand(2).getReg();
3899 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3900 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3905 Src2Reg, 1, RB, MIB);
3910 I.eraseFromParent();
3914 if (RB.
getID() != AArch64::GPRRegBankID)
3920 auto *DstRC = &AArch64::GPR64RegClass;
3921 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3923 TII.get(TargetOpcode::SUBREG_TO_REG))
3926 .
addUse(
I.getOperand(1).getReg())
3927 .
addImm(AArch64::sub_32);
3928 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3931 TII.get(TargetOpcode::SUBREG_TO_REG))
3934 .
addUse(
I.getOperand(2).getReg())
3935 .
addImm(AArch64::sub_32);
3937 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3938 .
addDef(
I.getOperand(0).getReg())
3946 I.eraseFromParent();
3951 const unsigned EltSize) {
3956 CopyOpc = AArch64::DUPi8;
3957 ExtractSubReg = AArch64::bsub;
3960 CopyOpc = AArch64::DUPi16;
3961 ExtractSubReg = AArch64::hsub;
3964 CopyOpc = AArch64::DUPi32;
3965 ExtractSubReg = AArch64::ssub;
3968 CopyOpc = AArch64::DUPi64;
3969 ExtractSubReg = AArch64::dsub;
3973 LLVM_DEBUG(
dbgs() <<
"Elt size '" << EltSize <<
"' unsupported.\n");
3979MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3980 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3983 unsigned CopyOpc = 0;
3984 unsigned ExtractSubReg = 0;
3987 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3992 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3994 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3999 const LLT &VecTy =
MRI.getType(VecReg);
4001 getRegClassForTypeOnBank(VecTy, VecRB,
true);
4003 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
4010 DstReg =
MRI.createVirtualRegister(DstRC);
4013 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4014 .addReg(VecReg, 0, ExtractSubReg);
4015 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4024 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4025 if (!ScalarToVector)
4031 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4035 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
4039bool AArch64InstructionSelector::selectExtractElt(
4041 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4042 "unexpected opcode!");
4043 Register DstReg =
I.getOperand(0).getReg();
4044 const LLT NarrowTy =
MRI.getType(DstReg);
4045 const Register SrcReg =
I.getOperand(1).getReg();
4046 const LLT WideTy =
MRI.getType(SrcReg);
4049 "source register size too small!");
4050 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
4054 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
4056 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
4065 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4069 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4074 I.eraseFromParent();
4078bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4080 unsigned NumElts =
I.getNumOperands() - 1;
4081 Register SrcReg =
I.getOperand(NumElts).getReg();
4082 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4083 const LLT SrcTy =
MRI.getType(SrcReg);
4085 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
4087 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
4094 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
4095 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4096 Register Dst =
I.getOperand(OpIdx).getReg();
4098 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4102 I.eraseFromParent();
4106bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4108 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4109 "unexpected opcode");
4112 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4113 AArch64::FPRRegBankID ||
4114 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4115 AArch64::FPRRegBankID) {
4116 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4117 "currently unsupported.\n");
4123 unsigned NumElts =
I.getNumOperands() - 1;
4124 Register SrcReg =
I.getOperand(NumElts).getReg();
4125 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4126 const LLT WideTy =
MRI.getType(SrcReg);
4129 "can only unmerge from vector or s128 types!");
4131 "source register size too small!");
4134 return selectSplitVectorUnmerge(
I,
MRI);
4138 unsigned CopyOpc = 0;
4139 unsigned ExtractSubReg = 0;
4150 unsigned NumInsertRegs = NumElts - 1;
4162 *RBI.getRegBank(SrcReg,
MRI,
TRI));
4166 assert(Found &&
"expected to find last operand's subeg idx");
4167 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4168 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4170 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4174 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4177 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4194 Register CopyTo =
I.getOperand(0).getReg();
4195 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4196 .addReg(InsertRegs[0], 0, ExtractSubReg);
4200 unsigned LaneIdx = 1;
4201 for (
Register InsReg : InsertRegs) {
4202 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4215 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4221 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4222 I.eraseFromParent();
4226bool AArch64InstructionSelector::selectConcatVectors(
4228 assert(
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4229 "Unexpected opcode");
4230 Register Dst =
I.getOperand(0).getReg();
4231 Register Op1 =
I.getOperand(1).getReg();
4232 Register Op2 =
I.getOperand(2).getReg();
4233 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4236 I.eraseFromParent();
4241AArch64InstructionSelector::emitConstantPoolEntry(
const Constant *CPVal,
4250MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4258 RC = &AArch64::FPR128RegClass;
4259 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4262 RC = &AArch64::FPR64RegClass;
4263 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4266 RC = &AArch64::FPR32RegClass;
4267 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4270 RC = &AArch64::FPR16RegClass;
4271 Opc = AArch64::LDRHui;
4274 LLVM_DEBUG(
dbgs() <<
"Could not load from constant pool of type "
4280 auto &MF = MIRBuilder.
getMF();
4281 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4282 if (IsTiny && (
Size == 16 ||
Size == 8 ||
Size == 4)) {
4284 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4287 MIRBuilder.
buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4290 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {Adrp})
4291 .addConstantPoolIndex(
4307static std::pair<unsigned, unsigned>
4309 unsigned Opc, SubregIdx;
4310 if (RB.
getID() == AArch64::GPRRegBankID) {
4312 Opc = AArch64::INSvi8gpr;
4313 SubregIdx = AArch64::bsub;
4314 }
else if (EltSize == 16) {
4315 Opc = AArch64::INSvi16gpr;
4316 SubregIdx = AArch64::ssub;
4317 }
else if (EltSize == 32) {
4318 Opc = AArch64::INSvi32gpr;
4319 SubregIdx = AArch64::ssub;
4320 }
else if (EltSize == 64) {
4321 Opc = AArch64::INSvi64gpr;
4322 SubregIdx = AArch64::dsub;
4328 Opc = AArch64::INSvi8lane;
4329 SubregIdx = AArch64::bsub;
4330 }
else if (EltSize == 16) {
4331 Opc = AArch64::INSvi16lane;
4332 SubregIdx = AArch64::hsub;
4333 }
else if (EltSize == 32) {
4334 Opc = AArch64::INSvi32lane;
4335 SubregIdx = AArch64::ssub;
4336 }
else if (EltSize == 64) {
4337 Opc = AArch64::INSvi64lane;
4338 SubregIdx = AArch64::dsub;
4343 return std::make_pair(Opc, SubregIdx);
4347 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4349 const ComplexRendererFns &RenderFns)
const {
4350 assert(Opcode &&
"Expected an opcode?");
4352 "Function should only be used to produce selected instructions!");
4353 auto MI = MIRBuilder.
buildInstr(Opcode, DstOps, SrcOps);
4355 for (
auto &Fn : *RenderFns)
4362 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4366 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4367 auto Ty =
MRI.getType(
LHS.getReg());
4370 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit type only");
4371 bool Is32Bit =
Size == 32;
4374 if (
auto Fns = selectArithImmed(RHS))
4375 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {
LHS},
4379 if (
auto Fns = selectNegArithImmed(RHS))
4380 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {
LHS},
4384 if (
auto Fns = selectArithExtendedRegister(RHS))
4385 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {
LHS},
4389 if (
auto Fns = selectShiftedRegister(RHS))
4390 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {
LHS},
4392 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {
LHS,
RHS},
4400 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4401 {{AArch64::ADDXri, AArch64::ADDWri},
4402 {AArch64::ADDXrs, AArch64::ADDWrs},
4403 {AArch64::ADDXrr, AArch64::ADDWrr},
4404 {AArch64::SUBXri, AArch64::SUBWri},
4405 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4406 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4413 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4414 {{AArch64::ADDSXri, AArch64::ADDSWri},
4415 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4416 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4417 {AArch64::SUBSXri, AArch64::SUBSWri},
4418 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4419 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4426 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4427 {{AArch64::SUBSXri, AArch64::SUBSWri},
4428 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4429 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4430 {AArch64::ADDSXri, AArch64::ADDSWri},
4431 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4432 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4439 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4441 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4442 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4443 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4450 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4452 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4453 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4454 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4461 bool Is32Bit = (
MRI.getType(
LHS.getReg()).getSizeInBits() == 32);
4462 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4463 return emitADDS(
MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4469 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4473 bool Is32Bit = (
RegSize == 32);
4474 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4475 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4476 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4480 int64_t
Imm = ValAndVReg->Value.getSExtValue();
4483 auto TstMI = MIRBuilder.
buildInstr(OpcTable[0][Is32Bit], {Ty}, {
LHS});
4490 if (
auto Fns = selectLogicalShiftedRegister(RHS))
4491 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {
LHS}, MIRBuilder, Fns);
4492 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {
LHS,
RHS}, MIRBuilder);
4495MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4498 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected LHS and RHS to be registers!");
4505 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?");
4507 if (
auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4509 auto Dst =
MRI.cloneVirtualRegister(
LHS.getReg());
4510 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4513MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4517 LLT Ty =
MRI.getType(Dst);
4519 "Expected a 32-bit scalar register?");
4521 const Register ZReg = AArch64::WZR;
4526 return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4532 emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4533 emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4534 auto OrMI = MIRBuilder.
buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4539MachineInstr *AArch64InstructionSelector::emitFPCompare(
4541 std::optional<CmpInst::Predicate> Pred)
const {
4543 LLT Ty =
MRI.getType(LHS);
4547 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4558 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4562 ShouldUseImm =
true;
4566 unsigned CmpOpcTbl[2][3] = {
4567 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4568 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4570 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4582MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4591 const LLT Op1Ty =
MRI.getType(Op1);
4592 const LLT Op2Ty =
MRI.getType(Op2);
4594 if (Op1Ty != Op2Ty) {
4595 LLVM_DEBUG(
dbgs() <<
"Could not do vector concat of differing vector tys");
4598 assert(Op1Ty.
isVector() &&
"Expected a vector for vector concat");
4601 LLVM_DEBUG(
dbgs() <<
"Vector concat not supported for full size vectors");
4617 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op1, MIRBuilder);
4619 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op2, MIRBuilder);
4620 if (!WidenedOp1 || !WidenedOp2) {
4621 LLVM_DEBUG(
dbgs() <<
"Could not emit a vector from scalar value");
4626 unsigned InsertOpc, InsSubRegIdx;
4627 std::tie(InsertOpc, InsSubRegIdx) =
4631 Dst =
MRI.createVirtualRegister(DstRC);
4652 Size =
TRI.getRegSizeInBits(*RC);
4654 Size =
MRI.getType(Dst).getSizeInBits();
4656 assert(
Size <= 64 &&
"Expected 64 bits or less only!");
4657 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4658 unsigned Opc = OpcTable[
Size == 64];
4659 auto CSINC = MIRBuilder.
buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4667 unsigned Opcode =
I.getOpcode();
4671 bool NeedsNegatedCarry =
4672 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4682 if (SrcMI ==
I.getPrevNode()) {
4683 if (
auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4684 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4685 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4686 CarrySrcMI->isUnsigned() &&
4687 CarrySrcMI->getCarryOutReg() == CarryReg &&
4688 selectAndRestoreState(*SrcMI))
4693 Register DeadReg =
MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4695 if (NeedsNegatedCarry) {
4698 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4702 auto Fns = select12BitValueWithLeftShift(1);
4703 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4706bool AArch64InstructionSelector::selectOverflowOp(
MachineInstr &
I,
4708 auto &CarryMI = cast<GAddSubCarryOut>(
I);
4710 if (
auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&
I)) {
4712 emitCarryIn(
I, CarryInMI->getCarryInReg());
4716 auto OpAndCC = emitOverflowOp(
I.getOpcode(), CarryMI.getDstReg(),
4717 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4719 Register CarryOutReg = CarryMI.getCarryOutReg();
4722 if (!
MRI.use_nodbg_empty(CarryOutReg)) {
4728 emitCSINC(CarryOutReg, ZReg, ZReg,
4729 getInvertedCondCode(OpAndCC.second), MIB);
4732 I.eraseFromParent();
4736std::pair<MachineInstr *, AArch64CC::CondCode>
4737AArch64InstructionSelector::emitOverflowOp(
unsigned Opcode,
Register Dst,
4744 case TargetOpcode::G_SADDO:
4745 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4746 case TargetOpcode::G_UADDO:
4747 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4748 case TargetOpcode::G_SSUBO:
4749 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4750 case TargetOpcode::G_USUBO:
4751 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4752 case TargetOpcode::G_SADDE:
4753 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4754 case TargetOpcode::G_UADDE:
4755 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4756 case TargetOpcode::G_SSUBE:
4757 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4758 case TargetOpcode::G_USUBE:
4759 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4779 unsigned Depth = 0) {
4780 if (!
MRI.hasOneNonDBGUse(Val))
4784 if (isa<GAnyCmp>(ValDef)) {
4786 MustBeFirst =
false;
4792 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4793 bool IsOR = Opcode == TargetOpcode::G_OR;
4805 if (MustBeFirstL && MustBeFirstR)
4811 if (!CanNegateL && !CanNegateR)
4815 CanNegate = WillNegate && CanNegateL && CanNegateR;
4818 MustBeFirst = !CanNegate;
4820 assert(Opcode == TargetOpcode::G_AND &&
"Must be G_AND");
4823 MustBeFirst = MustBeFirstL || MustBeFirstR;
4830MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4836 LLT OpTy =
MRI.getType(LHS);
4838 std::optional<ValueAndVReg>
C;
4842 if (
C &&
C->Value.ult(32))
4843 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4845 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4851 assert(STI.hasFullFP16() &&
"Expected Full FP16 for fp16 comparisons");
4852 CCmpOpc = AArch64::FCCMPHrr;
4855 CCmpOpc = AArch64::FCCMPSrr;
4858 CCmpOpc = AArch64::FCCMPDrr;
4868 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4869 CCmp.
addImm(
C->Value.getZExtValue());
4877MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4884 if (
auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4890 if (isa<GICmp>(Cmp)) {
4901 ExtraCmp = emitFPCompare(LHS, RHS, MIB,
CC);
4912 auto Dst =
MRI.cloneVirtualRegister(LHS);
4913 if (isa<GICmp>(Cmp))
4914 return emitSUBS(Dst,
Cmp->getOperand(2),
Cmp->getOperand(3), MIB);
4915 return emitFPCompare(
Cmp->getOperand(2).getReg(),
4916 Cmp->getOperand(3).getReg(), MIB);
4921 assert(
MRI.hasOneNonDBGUse(Val) &&
"Valid conjunction/disjunction tree");
4923 bool IsOR = Opcode == TargetOpcode::G_OR;
4929 assert(ValidL &&
"Valid conjunction/disjunction tree");
4936 assert(ValidR &&
"Valid conjunction/disjunction tree");
4941 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4950 bool NegateAfterAll;
4951 if (Opcode == TargetOpcode::G_OR) {
4954 assert(CanNegateR &&
"at least one side must be negatable");
4955 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4959 NegateAfterR =
true;
4962 NegateR = CanNegateR;
4963 NegateAfterR = !CanNegateR;
4966 NegateAfterAll = !Negate;
4968 assert(Opcode == TargetOpcode::G_AND &&
4969 "Valid conjunction/disjunction tree");
4970 assert(!Negate &&
"Valid conjunction/disjunction tree");
4974 NegateAfterR =
false;
4975 NegateAfterAll =
false;
4991MachineInstr *AArch64InstructionSelector::emitConjunction(
4993 bool DummyCanNegate;
4994 bool DummyMustBeFirst;
5001bool AArch64InstructionSelector::tryOptSelectConjunction(
GSelect &SelI,
5013bool AArch64InstructionSelector::tryOptSelect(
GSelect &
I) {
5037 if (!
MRI.hasOneNonDBGUse(CondDefReg)) {
5039 for (
const MachineInstr &UI :
MRI.use_nodbg_instructions(CondDefReg)) {
5042 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5048 unsigned CondOpc = CondDef->
getOpcode();
5049 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5050 if (tryOptSelectConjunction(
I, *CondDef))
5056 if (CondOpc == TargetOpcode::G_ICMP) {
5084 emitSelect(
I.getOperand(0).getReg(),
I.getOperand(2).getReg(),
5085 I.getOperand(3).getReg(), CondCode, MIB);
5086 I.eraseFromParent();
5090MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5094 "Unexpected MachineOperand");
5131 return emitCMN(LHS, RHSDef->
getOperand(2), MIRBuilder);
5142 LHSDef->
getOpcode() == TargetOpcode::G_AND) {
5145 if (!ValAndVReg || ValAndVReg->Value != 0)
5155bool AArch64InstructionSelector::selectShuffleVector(
5157 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5158 Register Src1Reg =
I.getOperand(1).getReg();
5159 const LLT Src1Ty =
MRI.getType(Src1Reg);
5160 Register Src2Reg =
I.getOperand(2).getReg();
5161 const LLT Src2Ty =
MRI.getType(Src2Reg);
5172 LLVM_DEBUG(
dbgs() <<
"Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5179 for (
int Val : Mask) {
5182 Val = Val < 0 ? 0 : Val;
5183 for (
unsigned Byte = 0;
Byte < BytesPerElt; ++
Byte) {
5201 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5208 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5212 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5218 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5219 RBI.constrainGenericRegister(
Copy.getReg(0), AArch64::FPR64RegClass,
MRI);
5220 I.eraseFromParent();
5228 auto TBL2 = MIB.
buildInstr(AArch64::TBLv16i8Two, {
I.getOperand(0)},
5231 I.eraseFromParent();
5235MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5245 DstReg =
MRI.createVirtualRegister(DstRC);
5247 unsigned EltSize =
MRI.getType(EltReg).getSizeInBits();
5250 if (RB.
getID() == AArch64::FPRRegBankID) {
5251 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5252 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5254 .
addUse(InsSub->getOperand(0).getReg())
5257 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5266bool AArch64InstructionSelector::selectUSMovFromExtend(
5268 if (
MI.getOpcode() != TargetOpcode::G_SEXT &&
5269 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5270 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5272 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SEXT;
5273 const Register DefReg =
MI.getOperand(0).getReg();
5274 const LLT DstTy =
MRI.getType(DefReg);
5277 if (DstSize != 32 && DstSize != 64)
5281 MI.getOperand(1).getReg(),
MRI);
5287 const LLT &VecTy =
MRI.getType(Src0);
5290 const MachineInstr *ScalarToVector = emitScalarToVector(
5291 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5292 assert(ScalarToVector &&
"Didn't expect emitScalarToVector to fail!");
5298 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5300 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5302 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5304 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5306 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5315 if (DstSize == 64 && !IsSigned) {
5316 Register NewReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5317 MIB.
buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5318 ExtI = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5321 .
addImm(AArch64::sub_32);
5322 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
5324 ExtI = MIB.
buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5327 MI.eraseFromParent();
5331bool AArch64InstructionSelector::selectInsertElt(
MachineInstr &
I,
5333 assert(
I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
5336 Register DstReg =
I.getOperand(0).getReg();
5337 const LLT DstTy =
MRI.getType(DstReg);
5341 Register EltReg =
I.getOperand(2).getReg();
5342 const LLT EltTy =
MRI.getType(EltReg);
5344 if (EltSize < 8 || EltSize > 64)
5349 Register IdxReg =
I.getOperand(3).getReg();
5353 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5356 Register SrcReg =
I.getOperand(1).getReg();
5359 if (VecSize < 128) {
5363 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5373 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5375 if (VecSize < 128) {
5386 I.eraseFromParent();
5390MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5393 if (DstSize == 128) {
5394 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5396 Op = AArch64::MOVIv16b_ns;
5398 Op = AArch64::MOVIv8b_ns;
5405 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5412MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5417 if (DstSize == 128) {
5418 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5420 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5422 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5442MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5447 if (DstSize == 128) {
5448 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5450 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5452 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5478MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5482 if (DstSize == 128) {
5483 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5485 Op = AArch64::MOVIv2d_ns;
5487 Op = AArch64::MOVID;
5493 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5500MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5505 if (DstSize == 128) {
5506 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5508 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5510 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5530MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5534 bool IsWide =
false;
5535 if (DstSize == 128) {
5536 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5538 Op = AArch64::FMOVv4f32_ns;
5541 Op = AArch64::FMOVv2f32_ns;
5550 Op = AArch64::FMOVv2f64_ns;
5554 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5559bool AArch64InstructionSelector::selectIndexedExtLoad(
5561 auto &ExtLd = cast<GIndexedAnyExtLoad>(
MI);
5563 Register WriteBack = ExtLd.getWritebackReg();
5566 LLT Ty =
MRI.getType(Dst);
5568 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5569 bool IsPre = ExtLd.isPre();
5570 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5571 bool InsertIntoXReg =
false;
5579 if (MemSizeBits == 8) {
5582 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5584 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5585 NewLdDstTy = IsDst64 ? s64 : s32;
5587 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5588 InsertIntoXReg = IsDst64;
5591 }
else if (MemSizeBits == 16) {
5594 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5596 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5597 NewLdDstTy = IsDst64 ? s64 : s32;
5599 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5600 InsertIntoXReg = IsDst64;
5603 }
else if (MemSizeBits == 32) {
5605 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5608 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5609 InsertIntoXReg = IsDst64;
5616 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5624 .addImm(Cst->getSExtValue());
5629 if (InsertIntoXReg) {
5631 auto SubToReg = MIB.
buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5634 .
addImm(AArch64::sub_32);
5635 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5641 MI.eraseFromParent();
5646bool AArch64InstructionSelector::selectIndexedLoad(
MachineInstr &
MI,
5648 auto &Ld = cast<GIndexedLoad>(
MI);
5650 Register WriteBack = Ld.getWritebackReg();
5653 assert(
MRI.getType(Dst).getSizeInBits() <= 128 &&
5654 "Unexpected type for indexed load");
5655 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5657 if (MemSize <
MRI.getType(Dst).getSizeInBytes())
5658 return selectIndexedExtLoad(
MI,
MRI);
5662 static constexpr unsigned GPROpcodes[] = {
5663 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5665 static constexpr unsigned FPROpcodes[] = {
5666 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5668 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5669 Opc = FPROpcodes[
Log2_32(MemSize)];
5671 Opc = GPROpcodes[
Log2_32(MemSize)];
5673 static constexpr unsigned GPROpcodes[] = {
5674 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5676 static constexpr unsigned FPROpcodes[] = {
5677 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5678 AArch64::LDRDpost, AArch64::LDRQpost};
5679 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5680 Opc = FPROpcodes[
Log2_32(MemSize)];
5682 Opc = GPROpcodes[
Log2_32(MemSize)];
5688 MIB.
buildInstr(Opc, {WriteBack, Dst}, {
Base}).addImm(Cst->getSExtValue());
5691 MI.eraseFromParent();
5695bool AArch64InstructionSelector::selectIndexedStore(
GIndexedStore &
I,
5701 LLT ValTy =
MRI.getType(Val);
5706 static constexpr unsigned GPROpcodes[] = {
5707 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5709 static constexpr unsigned FPROpcodes[] = {
5710 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5713 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5718 static constexpr unsigned GPROpcodes[] = {
5719 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5721 static constexpr unsigned FPROpcodes[] = {
5722 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5723 AArch64::STRDpost, AArch64::STRQpost};
5725 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5735 MIB.
buildInstr(Opc, {Dst}, {Val,
Base}).addImm(Cst->getSExtValue());
5736 Str.cloneMemRefs(
I);
5738 I.eraseFromParent();
5746 LLT DstTy =
MRI.getType(Dst);
5749 if (DstSize == 128) {
5751 MIRBuilder.
buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5756 if (DstSize == 64) {
5759 .
buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5762 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5763 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass,
MRI);
5795 if (
auto *NewOp = TryMOVIWithBits(DefBits))
5799 auto TryWithFNeg = [&](
APInt DefBits,
int NumBits,
5803 APInt NegBits(DstSize, 0);
5804 unsigned NumElts = DstSize / NumBits;
5805 for (
unsigned i = 0; i < NumElts; i++)
5806 NegBits |= Neg << (NumBits * i);
5807 NegBits = DefBits ^ NegBits;
5811 if (
auto *NewOp = TryMOVIWithBits(NegBits)) {
5812 Register NewDst =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5814 return MIRBuilder.
buildInstr(NegOpc, {Dst}, {NewDst});
5819 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5820 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5821 (STI.hasFullFP16() &&
5822 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5828 LLVM_DEBUG(
dbgs() <<
"Could not generate cp load for constant vector!");
5832 auto Copy = MIRBuilder.
buildCopy(Dst, CPLoad->getOperand(0));
5833 RBI.constrainGenericRegister(
5834 Dst, *
MRI.getRegClass(CPLoad->getOperand(0).getReg()),
MRI);
5838bool AArch64InstructionSelector::tryOptConstantBuildVec(
5840 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5842 assert(DstSize <= 128 &&
"Unexpected build_vec type!");
5848 for (
unsigned Idx = 1;
Idx <
I.getNumOperands(); ++
Idx) {
5854 const_cast<ConstantInt *
>(OpMI->getOperand(1).getCImm()));
5855 else if ((OpMI =
getOpcodeDef(TargetOpcode::G_FCONSTANT,
5856 I.getOperand(
Idx).getReg(),
MRI)))
5858 const_cast<ConstantFP *
>(OpMI->getOperand(1).getFPImm()));
5863 if (!emitConstantVector(
I.getOperand(0).getReg(), CV, MIB,
MRI))
5865 I.eraseFromParent();
5869bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5875 Register Dst =
I.getOperand(0).getReg();
5876 Register EltReg =
I.getOperand(1).getReg();
5877 LLT EltTy =
MRI.getType(EltReg);
5885 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5893 getRegClassForTypeOnBank(
MRI.getType(Dst), DstRB);
5898 auto SubregToReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5902 I.eraseFromParent();
5904 return RBI.constrainGenericRegister(Dst, *DstRC,
MRI);
5907bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &
I,
5909 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5912 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5913 const LLT EltTy =
MRI.getType(
I.getOperand(1).getReg());
5916 if (tryOptConstantBuildVec(
I, DstTy,
MRI))
5918 if (tryOptBuildVecToSubregToReg(
I,
MRI))
5921 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5928 I.getOperand(1).getReg(), MIB);
5938 for (
unsigned i = 2, e = DstSize / EltSize + 1; i <
e; ++i) {
5941 Register OpReg =
I.getOperand(i).getReg();
5943 if (!getOpcodeDef<GImplicitDef>(OpReg,
MRI)) {
5944 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5951 if (DstSize < 128) {
5954 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5957 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5965 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
5966 LLVM_DEBUG(
dbgs() <<
"Unsupported destination size! (" << DstSize
5972 Register DstReg =
I.getOperand(0).getReg();
5974 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0,
SubReg);
5977 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5995 if (PrevMI == ScalarToVec && DstReg.
isVirtual()) {
5997 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5998 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
6002 I.eraseFromParent();
6006bool AArch64InstructionSelector::selectVectorLoadIntrinsic(
unsigned Opc,
6009 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6010 assert(Opc &&
"Expected an opcode?");
6011 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
6013 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6016 "Destination must be 64 bits or 128 bits?");
6017 unsigned SubReg =
Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6018 auto Ptr =
I.getOperand(
I.getNumOperands() - 1).getReg();
6019 assert(
MRI.getType(
Ptr).isPointer() &&
"Expected a pointer type?");
6021 Load.cloneMemRefs(
I);
6023 Register SelectedLoadDst =
Load->getOperand(0).getReg();
6024 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
6025 auto Vec = MIB.
buildInstr(TargetOpcode::COPY, {
I.getOperand(
Idx)}, {})
6026 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
6035bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6037 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6038 assert(Opc &&
"Expected an opcode?");
6039 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
6041 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6044 auto FirstSrcRegIt =
I.operands_begin() + NumVecs + 1;
6046 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.
begin(),
6047 [](
auto MO) { return MO.getReg(); });
6051 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6066 .
addImm(LaneNo->getZExtValue())
6068 Load.cloneMemRefs(
I);
6070 Register SelectedLoadDst =
Load->getOperand(0).getReg();
6071 unsigned SubReg = AArch64::qsub0;
6072 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
6073 auto Vec = MIB.
buildInstr(TargetOpcode::COPY,
6074 {Narrow ?
DstOp(&AArch64::FPR128RegClass)
6077 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
6082 !emitNarrowVector(
I.getOperand(
Idx).getReg(), WideReg, MIB,
MRI))
6088void AArch64InstructionSelector::selectVectorStoreIntrinsic(
MachineInstr &
I,
6092 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6096 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6097 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
6106bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6109 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6113 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
6114 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
6118 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6131 .
addImm(LaneNo->getZExtValue())
6138bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6141 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6152 case Intrinsic::aarch64_ldxp:
6153 case Intrinsic::aarch64_ldaxp: {
6155 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6156 {
I.getOperand(0).
getReg(),
I.getOperand(1).getReg()},
6162 case Intrinsic::trap:
6163 MIB.
buildInstr(AArch64::BRK, {}, {}).addImm(1);
6165 case Intrinsic::debugtrap:
6166 MIB.
buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
6168 case Intrinsic::ubsantrap:
6170 .addImm(
I.getOperand(1).getImm() | (
'U' << 8));
6172 case Intrinsic::aarch64_neon_ld1x2: {
6173 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6176 Opc = AArch64::LD1Twov8b;
6178 Opc = AArch64::LD1Twov16b;
6180 Opc = AArch64::LD1Twov4h;
6182 Opc = AArch64::LD1Twov8h;
6184 Opc = AArch64::LD1Twov2s;
6186 Opc = AArch64::LD1Twov4s;
6188 Opc = AArch64::LD1Twov2d;
6189 else if (Ty ==
S64 || Ty == P0)
6190 Opc = AArch64::LD1Twov1d;
6193 selectVectorLoadIntrinsic(Opc, 2,
I);
6196 case Intrinsic::aarch64_neon_ld1x3: {
6197 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6200 Opc = AArch64::LD1Threev8b;
6202 Opc = AArch64::LD1Threev16b;
6204 Opc = AArch64::LD1Threev4h;
6206 Opc = AArch64::LD1Threev8h;
6208 Opc = AArch64::LD1Threev2s;
6210 Opc = AArch64::LD1Threev4s;
6212 Opc = AArch64::LD1Threev2d;
6213 else if (Ty ==
S64 || Ty == P0)
6214 Opc = AArch64::LD1Threev1d;
6217 selectVectorLoadIntrinsic(Opc, 3,
I);
6220 case Intrinsic::aarch64_neon_ld1x4: {
6221 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6224 Opc = AArch64::LD1Fourv8b;
6226 Opc = AArch64::LD1Fourv16b;
6228 Opc = AArch64::LD1Fourv4h;
6230 Opc = AArch64::LD1Fourv8h;
6232 Opc = AArch64::LD1Fourv2s;
6234 Opc = AArch64::LD1Fourv4s;
6236 Opc = AArch64::LD1Fourv2d;
6237 else if (Ty ==
S64 || Ty == P0)
6238 Opc = AArch64::LD1Fourv1d;
6241 selectVectorLoadIntrinsic(Opc, 4,
I);
6244 case Intrinsic::aarch64_neon_ld2: {
6245 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6248 Opc = AArch64::LD2Twov8b;
6250 Opc = AArch64::LD2Twov16b;
6252 Opc = AArch64::LD2Twov4h;
6254 Opc = AArch64::LD2Twov8h;
6256 Opc = AArch64::LD2Twov2s;
6258 Opc = AArch64::LD2Twov4s;
6260 Opc = AArch64::LD2Twov2d;
6261 else if (Ty ==
S64 || Ty == P0)
6262 Opc = AArch64::LD1Twov1d;
6265 selectVectorLoadIntrinsic(Opc, 2,
I);
6268 case Intrinsic::aarch64_neon_ld2lane: {
6269 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6272 Opc = AArch64::LD2i8;
6274 Opc = AArch64::LD2i16;
6276 Opc = AArch64::LD2i32;
6279 Opc = AArch64::LD2i64;
6282 if (!selectVectorLoadLaneIntrinsic(Opc, 2,
I))
6286 case Intrinsic::aarch64_neon_ld2r: {
6287 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6290 Opc = AArch64::LD2Rv8b;
6292 Opc = AArch64::LD2Rv16b;
6294 Opc = AArch64::LD2Rv4h;
6296 Opc = AArch64::LD2Rv8h;
6298 Opc = AArch64::LD2Rv2s;
6300 Opc = AArch64::LD2Rv4s;
6302 Opc = AArch64::LD2Rv2d;
6303 else if (Ty ==
S64 || Ty == P0)
6304 Opc = AArch64::LD2Rv1d;
6307 selectVectorLoadIntrinsic(Opc, 2,
I);
6310 case Intrinsic::aarch64_neon_ld3: {
6311 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6314 Opc = AArch64::LD3Threev8b;
6316 Opc = AArch64::LD3Threev16b;
6318 Opc = AArch64::LD3Threev4h;
6320 Opc = AArch64::LD3Threev8h;
6322 Opc = AArch64::LD3Threev2s;
6324 Opc = AArch64::LD3Threev4s;
6326 Opc = AArch64::LD3Threev2d;
6327 else if (Ty ==
S64 || Ty == P0)
6328 Opc = AArch64::LD1Threev1d;
6331 selectVectorLoadIntrinsic(Opc, 3,
I);
6334 case Intrinsic::aarch64_neon_ld3lane: {
6335 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6338 Opc = AArch64::LD3i8;
6340 Opc = AArch64::LD3i16;
6342 Opc = AArch64::LD3i32;
6345 Opc = AArch64::LD3i64;
6348 if (!selectVectorLoadLaneIntrinsic(Opc, 3,
I))
6352 case Intrinsic::aarch64_neon_ld3r: {
6353 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6356 Opc = AArch64::LD3Rv8b;
6358 Opc = AArch64::LD3Rv16b;
6360 Opc = AArch64::LD3Rv4h;
6362 Opc = AArch64::LD3Rv8h;
6364 Opc = AArch64::LD3Rv2s;
6366 Opc = AArch64::LD3Rv4s;
6368 Opc = AArch64::LD3Rv2d;
6369 else if (Ty ==
S64 || Ty == P0)
6370 Opc = AArch64::LD3Rv1d;
6373 selectVectorLoadIntrinsic(Opc, 3,
I);
6376 case Intrinsic::aarch64_neon_ld4: {
6377 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6380 Opc = AArch64::LD4Fourv8b;
6382 Opc = AArch64::LD4Fourv16b;
6384 Opc = AArch64::LD4Fourv4h;
6386 Opc = AArch64::LD4Fourv8h;
6388 Opc = AArch64::LD4Fourv2s;
6390 Opc = AArch64::LD4Fourv4s;
6392 Opc = AArch64::LD4Fourv2d;
6393 else if (Ty ==
S64 || Ty == P0)
6394 Opc = AArch64::LD1Fourv1d;
6397 selectVectorLoadIntrinsic(Opc, 4,
I);
6400 case Intrinsic::aarch64_neon_ld4lane: {
6401 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6404 Opc = AArch64::LD4i8;
6406 Opc = AArch64::LD4i16;
6408 Opc = AArch64::LD4i32;
6411 Opc = AArch64::LD4i64;
6414 if (!selectVectorLoadLaneIntrinsic(Opc, 4,
I))
6418 case Intrinsic::aarch64_neon_ld4r: {
6419 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6422 Opc = AArch64::LD4Rv8b;
6424 Opc = AArch64::LD4Rv16b;
6426 Opc = AArch64::LD4Rv4h;
6428 Opc = AArch64::LD4Rv8h;
6430 Opc = AArch64::LD4Rv2s;
6432 Opc = AArch64::LD4Rv4s;
6434 Opc = AArch64::LD4Rv2d;
6435 else if (Ty ==
S64 || Ty == P0)
6436 Opc = AArch64::LD4Rv1d;
6439 selectVectorLoadIntrinsic(Opc, 4,
I);
6442 case Intrinsic::aarch64_neon_st1x2: {
6443 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6446 Opc = AArch64::ST1Twov8b;
6448 Opc = AArch64::ST1Twov16b;
6450 Opc = AArch64::ST1Twov4h;
6452 Opc = AArch64::ST1Twov8h;
6454 Opc = AArch64::ST1Twov2s;
6456 Opc = AArch64::ST1Twov4s;
6458 Opc = AArch64::ST1Twov2d;
6459 else if (Ty ==
S64 || Ty == P0)
6460 Opc = AArch64::ST1Twov1d;
6463 selectVectorStoreIntrinsic(
I, 2, Opc);
6466 case Intrinsic::aarch64_neon_st1x3: {
6467 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6470 Opc = AArch64::ST1Threev8b;
6472 Opc = AArch64::ST1Threev16b;
6474 Opc = AArch64::ST1Threev4h;
6476 Opc = AArch64::ST1Threev8h;
6478 Opc = AArch64::ST1Threev2s;
6480 Opc = AArch64::ST1Threev4s;
6482 Opc = AArch64::ST1Threev2d;
6483 else if (Ty ==
S64 || Ty == P0)
6484 Opc = AArch64::ST1Threev1d;
6487 selectVectorStoreIntrinsic(
I, 3, Opc);
6490 case Intrinsic::aarch64_neon_st1x4: {
6491 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6494 Opc = AArch64::ST1Fourv8b;
6496 Opc = AArch64::ST1Fourv16b;
6498 Opc = AArch64::ST1Fourv4h;
6500 Opc = AArch64::ST1Fourv8h;
6502 Opc = AArch64::ST1Fourv2s;
6504 Opc = AArch64::ST1Fourv4s;
6506 Opc = AArch64::ST1Fourv2d;
6507 else if (Ty ==
S64 || Ty == P0)
6508 Opc = AArch64::ST1Fourv1d;
6511 selectVectorStoreIntrinsic(
I, 4, Opc);
6514 case Intrinsic::aarch64_neon_st2: {
6515 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6518 Opc = AArch64::ST2Twov8b;
6520 Opc = AArch64::ST2Twov16b;
6522 Opc = AArch64::ST2Twov4h;
6524 Opc = AArch64::ST2Twov8h;
6526 Opc = AArch64::ST2Twov2s;
6528 Opc = AArch64::ST2Twov4s;
6530 Opc = AArch64::ST2Twov2d;
6531 else if (Ty ==
S64 || Ty == P0)
6532 Opc = AArch64::ST1Twov1d;
6535 selectVectorStoreIntrinsic(
I, 2, Opc);
6538 case Intrinsic::aarch64_neon_st3: {
6539 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6542 Opc = AArch64::ST3Threev8b;
6544 Opc = AArch64::ST3Threev16b;
6546 Opc = AArch64::ST3Threev4h;
6548 Opc = AArch64::ST3Threev8h;
6550 Opc = AArch64::ST3Threev2s;
6552 Opc = AArch64::ST3Threev4s;
6554 Opc = AArch64::ST3Threev2d;
6555 else if (Ty ==
S64 || Ty == P0)
6556 Opc = AArch64::ST1Threev1d;
6559 selectVectorStoreIntrinsic(
I, 3, Opc);
6562 case Intrinsic::aarch64_neon_st4: {
6563 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6566 Opc = AArch64::ST4Fourv8b;
6568 Opc = AArch64::ST4Fourv16b;
6570 Opc = AArch64::ST4Fourv4h;
6572 Opc = AArch64::ST4Fourv8h;
6574 Opc = AArch64::ST4Fourv2s;
6576 Opc = AArch64::ST4Fourv4s;
6578 Opc = AArch64::ST4Fourv2d;
6579 else if (Ty ==
S64 || Ty == P0)
6580 Opc = AArch64::ST1Fourv1d;
6583 selectVectorStoreIntrinsic(
I, 4, Opc);
6586 case Intrinsic::aarch64_neon_st2lane: {
6587 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6590 Opc = AArch64::ST2i8;
6592 Opc = AArch64::ST2i16;
6594 Opc = AArch64::ST2i32;
6597 Opc = AArch64::ST2i64;
6600 if (!selectVectorStoreLaneIntrinsic(
I, 2, Opc))
6604 case Intrinsic::aarch64_neon_st3lane: {
6605 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6608 Opc = AArch64::ST3i8;
6610 Opc = AArch64::ST3i16;
6612 Opc = AArch64::ST3i32;
6615 Opc = AArch64::ST3i64;
6618 if (!selectVectorStoreLaneIntrinsic(
I, 3, Opc))
6622 case Intrinsic::aarch64_neon_st4lane: {
6623 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6626 Opc = AArch64::ST4i8;
6628 Opc = AArch64::ST4i16;
6630 Opc = AArch64::ST4i32;
6633 Opc = AArch64::ST4i64;
6636 if (!selectVectorStoreLaneIntrinsic(
I, 4, Opc))
6640 case Intrinsic::aarch64_mops_memset_tag: {
6653 Register DstDef =
I.getOperand(0).getReg();
6655 Register DstUse =
I.getOperand(2).getReg();
6656 Register ValUse =
I.getOperand(3).getReg();
6657 Register SizeUse =
I.getOperand(4).getReg();
6664 auto Memset = MIB.
buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6665 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6672 I.eraseFromParent();
6676bool AArch64InstructionSelector::selectIntrinsic(
MachineInstr &
I,
6678 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6683 case Intrinsic::aarch64_crypto_sha1h: {
6684 Register DstReg =
I.getOperand(0).getReg();
6685 Register SrcReg =
I.getOperand(2).getReg();
6688 if (
MRI.getType(DstReg).getSizeInBits() != 32 ||
6689 MRI.getType(SrcReg).getSizeInBits() != 32)
6694 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
6695 SrcReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6699 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
6700 AArch64::GPR32RegClass,
MRI);
6703 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID)
6704 DstReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6707 auto SHA1Inst = MIB.
buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6711 if (DstReg !=
I.getOperand(0).getReg()) {
6715 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
6716 AArch64::GPR32RegClass,
MRI);
6719 I.eraseFromParent();
6722 case Intrinsic::frameaddress:
6723 case Intrinsic::returnaddress: {
6727 unsigned Depth =
I.getOperand(2).getImm();
6728 Register DstReg =
I.getOperand(0).getReg();
6729 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6731 if (
Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6732 if (!MFReturnAddr) {
6737 MF,
TII, AArch64::LR, AArch64::GPR64RegClass,
I.getDebugLoc());
6740 if (STI.hasPAuth()) {
6741 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6748 I.eraseFromParent();
6755 Register NextFrame =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6757 MIB.
buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6759 FrameAddr = NextFrame;
6762 if (IntrinID == Intrinsic::frameaddress)
6767 if (STI.hasPAuth()) {
6768 Register TmpReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6769 MIB.
buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6770 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6779 I.eraseFromParent();
6782 case Intrinsic::swift_async_context_addr:
6783 auto Sub = MIB.
buildInstr(AArch64::SUBXri, {
I.getOperand(0).getReg()},
6791 I.eraseFromParent();
6798AArch64InstructionSelector::selectShiftA_32(
const MachineOperand &Root)
const {
6800 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6801 return std::nullopt;
6802 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6807AArch64InstructionSelector::selectShiftB_32(
const MachineOperand &Root)
const {
6809 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6810 return std::nullopt;
6816AArch64InstructionSelector::selectShiftA_64(
const MachineOperand &Root)
const {
6818 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6819 return std::nullopt;
6820 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6825AArch64InstructionSelector::selectShiftB_64(
const MachineOperand &Root)
const {
6827 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6828 return std::nullopt;
6839AArch64InstructionSelector::select12BitValueWithLeftShift(
6842 if (Immed >> 12 == 0) {
6844 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6846 Immed = Immed >> 12;
6848 return std::nullopt;
6861AArch64InstructionSelector::selectArithImmed(
MachineOperand &Root)
const {
6868 if (MaybeImmed == std::nullopt)
6869 return std::nullopt;
6870 return select12BitValueWithLeftShift(*MaybeImmed);
6876AArch64InstructionSelector::selectNegArithImmed(
MachineOperand &Root)
const {
6880 return std::nullopt;
6882 if (MaybeImmed == std::nullopt)
6883 return std::nullopt;
6890 return std::nullopt;
6895 if (
MRI.getType(Root.
getReg()).getSizeInBits() == 32)
6898 Immed = ~Immed + 1ULL;
6900 if (Immed & 0xFFFFFFFFFF000000ULL)
6901 return std::nullopt;
6903 Immed &= 0xFFFFFFULL;
6904 return select12BitValueWithLeftShift(Immed);
6910bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6914 if (
MRI.hasOneNonDBGUse(DefReg) ||
6915 MI.getParent()->getParent()->getFunction().hasOptSize())
6920 if (!STI.hasAddrLSLFast())
6926 return all_of(
MRI.use_nodbg_instructions(DefReg),
6942AArch64InstructionSelector::selectExtendedSHL(
6944 unsigned SizeInBytes,
bool WantsExt)
const {
6945 assert(
Base.isReg() &&
"Expected base to be a register operand");
6946 assert(
Offset.isReg() &&
"Expected offset to be a register operand");
6951 unsigned OffsetOpc = OffsetInst->
getOpcode();
6952 bool LookedThroughZExt =
false;
6953 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
6955 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
6956 return std::nullopt;
6960 LookedThroughZExt =
true;
6962 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
6963 return std::nullopt;
6966 int64_t LegalShiftVal =
Log2_32(SizeInBytes);
6967 if (LegalShiftVal == 0)
6968 return std::nullopt;
6969 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
6970 return std::nullopt;
6981 if (OffsetOpc == TargetOpcode::G_SHL)
6982 return std::nullopt;
6988 return std::nullopt;
6993 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
6997 if (OffsetOpc == TargetOpcode::G_MUL) {
6998 if (!llvm::has_single_bit<uint32_t>(ImmVal))
6999 return std::nullopt;
7005 if ((ImmVal & 0x7) != ImmVal)
7006 return std::nullopt;
7010 if (ImmVal != LegalShiftVal)
7011 return std::nullopt;
7013 unsigned SignExtend = 0;
7017 if (!LookedThroughZExt) {
7019 auto Ext = getExtendTypeForInst(*ExtInst,
MRI,
true);
7021 return std::nullopt;
7026 return std::nullopt;
7032 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7042 MIB.addImm(SignExtend);
7056AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7059 return std::nullopt;
7076 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7077 return std::nullopt;
7083 return selectExtendedSHL(Root, PtrAdd->
getOperand(1),
7097AArch64InstructionSelector::selectAddrModeRegisterOffset(
7103 if (Gep->
getOpcode() != TargetOpcode::G_PTR_ADD)
7104 return std::nullopt;
7110 return std::nullopt;
7130AArch64InstructionSelector::selectAddrModeXRO(
MachineOperand &Root,
7131 unsigned SizeInBytes)
const {
7134 return std::nullopt;
7138 return std::nullopt;
7156 unsigned Scale =
Log2_32(SizeInBytes);
7157 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7161 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7162 ImmOff < (0x1000 << Scale))
7163 return std::nullopt;
7168 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7172 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7178 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7179 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7184 return std::nullopt;
7188 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7194 return selectAddrModeRegisterOffset(Root);
7204AArch64InstructionSelector::selectAddrModeWRO(
MachineOperand &Root,
7205 unsigned SizeInBytes)
const {
7210 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI))
7211 return std::nullopt;
7232 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->
getOperand(0),
7241 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI))
7242 return std::nullopt;
7246 getExtendTypeForInst(*OffsetInst,
MRI,
true);
7248 return std::nullopt;
7253 AArch64::GPR32RegClass, MIB);
7260 MIB.addImm(SignExtend);
7271AArch64InstructionSelector::selectAddrModeUnscaled(
MachineOperand &Root,
7272 unsigned Size)
const {
7277 return std::nullopt;
7279 if (!isBaseWithConstantOffset(Root,
MRI))
7280 return std::nullopt;
7285 if (!OffImm.
isReg())
7286 return std::nullopt;
7288 if (
RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7289 return std::nullopt;
7293 return std::nullopt;
7296 if (RHSC >= -256 && RHSC < 256) {
7303 return std::nullopt;
7307AArch64InstructionSelector::tryFoldAddLowIntoImm(
MachineInstr &RootDef,
7310 if (RootDef.
getOpcode() != AArch64::G_ADD_LOW)
7311 return std::nullopt;
7314 return std::nullopt;
7319 return std::nullopt;
7323 return std::nullopt;
7327 return std::nullopt;
7329 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.
getTarget());
7334 MIB.addGlobalAddress(GV,
Offset,
7344AArch64InstructionSelector::selectAddrModeIndexed(
MachineOperand &Root,
7345 unsigned Size)
const {
7350 return std::nullopt;
7353 if (RootDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7363 auto OpFns = tryFoldAddLowIntoImm(*RootDef,
Size,
MRI);
7368 if (isBaseWithConstantOffset(Root,
MRI)) {
7376 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7377 if (LHSDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX)
7392 if (selectAddrModeUnscaled(Root,
Size))
7393 return std::nullopt;
7404 switch (
MI.getOpcode()) {
7407 case TargetOpcode::G_SHL:
7409 case TargetOpcode::G_LSHR:
7411 case TargetOpcode::G_ASHR:
7413 case TargetOpcode::G_ROTR:
7421AArch64InstructionSelector::selectShiftedRegister(
MachineOperand &Root,
7422 bool AllowROR)
const {
7424 return std::nullopt;
7433 return std::nullopt;
7435 return std::nullopt;
7436 if (!isWorthFoldingIntoExtendedReg(*ShiftInst,
MRI))
7437 return std::nullopt;
7443 return std::nullopt;
7450 unsigned NumBits =
MRI.getType(ShiftReg).getSizeInBits();
7451 unsigned Val = *Immed & (NumBits - 1);
7460 unsigned Opc =
MI.getOpcode();
7463 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7465 if (Opc == TargetOpcode::G_SEXT)
7466 Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7468 Size =
MI.getOperand(2).getImm();
7469 assert(
Size != 64 &&
"Extend from 64 bits?");
7482 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7483 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7484 assert(
Size != 64 &&
"Extend from 64 bits?");
7499 if (Opc != TargetOpcode::G_AND)
7518Register AArch64InstructionSelector::moveScalarRegClass(
7521 auto Ty =
MRI.getType(Reg);
7530 return Copy.getReg(0);
7536AArch64InstructionSelector::selectArithExtendedRegister(
7539 return std::nullopt;
7548 return std::nullopt;
7550 if (!isWorthFoldingIntoExtendedReg(*RootDef,
MRI))
7551 return std::nullopt;
7554 if (RootDef->
getOpcode() == TargetOpcode::G_SHL) {
7559 return std::nullopt;
7560 ShiftVal = *MaybeShiftVal;
7562 return std::nullopt;
7567 return std::nullopt;
7568 Ext = getExtendTypeForInst(*ExtDef,
MRI);
7570 return std::nullopt;
7574 Ext = getExtendTypeForInst(*RootDef,
MRI);
7576 return std::nullopt;
7585 if (isDef32(*ExtInst))
7586 return std::nullopt;
7593 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7597 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7602AArch64InstructionSelector::selectExtractHigh(
MachineOperand &Root)
const {
7604 return std::nullopt;
7609 while (Extract && Extract->MI->
getOpcode() == TargetOpcode::G_BITCAST &&
7610 STI.isLittleEndian())
7614 return std::nullopt;
7616 if (Extract->MI->
getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7622 if (Extract->MI->
getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7627 LaneIdx->Value.getSExtValue() == 1) {
7633 return std::nullopt;
7640 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7641 "Expected G_CONSTANT");
7642 std::optional<int64_t> CstVal =
7644 assert(CstVal &&
"Expected constant value");
7648void AArch64InstructionSelector::renderLogicalImm32(
7650 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7651 "Expected G_CONSTANT");
7652 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7657void AArch64InstructionSelector::renderLogicalImm64(
7659 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7660 "Expected G_CONSTANT");
7661 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7669 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7670 "Expected G_FCONSTANT");
7678 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7679 "Expected G_FCONSTANT");
7687 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7688 "Expected G_FCONSTANT");
7693void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7695 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7696 "Expected G_FCONSTANT");
7704bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7706 if (!
MI.mayLoadOrStore())
7709 "Expected load/store to have only one mem op!");
7710 return (*
MI.memoperands_begin())->getSize() == NumBytes;
7713bool AArch64InstructionSelector::isDef32(
const MachineInstr &
MI)
const {
7715 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() != 32)
7722 switch (
MI.getOpcode()) {
7725 case TargetOpcode::COPY:
7726 case TargetOpcode::G_BITCAST:
7727 case TargetOpcode::G_TRUNC:
7728 case TargetOpcode::G_PHI:
7738 assert(
MI.getOpcode() == TargetOpcode::G_PHI &&
"Expected a G_PHI");
7741 assert(DstRB &&
"Expected PHI dst to have regbank assigned");
7752 auto *OpDef =
MRI.getVRegDef(OpReg);
7753 const LLT &Ty =
MRI.getType(OpReg);
7759 if (InsertPt != OpDefBB.
end() && InsertPt->isPHI())
7763 MRI.setRegBank(Copy.getReg(0), *DstRB);
7764 MO.setReg(Copy.getReg(0));
7773 for (
auto &BB : MF) {
7774 for (
auto &
MI : BB) {
7775 if (
MI.getOpcode() == TargetOpcode::G_PHI)
7780 for (
auto *
MI : Phis) {
7802 bool HasGPROp =
false, HasFPROp =
false;
7806 const LLT &Ty =
MRI.getType(MO.getReg());
7816 if (RB->
getID() == AArch64::GPRRegBankID)
7822 if (HasGPROp && HasFPROp)
7832 return new AArch64InstructionSelector(
TM, Subtarget, RBI);
unsigned const MachineRegisterInfo * MRI
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
uint64_t getMemSizeInBits() const
Returns the size in bits of the memory access.
uint64_t getMemSize() const
Returns the size in bytes of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size; return false otherwise.
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out, when considering SETFALSE (something that never exists dynamically) as 0.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONSTANT, returns its value as an APInt and the virtual register defining it.
AtomicOrdering
Atomic ordering for LLVM's memory model.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.