44#include "llvm/IR/IntrinsicsAArch64.h"
52#define DEBUG_TYPE "aarch64-isel"
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
82 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
87 ProduceNonFlagSettingCondBr =
135 bool tryOptAndIntoCompareBranch(
MachineInstr &AndInst,
bool Invert,
213 bool selectVectorLoadIntrinsic(
unsigned Opc,
unsigned NumVecs,
215 bool selectVectorLoadLaneIntrinsic(
unsigned Opc,
unsigned NumVecs,
217 void selectVectorStoreIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
219 bool selectVectorStoreLaneIntrinsic(
MachineInstr &
I,
unsigned NumVecs,
233 unsigned Opc1,
unsigned Opc2,
bool isExt);
239 unsigned emitConstantPoolEntry(
const Constant *CPVal,
258 std::optional<CmpInst::Predicate> = std::nullopt)
const;
261 emitInstr(
unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
264 const ComplexRendererFns &RenderFns = std::nullopt)
const;
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
342 std::pair<MachineInstr *, AArch64CC::CondCode>
377 ComplexRendererFns selectShiftA_32(
const MachineOperand &Root)
const;
378 ComplexRendererFns selectShiftB_32(
const MachineOperand &Root)
const;
379 ComplexRendererFns selectShiftA_64(
const MachineOperand &Root)
const;
380 ComplexRendererFns selectShiftB_64(
const MachineOperand &Root)
const;
382 ComplexRendererFns select12BitValueWithLeftShift(
uint64_t Immed)
const;
384 ComplexRendererFns selectNegArithImmed(
MachineOperand &Root)
const;
387 unsigned Size)
const;
389 ComplexRendererFns selectAddrModeUnscaled8(
MachineOperand &Root)
const {
390 return selectAddrModeUnscaled(Root, 1);
392 ComplexRendererFns selectAddrModeUnscaled16(
MachineOperand &Root)
const {
393 return selectAddrModeUnscaled(Root, 2);
395 ComplexRendererFns selectAddrModeUnscaled32(
MachineOperand &Root)
const {
396 return selectAddrModeUnscaled(Root, 4);
398 ComplexRendererFns selectAddrModeUnscaled64(
MachineOperand &Root)
const {
399 return selectAddrModeUnscaled(Root, 8);
401 ComplexRendererFns selectAddrModeUnscaled128(
MachineOperand &Root)
const {
402 return selectAddrModeUnscaled(Root, 16);
407 ComplexRendererFns tryFoldAddLowIntoImm(
MachineInstr &RootDef,
unsigned Size,
411 unsigned Size)
const;
413 ComplexRendererFns selectAddrModeIndexed(
MachineOperand &Root)
const {
414 return selectAddrModeIndexed(Root, Width / 8);
423 bool IsAddrOperand)
const;
426 unsigned SizeInBytes)
const;
434 bool WantsExt)
const;
435 ComplexRendererFns selectAddrModeRegisterOffset(
MachineOperand &Root)
const;
437 unsigned SizeInBytes)
const;
439 ComplexRendererFns selectAddrModeXRO(
MachineOperand &Root)
const {
440 return selectAddrModeXRO(Root, Width / 8);
444 unsigned SizeInBytes)
const;
446 ComplexRendererFns selectAddrModeWRO(
MachineOperand &Root)
const {
447 return selectAddrModeWRO(Root, Width / 8);
451 bool AllowROR =
false)
const;
453 ComplexRendererFns selectArithShiftedRegister(
MachineOperand &Root)
const {
454 return selectShiftedRegister(Root);
457 ComplexRendererFns selectLogicalShiftedRegister(
MachineOperand &Root)
const {
458 return selectShiftedRegister(Root,
true);
468 bool IsLoadStore =
false)
const;
479 ComplexRendererFns selectArithExtendedRegister(
MachineOperand &Root)
const;
484 int OpIdx = -1)
const;
486 int OpIdx = -1)
const;
488 int OpIdx = -1)
const;
492 int OpIdx = -1)
const;
494 int OpIdx = -1)
const;
496 int OpIdx = -1)
const;
499 int OpIdx = -1)
const;
505 bool tryOptSelect(
GSelect &Sel);
512 bool isLoadStoreOfNumBytes(
const MachineInstr &
MI,
unsigned NumBytes)
const;
525 bool ProduceNonFlagSettingCondBr =
false;
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
551AArch64InstructionSelector::AArch64InstructionSelector(
554 :
TM(
TM), STI(STI),
TII(*STI.getInstrInfo()),
TRI(*STI.getRegisterInfo()),
557#include
"AArch64GenGlobalISel.inc"
560#include
"AArch64GenGlobalISel.inc"
572 bool GetAllRegSet =
false) {
573 if (RB.
getID() == AArch64::GPRRegBankID) {
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
581 return &AArch64::XSeqPairsClassRegClass;
585 if (RB.
getID() == AArch64::FPRRegBankID) {
588 return &AArch64::FPR8RegClass;
590 return &AArch64::FPR16RegClass;
592 return &AArch64::FPR32RegClass;
594 return &AArch64::FPR64RegClass;
596 return &AArch64::FPR128RegClass;
608 bool GetAllRegSet =
false) {
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
615 unsigned RegBankID = RB.
getID();
617 if (RegBankID == AArch64::GPRRegBankID) {
618 if (SizeInBits <= 32)
619 return GetAllRegSet ? &AArch64::GPR32allRegClass
620 : &AArch64::GPR32RegClass;
621 if (SizeInBits == 64)
622 return GetAllRegSet ? &AArch64::GPR64allRegClass
623 : &AArch64::GPR64RegClass;
624 if (SizeInBits == 128)
625 return &AArch64::XSeqPairsClassRegClass;
628 if (RegBankID == AArch64::FPRRegBankID) {
629 switch (SizeInBits) {
633 return &AArch64::FPR8RegClass;
635 return &AArch64::FPR16RegClass;
637 return &AArch64::FPR32RegClass;
639 return &AArch64::FPR64RegClass;
641 return &AArch64::FPR128RegClass;
651 switch (
TRI.getRegSizeInBits(*RC)) {
659 if (RC != &AArch64::FPR32RegClass)
669 dbgs() <<
"Couldn't find appropriate subregister for register class.");
678 switch (RB.
getID()) {
679 case AArch64::GPRRegBankID:
681 case AArch64::FPRRegBankID:
704 const unsigned RegClassIDs[],
706 unsigned NumRegs = Regs.
size();
709 assert(NumRegs >= 2 && NumRegs <= 4 &&
710 "Only support between two and 4 registers in a tuple!");
712 auto *DesiredClass =
TRI->getRegClass(RegClassIDs[NumRegs - 2]);
714 MIB.
buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
715 for (
unsigned I = 0, E = Regs.
size();
I < E; ++
I) {
716 RegSequence.addUse(Regs[
I]);
717 RegSequence.addImm(SubRegs[
I]);
719 return RegSequence.getReg(0);
724 static const unsigned RegClassIDs[] = {
725 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
726 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
727 AArch64::dsub2, AArch64::dsub3};
728 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
733 static const unsigned RegClassIDs[] = {
734 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
735 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
736 AArch64::qsub2, AArch64::qsub3};
737 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
742 auto &
MBB = *
MI.getParent();
744 auto &
MRI = MF.getRegInfo();
750 else if (Root.
isReg()) {
755 Immed = ValAndVReg->Value.getSExtValue();
771 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
778 for (
auto &MO :
I.operands()) {
781 LLVM_DEBUG(
dbgs() <<
"Generic inst non-reg operands are unsupported\n");
789 if (!MO.getReg().isVirtual()) {
790 LLVM_DEBUG(
dbgs() <<
"Generic inst has physical register operand\n");
800 if (PrevOpBank && OpBank != PrevOpBank) {
801 LLVM_DEBUG(
dbgs() <<
"Generic inst operands have different banks\n");
816 case AArch64::GPRRegBankID:
818 switch (GenericOpc) {
819 case TargetOpcode::G_SHL:
820 return AArch64::LSLVWr;
821 case TargetOpcode::G_LSHR:
822 return AArch64::LSRVWr;
823 case TargetOpcode::G_ASHR:
824 return AArch64::ASRVWr;
828 }
else if (OpSize == 64) {
829 switch (GenericOpc) {
830 case TargetOpcode::G_PTR_ADD:
831 return AArch64::ADDXrr;
832 case TargetOpcode::G_SHL:
833 return AArch64::LSLVXr;
834 case TargetOpcode::G_LSHR:
835 return AArch64::LSRVXr;
836 case TargetOpcode::G_ASHR:
837 return AArch64::ASRVXr;
843 case AArch64::FPRRegBankID:
846 switch (GenericOpc) {
847 case TargetOpcode::G_FADD:
848 return AArch64::FADDSrr;
849 case TargetOpcode::G_FSUB:
850 return AArch64::FSUBSrr;
851 case TargetOpcode::G_FMUL:
852 return AArch64::FMULSrr;
853 case TargetOpcode::G_FDIV:
854 return AArch64::FDIVSrr;
859 switch (GenericOpc) {
860 case TargetOpcode::G_FADD:
861 return AArch64::FADDDrr;
862 case TargetOpcode::G_FSUB:
863 return AArch64::FSUBDrr;
864 case TargetOpcode::G_FMUL:
865 return AArch64::FMULDrr;
866 case TargetOpcode::G_FDIV:
867 return AArch64::FDIVDrr;
868 case TargetOpcode::G_OR:
869 return AArch64::ORRv8i8;
886 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
888 case AArch64::GPRRegBankID:
891 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
893 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
895 return isStore ? AArch64::STRWui : AArch64::LDRWui;
897 return isStore ? AArch64::STRXui : AArch64::LDRXui;
900 case AArch64::FPRRegBankID:
903 return isStore ? AArch64::STRBui : AArch64::LDRBui;
905 return isStore ? AArch64::STRHui : AArch64::LDRHui;
907 return isStore ? AArch64::STRSui : AArch64::LDRSui;
909 return isStore ? AArch64::STRDui : AArch64::LDRDui;
911 return isStore ? AArch64::STRQui : AArch64::LDRQui;
925 assert(SrcReg.
isValid() &&
"Expected a valid source register?");
926 assert(To &&
"Destination register class cannot be null");
933 RegOp.
setReg(SubRegCopy.getReg(0));
937 if (!
I.getOperand(0).getReg().isPhysical())
947static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
951 Register DstReg =
I.getOperand(0).getReg();
952 Register SrcReg =
I.getOperand(1).getReg();
967 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
985 if (Reg.isPhysical())
987 LLT Ty =
MRI.getType(Reg);
993 RC = getRegClassForTypeOnBank(Ty, RB);
996 dbgs() <<
"Warning: DBG_VALUE operand has unexpected size/bank\n");
1009 Register DstReg =
I.getOperand(0).getReg();
1010 Register SrcReg =
I.getOperand(1).getReg();
1029 LLVM_DEBUG(
dbgs() <<
"Couldn't determine source register class\n");
1033 const TypeSize SrcSize =
TRI.getRegSizeInBits(*SrcRC);
1034 const TypeSize DstSize =
TRI.getRegSizeInBits(*DstRC);
1045 auto Copy = MIB.
buildCopy({DstTempRC}, {SrcReg});
1047 }
else if (SrcSize > DstSize) {
1054 }
else if (DstSize > SrcSize) {
1061 Register PromoteReg =
MRI.createVirtualRegister(PromotionRC);
1063 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1068 RegOp.
setReg(PromoteReg);
1087 if (
I.getOpcode() == TargetOpcode::G_ZEXT) {
1088 I.setDesc(
TII.get(AArch64::COPY));
1089 assert(SrcRegBank.
getID() == AArch64::GPRRegBankID);
1093 I.setDesc(
TII.get(AArch64::COPY));
1108 switch (GenericOpc) {
1109 case TargetOpcode::G_SITOFP:
1110 return AArch64::SCVTFUWSri;
1111 case TargetOpcode::G_UITOFP:
1112 return AArch64::UCVTFUWSri;
1113 case TargetOpcode::G_FPTOSI:
1114 return AArch64::FCVTZSUWSr;
1115 case TargetOpcode::G_FPTOUI:
1116 return AArch64::FCVTZUUWSr;
1121 switch (GenericOpc) {
1122 case TargetOpcode::G_SITOFP:
1123 return AArch64::SCVTFUXSri;
1124 case TargetOpcode::G_UITOFP:
1125 return AArch64::UCVTFUXSri;
1126 case TargetOpcode::G_FPTOSI:
1127 return AArch64::FCVTZSUWDr;
1128 case TargetOpcode::G_FPTOUI:
1129 return AArch64::FCVTZUUWDr;
1139 switch (GenericOpc) {
1140 case TargetOpcode::G_SITOFP:
1141 return AArch64::SCVTFUWDri;
1142 case TargetOpcode::G_UITOFP:
1143 return AArch64::UCVTFUWDri;
1144 case TargetOpcode::G_FPTOSI:
1145 return AArch64::FCVTZSUXSr;
1146 case TargetOpcode::G_FPTOUI:
1147 return AArch64::FCVTZUUXSr;
1152 switch (GenericOpc) {
1153 case TargetOpcode::G_SITOFP:
1154 return AArch64::SCVTFUXDri;
1155 case TargetOpcode::G_UITOFP:
1156 return AArch64::UCVTFUXDri;
1157 case TargetOpcode::G_FPTOSI:
1158 return AArch64::FCVTZSUXDr;
1159 case TargetOpcode::G_FPTOUI:
1160 return AArch64::FCVTZUUXDr;
1179 RBI.getRegBank(True,
MRI,
TRI)->getID() &&
1180 "Expected both select operands to have the same regbank?");
1181 LLT Ty =
MRI.getType(True);
1186 "Expected 32 bit or 64 bit select only?");
1187 const bool Is32Bit =
Size == 32;
1188 if (RBI.getRegBank(True,
MRI,
TRI)->getID() != AArch64::GPRRegBankID) {
1189 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1190 auto FCSel = MIB.
buildInstr(Opc, {Dst}, {True, False}).addImm(
CC);
1196 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1198 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &
CC, &
MRI,
1213 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1230 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1249 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1265 auto TryOptSelectCst = [&Opc, &True, &False, &
CC, Is32Bit, &
MRI,
1271 if (!TrueCst && !FalseCst)
1274 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1275 if (TrueCst && FalseCst) {
1276 int64_t
T = TrueCst->Value.getSExtValue();
1277 int64_t
F = FalseCst->Value.getSExtValue();
1279 if (
T == 0 &&
F == 1) {
1281 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1287 if (
T == 0 &&
F == -1) {
1289 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1297 int64_t
T = TrueCst->Value.getSExtValue();
1300 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1309 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1318 int64_t
F = FalseCst->Value.getSExtValue();
1321 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1328 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1336 Optimized |= TryFoldBinOpIntoSelect(False, True,
false);
1337 Optimized |= TryFoldBinOpIntoSelect(True, False,
true);
1458 assert(Reg.isValid() &&
"Expected valid register!");
1459 bool HasZext =
false;
1461 unsigned Opc =
MI->getOpcode();
1463 if (!
MI->getOperand(0).isReg() ||
1464 !
MRI.hasOneNonDBGUse(
MI->getOperand(0).getReg()))
1471 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1472 Opc == TargetOpcode::G_TRUNC) {
1473 if (Opc == TargetOpcode::G_ZEXT)
1476 Register NextReg =
MI->getOperand(1).getReg();
1478 if (!NextReg.
isValid() || !
MRI.hasOneNonDBGUse(NextReg))
1487 std::optional<uint64_t>
C;
1492 case TargetOpcode::G_AND:
1493 case TargetOpcode::G_XOR: {
1494 TestReg =
MI->getOperand(1).getReg();
1495 Register ConstantReg =
MI->getOperand(2).getReg();
1506 C = VRegAndVal->Value.getZExtValue();
1508 C = VRegAndVal->Value.getSExtValue();
1512 case TargetOpcode::G_ASHR:
1513 case TargetOpcode::G_LSHR:
1514 case TargetOpcode::G_SHL: {
1515 TestReg =
MI->getOperand(1).getReg();
1519 C = VRegAndVal->Value.getSExtValue();
1531 unsigned TestRegSize =
MRI.getType(TestReg).getSizeInBits();
1535 case TargetOpcode::G_AND:
1537 if ((*
C >> Bit) & 1)
1540 case TargetOpcode::G_SHL:
1543 if (*
C <= Bit && (Bit - *
C) < TestRegSize) {
1548 case TargetOpcode::G_ASHR:
1553 if (Bit >= TestRegSize)
1554 Bit = TestRegSize - 1;
1556 case TargetOpcode::G_LSHR:
1558 if ((Bit + *
C) < TestRegSize) {
1563 case TargetOpcode::G_XOR:
1572 if ((*
C >> Bit) & 1)
1591 assert(ProduceNonFlagSettingCondBr &&
1592 "Cannot emit TB(N)Z with speculation tracking!");
1597 LLT Ty =
MRI.getType(TestReg);
1600 assert(Bit < 64 &&
"Bit is too large!");
1604 bool UseWReg =
Bit < 32;
1605 unsigned NecessarySize = UseWReg ? 32 : 64;
1606 if (
Size != NecessarySize)
1607 TestReg = moveScalarRegClass(
1608 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1611 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1612 {AArch64::TBZW, AArch64::TBNZW}};
1613 unsigned Opc = OpcTable[UseWReg][IsNegative];
1620bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1623 assert(AndInst.
getOpcode() == TargetOpcode::G_AND &&
"Expected G_AND only?");
1650 int32_t
Bit = MaybeBit->Value.exactLogBase2();
1657 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1665 assert(ProduceNonFlagSettingCondBr &&
"CBZ does not set flags!");
1667 assert(RBI.getRegBank(CompareReg,
MRI,
TRI)->getID() ==
1668 AArch64::GPRRegBankID &&
1669 "Expected GPRs only?");
1670 auto Ty =
MRI.getType(CompareReg);
1673 assert(Width <= 64 &&
"Expected width to be at most 64?");
1674 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1675 {AArch64::CBNZW, AArch64::CBNZX}};
1676 unsigned Opc = OpcTable[IsNegative][Width == 64];
1677 auto BranchMI = MIB.
buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1682bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1685 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1697 I.eraseFromParent();
1701bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1704 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1710 if (!ProduceNonFlagSettingCondBr)
1729 if (VRegAndVal && !AndInst) {
1730 int64_t
C = VRegAndVal->Value.getSExtValue();
1736 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1737 I.eraseFromParent();
1745 emitTestBit(LHS, Bit,
true, DestMBB, MIB);
1746 I.eraseFromParent();
1754 emitTestBit(LHS, Bit,
false, DestMBB, MIB);
1755 I.eraseFromParent();
1769 if (VRegAndVal && VRegAndVal->Value == 0) {
1777 tryOptAndIntoCompareBranch(
1779 I.eraseFromParent();
1784 auto LHSTy =
MRI.getType(LHS);
1785 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1787 I.eraseFromParent();
1796bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1799 assert(
I.getOpcode() == TargetOpcode::G_BRCOND);
1800 if (tryOptCompareBranchFedByICmp(
I, ICmp, MIB))
1810 I.eraseFromParent();
1814bool AArch64InstructionSelector::selectCompareBranch(
1816 Register CondReg =
I.getOperand(0).getReg();
1821 if (CCMIOpc == TargetOpcode::G_FCMP)
1822 return selectCompareBranchFedByFCmp(
I, *CCMI, MIB);
1823 if (CCMIOpc == TargetOpcode::G_ICMP)
1824 return selectCompareBranchFedByICmp(
I, *CCMI, MIB);
1829 if (ProduceNonFlagSettingCondBr) {
1830 emitTestBit(CondReg, 0,
true,
1831 I.getOperand(1).getMBB(), MIB);
1832 I.eraseFromParent();
1842 .
addMBB(
I.getOperand(1).getMBB());
1843 I.eraseFromParent();
1851 assert(
MRI.getType(Reg).isVector() &&
"Expected a *vector* shift operand");
1862 return std::nullopt;
1864 int64_t Imm = *ShiftImm;
1866 return std::nullopt;
1870 return std::nullopt;
1873 return std::nullopt;
1877 return std::nullopt;
1881 return std::nullopt;
1885 return std::nullopt;
1891bool AArch64InstructionSelector::selectVectorSHL(
MachineInstr &
I,
1893 assert(
I.getOpcode() == TargetOpcode::G_SHL);
1894 Register DstReg =
I.getOperand(0).getReg();
1895 const LLT Ty =
MRI.getType(DstReg);
1896 Register Src1Reg =
I.getOperand(1).getReg();
1897 Register Src2Reg =
I.getOperand(2).getReg();
1908 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1910 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1912 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1914 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1916 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1918 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1920 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1926 auto Shl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg});
1932 I.eraseFromParent();
1936bool AArch64InstructionSelector::selectVectorAshrLshr(
1938 assert(
I.getOpcode() == TargetOpcode::G_ASHR ||
1939 I.getOpcode() == TargetOpcode::G_LSHR);
1940 Register DstReg =
I.getOperand(0).getReg();
1941 const LLT Ty =
MRI.getType(DstReg);
1942 Register Src1Reg =
I.getOperand(1).getReg();
1943 Register Src2Reg =
I.getOperand(2).getReg();
1948 bool IsASHR =
I.getOpcode() == TargetOpcode::G_ASHR;
1958 unsigned NegOpc = 0;
1960 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1962 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1963 NegOpc = AArch64::NEGv2i64;
1965 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1966 NegOpc = AArch64::NEGv4i32;
1968 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1969 NegOpc = AArch64::NEGv2i32;
1971 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1972 NegOpc = AArch64::NEGv4i16;
1974 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1975 NegOpc = AArch64::NEGv8i16;
1977 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1978 NegOpc = AArch64::NEGv16i8;
1980 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1981 NegOpc = AArch64::NEGv8i8;
1987 auto Neg = MIB.
buildInstr(NegOpc, {RC}, {Src2Reg});
1989 auto SShl = MIB.
buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1991 I.eraseFromParent();
1995bool AArch64InstructionSelector::selectVaStartAAPCS(
2000bool AArch64InstructionSelector::selectVaStartDarwin(
2003 Register ListReg =
I.getOperand(0).getReg();
2005 Register ArgsAddrReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2016 BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::ADDXri))
2024 MIB =
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::STRXui))
2031 I.eraseFromParent();
2035void AArch64InstructionSelector::materializeLargeCMVal(
2041 auto MovZ = MIB.
buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2052 :
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2054 if (
auto *GV = dyn_cast<GlobalValue>(V)) {
2056 GV, MovZ->getOperand(1).getOffset(), Flags));
2060 MovZ->getOperand(1).getOffset(), Flags));
2066 Register DstReg = BuildMovK(MovZ.getReg(0),
2072bool AArch64InstructionSelector::preISelLower(
MachineInstr &
I) {
2077 switch (
I.getOpcode()) {
2078 case TargetOpcode::G_STORE: {
2079 bool Changed = contractCrossBankCopyIntoStore(
I,
MRI);
2087 SrcOp.setReg(NewSrc);
2088 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass,
MRI);
2093 case TargetOpcode::G_PTR_ADD:
2094 return convertPtrAddToAdd(
I,
MRI);
2095 case TargetOpcode::G_LOAD: {
2100 Register DstReg =
I.getOperand(0).getReg();
2101 const LLT DstTy =
MRI.getType(DstReg);
2107 case AArch64::G_DUP: {
2109 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2113 MRI.setType(
I.getOperand(0).getReg(),
2115 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2116 I.getOperand(1).setReg(NewSrc.getReg(0));
2119 case TargetOpcode::G_UITOFP:
2120 case TargetOpcode::G_SITOFP: {
2125 Register SrcReg =
I.getOperand(1).getReg();
2126 LLT SrcTy =
MRI.getType(SrcReg);
2127 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2131 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::FPRRegBankID) {
2132 if (
I.getOpcode() == TargetOpcode::G_SITOFP)
2133 I.setDesc(
TII.get(AArch64::G_SITOF));
2135 I.setDesc(
TII.get(AArch64::G_UITOF));
2153bool AArch64InstructionSelector::convertPtrAddToAdd(
2155 assert(
I.getOpcode() == TargetOpcode::G_PTR_ADD &&
"Expected G_PTR_ADD");
2156 Register DstReg =
I.getOperand(0).getReg();
2157 Register AddOp1Reg =
I.getOperand(1).getReg();
2158 const LLT PtrTy =
MRI.getType(DstReg);
2162 const LLT CastPtrTy =
2167 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2169 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2173 I.setDesc(
TII.get(TargetOpcode::G_ADD));
2174 MRI.setType(DstReg, CastPtrTy);
2175 I.getOperand(1).setReg(PtrToInt.getReg(0));
2176 if (!select(*PtrToInt)) {
2177 LLVM_DEBUG(
dbgs() <<
"Failed to select G_PTRTOINT in convertPtrAddToAdd");
2186 I.getOperand(2).setReg(NegatedReg);
2187 I.setDesc(
TII.get(TargetOpcode::G_SUB));
2191bool AArch64InstructionSelector::earlySelectSHL(
MachineInstr &
I,
2196 assert(
I.getOpcode() == TargetOpcode::G_SHL &&
"unexpected op");
2197 const auto &MO =
I.getOperand(2);
2202 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2206 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2207 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2209 if (!Imm1Fn || !Imm2Fn)
2213 MIB.
buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2216 for (
auto &RenderFn : *Imm1Fn)
2218 for (
auto &RenderFn : *Imm2Fn)
2221 I.eraseFromParent();
2225bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2227 assert(
I.getOpcode() == TargetOpcode::G_STORE &&
"Expected G_STORE");
2245 LLT DefDstTy =
MRI.getType(DefDstReg);
2246 Register StoreSrcReg =
I.getOperand(0).getReg();
2247 LLT StoreSrcTy =
MRI.getType(StoreSrcReg);
2258 if (RBI.getRegBank(StoreSrcReg,
MRI,
TRI) ==
2259 RBI.getRegBank(DefDstReg,
MRI,
TRI))
2263 I.getOperand(0).setReg(DefDstReg);
2267bool AArch64InstructionSelector::earlySelect(
MachineInstr &
I) {
2268 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2269 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2275 switch (
I.getOpcode()) {
2276 case AArch64::G_DUP: {
2279 Register Src =
I.getOperand(1).getReg();
2284 Register Dst =
I.getOperand(0).getReg();
2286 MRI.getType(Dst).getNumElements(),
2288 ValAndVReg->Value));
2289 if (!emitConstantVector(Dst, CV, MIB,
MRI))
2291 I.eraseFromParent();
2294 case TargetOpcode::G_SEXT:
2297 if (selectUSMovFromExtend(
I,
MRI))
2300 case TargetOpcode::G_BR:
2302 case TargetOpcode::G_SHL:
2303 return earlySelectSHL(
I,
MRI);
2304 case TargetOpcode::G_CONSTANT: {
2305 bool IsZero =
false;
2306 if (
I.getOperand(1).isCImm())
2307 IsZero =
I.getOperand(1).getCImm()->isZero();
2308 else if (
I.getOperand(1).isImm())
2309 IsZero =
I.getOperand(1).getImm() == 0;
2314 Register DefReg =
I.getOperand(0).getReg();
2315 LLT Ty =
MRI.getType(DefReg);
2317 I.getOperand(1).ChangeToRegister(AArch64::XZR,
false);
2318 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
2320 I.getOperand(1).ChangeToRegister(AArch64::WZR,
false);
2321 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass,
MRI);
2325 I.setDesc(
TII.get(TargetOpcode::COPY));
2329 case TargetOpcode::G_ADD: {
2338 Register AddDst =
I.getOperand(0).getReg();
2339 Register AddLHS =
I.getOperand(1).getReg();
2340 Register AddRHS =
I.getOperand(2).getReg();
2342 LLT Ty =
MRI.getType(AddLHS);
2351 if (!
MRI.hasOneNonDBGUse(Reg))
2365 MRI.getType(
Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2375 Cmp = MatchCmp(AddRHS);
2379 auto &PredOp =
Cmp->getOperand(1);
2384 emitIntegerCompare(
Cmp->getOperand(2),
2385 Cmp->getOperand(3), PredOp, MIB);
2386 emitCSINC(AddDst, AddLHS, AddLHS, InvCC, MIB);
2387 I.eraseFromParent();
2390 case TargetOpcode::G_OR: {
2394 Register Dst =
I.getOperand(0).getReg();
2395 LLT Ty =
MRI.getType(Dst);
2414 if (ShiftImm >
Size || ((1ULL << ShiftImm) - 1ULL) !=
uint64_t(MaskImm))
2417 int64_t Immr =
Size - ShiftImm;
2418 int64_t Imms =
Size - ShiftImm - 1;
2419 unsigned Opc =
Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2420 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2421 I.eraseFromParent();
2424 case TargetOpcode::G_FENCE: {
2425 if (
I.getOperand(1).getImm() == 0)
2429 .
addImm(
I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2430 I.eraseFromParent();
2439 assert(
I.getParent() &&
"Instruction should be in a basic block!");
2440 assert(
I.getParent()->getParent() &&
"Instruction should be in a function!");
2447 if (Subtarget->requiresStrictAlign()) {
2449 LLVM_DEBUG(
dbgs() <<
"AArch64 GISel does not support strict-align yet\n");
2455 unsigned Opcode =
I.getOpcode();
2457 if (!
I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2460 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2463 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2464 const Register DefReg =
I.getOperand(0).getReg();
2465 const LLT DefTy =
MRI.getType(DefReg);
2468 MRI.getRegClassOrRegBank(DefReg);
2478 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2485 I.setDesc(
TII.get(TargetOpcode::PHI));
2487 return RBI.constrainGenericRegister(DefReg, *DefRC,
MRI);
2493 if (
I.isDebugInstr())
2500 if (
I.getNumOperands() !=
I.getNumExplicitOperands()) {
2502 dbgs() <<
"Generic instruction has unexpected implicit operands\n");
2509 if (preISelLower(
I)) {
2510 Opcode =
I.getOpcode();
2521 if (selectImpl(
I, *CoverageInfo))
2525 I.getOperand(0).isReg() ?
MRI.getType(
I.getOperand(0).getReg()) :
LLT{};
2528 case TargetOpcode::G_SBFX:
2529 case TargetOpcode::G_UBFX: {
2530 static const unsigned OpcTable[2][2] = {
2531 {AArch64::UBFMWri, AArch64::UBFMXri},
2532 {AArch64::SBFMWri, AArch64::SBFMXri}};
2533 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2535 unsigned Opc = OpcTable[IsSigned][
Size == 64];
2538 assert(Cst1 &&
"Should have gotten a constant for src 1?");
2541 assert(Cst2 &&
"Should have gotten a constant for src 2?");
2542 auto LSB = Cst1->Value.getZExtValue();
2543 auto Width = Cst2->Value.getZExtValue();
2545 MIB.
buildInstr(Opc, {
I.getOperand(0)}, {
I.getOperand(1)})
2547 .
addImm(LSB + Width - 1);
2548 I.eraseFromParent();
2551 case TargetOpcode::G_BRCOND:
2552 return selectCompareBranch(
I, MF,
MRI);
2554 case TargetOpcode::G_BRINDIRECT: {
2556 if (std::optional<uint16_t> BADisc =
2557 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(Fn)) {
2561 MI.addReg(AArch64::XZR);
2562 I.eraseFromParent();
2565 I.setDesc(
TII.get(AArch64::BR));
2569 case TargetOpcode::G_BRJT:
2570 return selectBrJT(
I,
MRI);
2572 case AArch64::G_ADD_LOW: {
2578 if (BaseMI->
getOpcode() != AArch64::ADRP) {
2579 I.setDesc(
TII.get(AArch64::ADDXri));
2584 "Expected small code model");
2586 auto Op2 =
I.getOperand(2);
2587 auto MovAddr = MIB.
buildInstr(AArch64::MOVaddr, {
I.getOperand(0)}, {})
2588 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2589 Op1.getTargetFlags())
2591 Op2.getTargetFlags());
2592 I.eraseFromParent();
2596 case TargetOpcode::G_FCONSTANT:
2597 case TargetOpcode::G_CONSTANT: {
2598 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2607 const Register DefReg =
I.getOperand(0).getReg();
2608 const LLT DefTy =
MRI.getType(DefReg);
2614 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2616 <<
" constant, expected: " << s16 <<
" or " << s32
2617 <<
" or " << s64 <<
" or " << s128 <<
'\n');
2621 if (RB.
getID() != AArch64::FPRRegBankID) {
2623 <<
" constant on bank: " << RB
2624 <<
", expected: FPR\n");
2632 if (DefSize != 128 &&
I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2636 if (Ty != p0 && Ty != s8 && Ty != s16) {
2638 <<
" constant, expected: " << s32 <<
", " << s64
2639 <<
", or " << p0 <<
'\n');
2643 if (RB.
getID() != AArch64::GPRRegBankID) {
2645 <<
" constant on bank: " << RB
2646 <<
", expected: GPR\n");
2663 if (TLI->isFPImmLegal(
I.getOperand(1).getFPImm()->getValueAPF(),
2670 auto *FPImm =
I.getOperand(1).getFPImm();
2673 LLVM_DEBUG(
dbgs() <<
"Failed to load double constant pool entry\n");
2677 I.eraseFromParent();
2678 return RBI.constrainGenericRegister(DefReg, FPRRC,
MRI);
2682 assert((DefSize == 32 || DefSize == 64) &&
"Unexpected const def size");
2684 const Register DefGPRReg =
MRI.createVirtualRegister(
2685 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2691 if (!RBI.constrainGenericRegister(DefReg, FPRRC,
MRI)) {
2692 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_FCONSTANT def operand\n");
2700 }
else if (
I.getOperand(1).isCImm()) {
2701 uint64_t Val =
I.getOperand(1).getCImm()->getZExtValue();
2702 I.getOperand(1).ChangeToImmediate(Val);
2703 }
else if (
I.getOperand(1).isImm()) {
2704 uint64_t Val =
I.getOperand(1).getImm();
2705 I.getOperand(1).ChangeToImmediate(Val);
2708 const unsigned MovOpc =
2709 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2710 I.setDesc(
TII.get(MovOpc));
2714 case TargetOpcode::G_EXTRACT: {
2715 Register DstReg =
I.getOperand(0).getReg();
2716 Register SrcReg =
I.getOperand(1).getReg();
2717 LLT SrcTy =
MRI.getType(SrcReg);
2718 LLT DstTy =
MRI.getType(DstReg);
2730 unsigned Offset =
I.getOperand(2).getImm();
2739 if (SrcRB.
getID() == AArch64::GPRRegBankID) {
2741 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {})
2743 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2745 AArch64::GPR64RegClass, NewI->getOperand(0));
2746 I.eraseFromParent();
2752 unsigned LaneIdx =
Offset / 64;
2754 DstReg, DstRB,
LLT::scalar(64), SrcReg, LaneIdx, MIB);
2757 I.eraseFromParent();
2761 I.setDesc(
TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2767 "unexpected G_EXTRACT types");
2774 .addReg(DstReg, 0, AArch64::sub_32);
2775 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
2776 AArch64::GPR32RegClass,
MRI);
2777 I.getOperand(0).setReg(DstReg);
2782 case TargetOpcode::G_INSERT: {
2783 LLT SrcTy =
MRI.getType(
I.getOperand(2).getReg());
2784 LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
2791 I.setDesc(
TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2792 unsigned LSB =
I.getOperand(3).getImm();
2793 unsigned Width =
MRI.getType(
I.getOperand(2).getReg()).getSizeInBits();
2794 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2799 "unexpected G_INSERT types");
2805 TII.get(AArch64::SUBREG_TO_REG))
2808 .
addUse(
I.getOperand(2).getReg())
2809 .
addImm(AArch64::sub_32);
2810 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
2811 AArch64::GPR32RegClass,
MRI);
2812 I.getOperand(2).setReg(SrcReg);
2816 case TargetOpcode::G_FRAME_INDEX: {
2823 I.setDesc(
TII.get(AArch64::ADDXri));
2832 case TargetOpcode::G_GLOBAL_VALUE: {
2835 if (
I.getOperand(1).isSymbol()) {
2836 OpFlags =
I.getOperand(1).getTargetFlags();
2840 GV =
I.getOperand(1).getGlobal();
2842 return selectTLSGlobalValue(
I,
MRI);
2843 OpFlags = STI.ClassifyGlobalReference(GV,
TM);
2847 I.setDesc(
TII.get(AArch64::LOADgot));
2848 I.getOperand(1).setTargetFlags(OpFlags);
2850 !
TM.isPositionIndependent()) {
2852 materializeLargeCMVal(
I, GV, OpFlags);
2853 I.eraseFromParent();
2856 I.setDesc(
TII.get(AArch64::ADR));
2857 I.getOperand(1).setTargetFlags(OpFlags);
2859 I.setDesc(
TII.get(AArch64::MOVaddr));
2862 MIB.addGlobalAddress(GV,
I.getOperand(1).getOffset(),
2868 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2869 return selectPtrAuthGlobalValue(
I,
MRI);
2871 case TargetOpcode::G_ZEXTLOAD:
2872 case TargetOpcode::G_LOAD:
2873 case TargetOpcode::G_STORE: {
2875 bool IsZExtLoad =
I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2889 if (Order != AtomicOrdering::NotAtomic &&
2890 Order != AtomicOrdering::Unordered &&
2891 Order != AtomicOrdering::Monotonic) {
2892 assert(!isa<GZExtLoad>(LdSt));
2893 assert(MemSizeInBytes <= 8 &&
2894 "128-bit atomics should already be custom-legalized");
2896 if (isa<GLoad>(LdSt)) {
2897 static constexpr unsigned LDAPROpcodes[] = {
2898 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2899 static constexpr unsigned LDAROpcodes[] = {
2900 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2902 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2905 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2907 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2908 AArch64::STLRW, AArch64::STLRX};
2910 if (
MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2912 Register NewVal =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2913 MIB.
buildInstr(TargetOpcode::COPY, {NewVal}, {})
2914 .addReg(
I.getOperand(0).getReg(), 0, AArch64::sub_32);
2915 I.getOperand(0).setReg(NewVal);
2917 I.setDesc(
TII.get(Opcodes[
Log2_32(MemSizeInBytes)]));
2928 "Load/Store pointer operand isn't a GPR");
2929 assert(
MRI.getType(PtrReg).isPointer() &&
2930 "Load/Store pointer operand isn't a pointer");
2934 const LLT ValTy =
MRI.getType(ValReg);
2939 if (isa<GStore>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2942 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2948 .addReg(ValReg, 0,
SubReg)
2950 RBI.constrainGenericRegister(Copy, *RC,
MRI);
2952 }
else if (isa<GLoad>(LdSt) && ValTy.
getSizeInBits() > MemSizeInBits) {
2955 if (RB.
getID() == AArch64::FPRRegBankID) {
2958 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2965 MRI.setRegBank(NewDst, RB);
2968 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2972 auto SubRegRC = getRegClassForTypeOnBank(
MRI.getType(OldDst), RB);
2973 RBI.constrainGenericRegister(OldDst, *SubRegRC,
MRI);
2980 auto SelectLoadStoreAddressingMode = [&]() ->
MachineInstr * {
2981 bool IsStore = isa<GStore>(
I);
2982 const unsigned NewOpc =
2984 if (NewOpc ==
I.getOpcode())
2988 selectAddrModeIndexed(
I.getOperand(1), MemSizeInBytes);
2991 I.setDesc(
TII.get(NewOpc));
2997 auto NewInst = MIB.
buildInstr(NewOpc, {}, {},
I.getFlags());
2998 Register CurValReg =
I.getOperand(0).getReg();
2999 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3000 NewInst.cloneMemRefs(
I);
3001 for (
auto &Fn : *AddrModeFns)
3003 I.eraseFromParent();
3012 if (Opcode == TargetOpcode::G_STORE) {
3015 if (CVal && CVal->Value == 0) {
3017 case AArch64::STRWui:
3018 case AArch64::STRHHui:
3019 case AArch64::STRBBui:
3020 LoadStore->getOperand(0).setReg(AArch64::WZR);
3022 case AArch64::STRXui:
3023 LoadStore->getOperand(0).setReg(AArch64::XZR);
3029 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3030 ValTy ==
LLT::scalar(64) && MemSizeInBits == 32)) {
3033 if (
MRI.getType(
LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3037 Register LdReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3042 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3045 .
addImm(AArch64::sub_32);
3047 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3053 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3054 case TargetOpcode::G_INDEXED_SEXTLOAD:
3055 return selectIndexedExtLoad(
I,
MRI);
3056 case TargetOpcode::G_INDEXED_LOAD:
3057 return selectIndexedLoad(
I,
MRI);
3058 case TargetOpcode::G_INDEXED_STORE:
3059 return selectIndexedStore(cast<GIndexedStore>(
I),
MRI);
3061 case TargetOpcode::G_LSHR:
3062 case TargetOpcode::G_ASHR:
3063 if (
MRI.getType(
I.getOperand(0).getReg()).isVector())
3064 return selectVectorAshrLshr(
I,
MRI);
3066 case TargetOpcode::G_SHL:
3067 if (Opcode == TargetOpcode::G_SHL &&
3068 MRI.getType(
I.getOperand(0).getReg()).isVector())
3069 return selectVectorSHL(
I,
MRI);
3076 Register SrcReg =
I.getOperand(1).getReg();
3077 Register ShiftReg =
I.getOperand(2).getReg();
3078 const LLT ShiftTy =
MRI.getType(ShiftReg);
3079 const LLT SrcTy =
MRI.getType(SrcReg);
3084 auto Trunc = MIB.
buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3085 .addReg(ShiftReg, 0, AArch64::sub_32);
3086 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3087 I.getOperand(2).setReg(Trunc.getReg(0));
3091 case TargetOpcode::G_OR: {
3098 const Register DefReg =
I.getOperand(0).getReg();
3102 if (NewOpc ==
I.getOpcode())
3105 I.setDesc(
TII.get(NewOpc));
3113 case TargetOpcode::G_PTR_ADD: {
3114 emitADD(
I.getOperand(0).getReg(),
I.getOperand(1),
I.getOperand(2), MIB);
3115 I.eraseFromParent();
3119 case TargetOpcode::G_SADDE:
3120 case TargetOpcode::G_UADDE:
3121 case TargetOpcode::G_SSUBE:
3122 case TargetOpcode::G_USUBE:
3123 case TargetOpcode::G_SADDO:
3124 case TargetOpcode::G_UADDO:
3125 case TargetOpcode::G_SSUBO:
3126 case TargetOpcode::G_USUBO:
3127 return selectOverflowOp(
I,
MRI);
3129 case TargetOpcode::G_PTRMASK: {
3130 Register MaskReg =
I.getOperand(2).getReg();
3137 I.setDesc(
TII.get(AArch64::ANDXri));
3138 I.getOperand(2).ChangeToImmediate(
3143 case TargetOpcode::G_PTRTOINT:
3144 case TargetOpcode::G_TRUNC: {
3145 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3146 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3148 const Register DstReg =
I.getOperand(0).getReg();
3149 const Register SrcReg =
I.getOperand(1).getReg();
3156 dbgs() <<
"G_TRUNC/G_PTRTOINT input/output on different banks\n");
3160 if (DstRB.
getID() == AArch64::GPRRegBankID) {
3169 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC,
MRI) ||
3170 !RBI.constrainGenericRegister(DstReg, *DstRC,
MRI)) {
3171 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_TRUNC/G_PTRTOINT\n");
3175 if (DstRC == SrcRC) {
3177 }
else if (Opcode == TargetOpcode::G_TRUNC && DstTy ==
LLT::scalar(32) &&
3181 }
else if (DstRC == &AArch64::GPR32RegClass &&
3182 SrcRC == &AArch64::GPR64RegClass) {
3183 I.getOperand(1).setSubReg(AArch64::sub_32);
3186 dbgs() <<
"Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3190 I.setDesc(
TII.get(TargetOpcode::COPY));
3192 }
else if (DstRB.
getID() == AArch64::FPRRegBankID) {
3195 I.setDesc(
TII.get(AArch64::XTNv4i16));
3205 I.eraseFromParent();
3210 if (Opcode == TargetOpcode::G_PTRTOINT) {
3211 assert(DstTy.
isVector() &&
"Expected an FPR ptrtoint to be a vector");
3212 I.setDesc(
TII.get(TargetOpcode::COPY));
3220 case TargetOpcode::G_ANYEXT: {
3221 if (selectUSMovFromExtend(
I,
MRI))
3224 const Register DstReg =
I.getOperand(0).getReg();
3225 const Register SrcReg =
I.getOperand(1).getReg();
3228 if (RBDst.
getID() != AArch64::GPRRegBankID) {
3230 <<
", expected: GPR\n");
3235 if (RBSrc.
getID() != AArch64::GPRRegBankID) {
3237 <<
", expected: GPR\n");
3241 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
3244 LLVM_DEBUG(
dbgs() <<
"G_ANYEXT operand has no size, not a gvreg?\n");
3248 if (DstSize != 64 && DstSize > 32) {
3250 <<
", expected: 32 or 64\n");
3256 Register ExtSrc =
MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3261 .
addImm(AArch64::sub_32);
3262 I.getOperand(1).setReg(ExtSrc);
3267 case TargetOpcode::G_ZEXT:
3268 case TargetOpcode::G_SEXT_INREG:
3269 case TargetOpcode::G_SEXT: {
3270 if (selectUSMovFromExtend(
I,
MRI))
3273 unsigned Opcode =
I.getOpcode();
3274 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3275 const Register DefReg =
I.getOperand(0).getReg();
3276 Register SrcReg =
I.getOperand(1).getReg();
3277 const LLT DstTy =
MRI.getType(DefReg);
3278 const LLT SrcTy =
MRI.getType(SrcReg);
3284 if (Opcode == TargetOpcode::G_SEXT_INREG)
3285 SrcSize =
I.getOperand(2).getImm();
3291 AArch64::GPRRegBankID &&
3292 "Unexpected ext regbank");
3305 RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() == AArch64::GPRRegBankID;
3306 if (LoadMI && IsGPR) {
3308 unsigned BytesLoaded =
MemOp->getSize().getValue();
3315 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3317 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3318 const Register ZReg = AArch64::WZR;
3319 MIB.
buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3322 MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3325 .
addImm(AArch64::sub_32);
3327 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3329 LLVM_DEBUG(
dbgs() <<
"Failed to constrain G_ZEXT destination\n");
3333 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3339 I.eraseFromParent();
3344 if (DstSize == 64) {
3345 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3347 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3353 SrcReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG,
3354 {&AArch64::GPR64RegClass}, {})
3361 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3365 }
else if (DstSize <= 32) {
3366 ExtI = MIB.
buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3375 I.eraseFromParent();
3379 case TargetOpcode::G_SITOFP:
3380 case TargetOpcode::G_UITOFP:
3381 case TargetOpcode::G_FPTOSI:
3382 case TargetOpcode::G_FPTOUI: {
3383 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg()),
3384 SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3386 if (NewOpc == Opcode)
3389 I.setDesc(
TII.get(NewOpc));
3396 case TargetOpcode::G_FREEZE:
3399 case TargetOpcode::G_INTTOPTR:
3404 case TargetOpcode::G_BITCAST:
3412 case TargetOpcode::G_SELECT: {
3413 auto &Sel = cast<GSelect>(
I);
3414 const Register CondReg = Sel.getCondReg();
3415 const Register TReg = Sel.getTrueReg();
3416 const Register FReg = Sel.getFalseReg();
3418 if (tryOptSelect(Sel))
3423 Register DeadVReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3424 auto TstMI = MIB.
buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3427 if (!emitSelect(Sel.getReg(0), TReg, FReg,
AArch64CC::NE, MIB))
3429 Sel.eraseFromParent();
3432 case TargetOpcode::G_ICMP: {
3445 emitIntegerCompare(
I.getOperand(2),
I.getOperand(3),
I.getOperand(1), MIB);
3446 emitCSINC(
I.getOperand(0).getReg(), AArch64::WZR,
3447 AArch64::WZR, InvCC, MIB);
3448 I.eraseFromParent();
3452 case TargetOpcode::G_FCMP: {
3455 if (!emitFPCompare(
I.getOperand(2).getReg(),
I.getOperand(3).getReg(), MIB,
3457 !emitCSetForFCmp(
I.getOperand(0).getReg(), Pred, MIB))
3459 I.eraseFromParent();
3462 case TargetOpcode::G_VASTART:
3463 return STI.isTargetDarwin() ? selectVaStartDarwin(
I, MF,
MRI)
3464 : selectVaStartAAPCS(
I, MF,
MRI);
3465 case TargetOpcode::G_INTRINSIC:
3466 return selectIntrinsic(
I,
MRI);
3467 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3468 return selectIntrinsicWithSideEffects(
I,
MRI);
3469 case TargetOpcode::G_IMPLICIT_DEF: {
3470 I.setDesc(
TII.get(TargetOpcode::IMPLICIT_DEF));
3471 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3472 const Register DstReg =
I.getOperand(0).getReg();
3475 RBI.constrainGenericRegister(DstReg, *DstRC,
MRI);
3478 case TargetOpcode::G_BLOCK_ADDR: {
3479 Function *BAFn =
I.getOperand(1).getBlockAddress()->getFunction();
3480 if (std::optional<uint16_t> BADisc =
3481 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(*BAFn)) {
3482 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3483 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3491 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
3492 AArch64::GPR64RegClass,
MRI);
3493 I.eraseFromParent();
3497 materializeLargeCMVal(
I,
I.getOperand(1).getBlockAddress(), 0);
3498 I.eraseFromParent();
3501 I.setDesc(
TII.get(AArch64::MOVaddrBA));
3502 auto MovMI =
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(AArch64::MOVaddrBA),
3503 I.getOperand(0).getReg())
3507 I.getOperand(1).getBlockAddress(), 0,
3509 I.eraseFromParent();
3513 case AArch64::G_DUP: {
3519 if (RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
3520 AArch64::GPRRegBankID)
3522 LLT VecTy =
MRI.getType(
I.getOperand(0).getReg());
3524 I.setDesc(
TII.get(AArch64::DUPv8i8gpr));
3526 I.setDesc(
TII.get(AArch64::DUPv16i8gpr));
3528 I.setDesc(
TII.get(AArch64::DUPv4i16gpr));
3530 I.setDesc(
TII.get(AArch64::DUPv8i16gpr));
3535 case TargetOpcode::G_BUILD_VECTOR:
3536 return selectBuildVector(
I,
MRI);
3537 case TargetOpcode::G_MERGE_VALUES:
3539 case TargetOpcode::G_UNMERGE_VALUES:
3541 case TargetOpcode::G_SHUFFLE_VECTOR:
3542 return selectShuffleVector(
I,
MRI);
3543 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3544 return selectExtractElt(
I,
MRI);
3545 case TargetOpcode::G_CONCAT_VECTORS:
3546 return selectConcatVectors(
I,
MRI);
3547 case TargetOpcode::G_JUMP_TABLE:
3548 return selectJumpTable(
I,
MRI);
3549 case TargetOpcode::G_MEMCPY:
3550 case TargetOpcode::G_MEMCPY_INLINE:
3551 case TargetOpcode::G_MEMMOVE:
3552 case TargetOpcode::G_MEMSET:
3553 assert(STI.hasMOPS() &&
"Shouldn't get here without +mops feature");
3554 return selectMOPS(
I,
MRI);
3560bool AArch64InstructionSelector::selectAndRestoreState(
MachineInstr &
I) {
3567bool AArch64InstructionSelector::selectMOPS(
MachineInstr &GI,
3571 case TargetOpcode::G_MEMCPY:
3572 case TargetOpcode::G_MEMCPY_INLINE:
3573 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3575 case TargetOpcode::G_MEMMOVE:
3576 Mopcode = AArch64::MOPSMemoryMovePseudo;
3578 case TargetOpcode::G_MEMSET:
3580 Mopcode = AArch64::MOPSMemorySetPseudo;
3589 const Register DstPtrCopy =
MRI.cloneVirtualRegister(DstPtr.getReg());
3590 const Register SrcValCopy =
MRI.cloneVirtualRegister(SrcOrVal.getReg());
3593 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3594 const auto &SrcValRegClass =
3595 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3598 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass,
MRI);
3599 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass,
MRI);
3600 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass,
MRI);
3610 Register DefDstPtr =
MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3611 Register DefSize =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3613 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSize},
3614 {DstPtrCopy, SizeCopy, SrcValCopy});
3616 Register DefSrcPtr =
MRI.createVirtualRegister(&SrcValRegClass);
3617 MIB.
buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3618 {DstPtrCopy, SrcValCopy, SizeCopy});
3627 assert(
I.getOpcode() == TargetOpcode::G_BRJT &&
"Expected G_BRJT");
3628 Register JTAddr =
I.getOperand(0).getReg();
3629 unsigned JTI =
I.getOperand(1).getIndex();
3638 if (STI.isTargetMachO()) {
3643 assert(STI.isTargetELF() &&
3644 "jump table hardening only supported on MachO/ELF");
3652 I.eraseFromParent();
3656 Register TargetReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3657 Register ScratchReg =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3659 auto JumpTableInst = MIB.
buildInstr(AArch64::JumpTableDest32,
3660 {TargetReg, ScratchReg}, {JTAddr,
Index})
3661 .addJumpTableIndex(JTI);
3663 MIB.
buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3664 {
static_cast<int64_t
>(JTI)});
3666 MIB.
buildInstr(AArch64::BR, {}, {TargetReg});
3667 I.eraseFromParent();
3671bool AArch64InstructionSelector::selectJumpTable(
MachineInstr &
I,
3673 assert(
I.getOpcode() == TargetOpcode::G_JUMP_TABLE &&
"Expected jump table");
3674 assert(
I.getOperand(1).isJTI() &&
"Jump table op should have a JTI!");
3676 Register DstReg =
I.getOperand(0).getReg();
3677 unsigned JTI =
I.getOperand(1).getIndex();
3680 MIB.
buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3683 I.eraseFromParent();
3687bool AArch64InstructionSelector::selectTLSGlobalValue(
3689 if (!STI.isTargetMachO())
3694 const auto &GlobalOp =
I.getOperand(1);
3695 assert(GlobalOp.getOffset() == 0 &&
3696 "Shouldn't have an offset on TLS globals!");
3700 MIB.
buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3703 auto Load = MIB.
buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3704 {LoadGOT.getReg(0)})
3715 assert(Opcode == AArch64::BLR);
3716 Opcode = AArch64::BLRAAZ;
3725 RBI.constrainGenericRegister(
I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3727 I.eraseFromParent();
3731MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3734 auto Undef = MIRBuilder.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3736 auto BuildFn = [&](
unsigned SubregIndex) {
3740 .addImm(SubregIndex);
3748 return BuildFn(AArch64::bsub);
3750 return BuildFn(AArch64::hsub);
3752 return BuildFn(AArch64::ssub);
3754 return BuildFn(AArch64::dsub);
3761AArch64InstructionSelector::emitNarrowVector(
Register DstReg,
Register SrcReg,
3764 LLT DstTy =
MRI.getType(DstReg);
3766 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg,
MRI,
TRI));
3767 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3774 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
3780 .addReg(SrcReg, 0,
SubReg);
3781 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
3785bool AArch64InstructionSelector::selectMergeValues(
3787 assert(
I.getOpcode() == TargetOpcode::G_MERGE_VALUES &&
"unexpected opcode");
3788 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
3789 const LLT SrcTy =
MRI.getType(
I.getOperand(1).getReg());
3793 if (
I.getNumOperands() != 3)
3800 Register DstReg =
I.getOperand(0).getReg();
3801 Register Src1Reg =
I.getOperand(1).getReg();
3802 Register Src2Reg =
I.getOperand(2).getReg();
3803 auto Tmp = MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3804 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
3809 Src2Reg, 1, RB, MIB);
3814 I.eraseFromParent();
3818 if (RB.
getID() != AArch64::GPRRegBankID)
3824 auto *DstRC = &AArch64::GPR64RegClass;
3825 Register SubToRegDef =
MRI.createVirtualRegister(DstRC);
3827 TII.get(TargetOpcode::SUBREG_TO_REG))
3830 .
addUse(
I.getOperand(1).getReg())
3831 .
addImm(AArch64::sub_32);
3832 Register SubToRegDef2 =
MRI.createVirtualRegister(DstRC);
3835 TII.get(TargetOpcode::SUBREG_TO_REG))
3838 .
addUse(
I.getOperand(2).getReg())
3839 .
addImm(AArch64::sub_32);
3841 *
BuildMI(*
I.getParent(),
I,
I.getDebugLoc(),
TII.get(AArch64::BFMXri))
3842 .
addDef(
I.getOperand(0).getReg())
3850 I.eraseFromParent();
3855 const unsigned EltSize) {
3860 CopyOpc = AArch64::DUPi8;
3861 ExtractSubReg = AArch64::bsub;
3864 CopyOpc = AArch64::DUPi16;
3865 ExtractSubReg = AArch64::hsub;
3868 CopyOpc = AArch64::DUPi32;
3869 ExtractSubReg = AArch64::ssub;
3872 CopyOpc = AArch64::DUPi64;
3873 ExtractSubReg = AArch64::dsub;
3877 LLVM_DEBUG(
dbgs() <<
"Elt size '" << EltSize <<
"' unsupported.\n");
3883MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3884 std::optional<Register> DstReg,
const RegisterBank &DstRB,
LLT ScalarTy,
3887 unsigned CopyOpc = 0;
3888 unsigned ExtractSubReg = 0;
3891 dbgs() <<
"Couldn't determine lane copy opcode for instruction.\n");
3896 getRegClassForTypeOnBank(ScalarTy, DstRB,
true);
3898 LLVM_DEBUG(
dbgs() <<
"Could not determine destination register class.\n");
3903 const LLT &VecTy =
MRI.getType(VecReg);
3905 getRegClassForTypeOnBank(VecTy, VecRB,
true);
3907 LLVM_DEBUG(
dbgs() <<
"Could not determine source register class.\n");
3914 DstReg =
MRI.createVirtualRegister(DstRC);
3917 auto Copy = MIRBuilder.
buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3918 .addReg(VecReg, 0, ExtractSubReg);
3919 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3928 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3929 if (!ScalarToVector)
3935 MIRBuilder.
buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3939 RBI.constrainGenericRegister(*DstReg, *DstRC,
MRI);
3943bool AArch64InstructionSelector::selectExtractElt(
3945 assert(
I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3946 "unexpected opcode!");
3947 Register DstReg =
I.getOperand(0).getReg();
3948 const LLT NarrowTy =
MRI.getType(DstReg);
3949 const Register SrcReg =
I.getOperand(1).getReg();
3950 const LLT WideTy =
MRI.getType(SrcReg);
3953 "source register size too small!");
3954 assert(!NarrowTy.
isVector() &&
"cannot extract vector into vector!");
3958 assert(LaneIdxOp.
isReg() &&
"Lane index operand was not a register?");
3960 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
3969 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
3973 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3978 I.eraseFromParent();
3982bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3984 unsigned NumElts =
I.getNumOperands() - 1;
3985 Register SrcReg =
I.getOperand(NumElts).getReg();
3986 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
3987 const LLT SrcTy =
MRI.getType(SrcReg);
3989 assert(NarrowTy.
isVector() &&
"Expected an unmerge into vectors");
3991 LLVM_DEBUG(
dbgs() <<
"Unexpected vector type for vec split unmerge");
3998 *RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI);
3999 for (
unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4000 Register Dst =
I.getOperand(OpIdx).getReg();
4002 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4006 I.eraseFromParent();
4010bool AArch64InstructionSelector::selectUnmergeValues(
MachineInstr &
I,
4012 assert(
I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4013 "unexpected opcode");
4016 if (RBI.getRegBank(
I.getOperand(0).getReg(),
MRI,
TRI)->getID() !=
4017 AArch64::FPRRegBankID ||
4018 RBI.getRegBank(
I.getOperand(1).getReg(),
MRI,
TRI)->getID() !=
4019 AArch64::FPRRegBankID) {
4020 LLVM_DEBUG(
dbgs() <<
"Unmerging vector-to-gpr and scalar-to-scalar "
4021 "currently unsupported.\n");
4027 unsigned NumElts =
I.getNumOperands() - 1;
4028 Register SrcReg =
I.getOperand(NumElts).getReg();
4029 const LLT NarrowTy =
MRI.getType(
I.getOperand(0).getReg());
4030 const LLT WideTy =
MRI.getType(SrcReg);
4033 "can only unmerge from vector or s128 types!");
4035 "source register size too small!");
4038 return selectSplitVectorUnmerge(
I,
MRI);
4042 unsigned CopyOpc = 0;
4043 unsigned ExtractSubReg = 0;
4054 unsigned NumInsertRegs = NumElts - 1;
4066 *RBI.getRegBank(SrcReg,
MRI,
TRI));
4070 assert(Found &&
"expected to find last operand's subeg idx");
4071 for (
unsigned Idx = 0;
Idx < NumInsertRegs; ++
Idx) {
4072 Register ImpDefReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4074 *
BuildMI(
MBB,
I,
I.getDebugLoc(),
TII.get(TargetOpcode::IMPLICIT_DEF),
4078 Register InsertReg =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4081 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4098 Register CopyTo =
I.getOperand(0).getReg();
4099 auto FirstCopy = MIB.
buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4100 .addReg(InsertRegs[0], 0, ExtractSubReg);
4104 unsigned LaneIdx = 1;
4105 for (
Register InsReg : InsertRegs) {
4106 Register CopyTo =
I.getOperand(LaneIdx).getReg();
4119 MRI.getRegClassOrNull(
I.getOperand(1).getReg());
4125 RBI.constrainGenericRegister(CopyTo, *RC,
MRI);
4126 I.eraseFromParent();
4130bool AArch64InstructionSelector::selectConcatVectors(
4132 assert(
I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4133 "Unexpected opcode");
4134 Register Dst =
I.getOperand(0).getReg();
4135 Register Op1 =
I.getOperand(1).getReg();
4136 Register Op2 =
I.getOperand(2).getReg();
4137 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4140 I.eraseFromParent();
4145AArch64InstructionSelector::emitConstantPoolEntry(
const Constant *CPVal,
4154MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4162 RC = &AArch64::FPR128RegClass;
4163 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4166 RC = &AArch64::FPR64RegClass;
4167 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4170 RC = &AArch64::FPR32RegClass;
4171 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4174 RC = &AArch64::FPR16RegClass;
4175 Opc = AArch64::LDRHui;
4178 LLVM_DEBUG(
dbgs() <<
"Could not load from constant pool of type "
4184 auto &MF = MIRBuilder.
getMF();
4185 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4186 if (IsTiny && (
Size == 16 ||
Size == 8 ||
Size == 4)) {
4188 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4191 MIRBuilder.
buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4194 LoadMI = &*MIRBuilder.
buildInstr(Opc, {RC}, {Adrp})
4195 .addConstantPoolIndex(
4211static std::pair<unsigned, unsigned>
4213 unsigned Opc, SubregIdx;
4214 if (RB.
getID() == AArch64::GPRRegBankID) {
4216 Opc = AArch64::INSvi8gpr;
4217 SubregIdx = AArch64::bsub;
4218 }
else if (EltSize == 16) {
4219 Opc = AArch64::INSvi16gpr;
4220 SubregIdx = AArch64::ssub;
4221 }
else if (EltSize == 32) {
4222 Opc = AArch64::INSvi32gpr;
4223 SubregIdx = AArch64::ssub;
4224 }
else if (EltSize == 64) {
4225 Opc = AArch64::INSvi64gpr;
4226 SubregIdx = AArch64::dsub;
4232 Opc = AArch64::INSvi8lane;
4233 SubregIdx = AArch64::bsub;
4234 }
else if (EltSize == 16) {
4235 Opc = AArch64::INSvi16lane;
4236 SubregIdx = AArch64::hsub;
4237 }
else if (EltSize == 32) {
4238 Opc = AArch64::INSvi32lane;
4239 SubregIdx = AArch64::ssub;
4240 }
else if (EltSize == 64) {
4241 Opc = AArch64::INSvi64lane;
4242 SubregIdx = AArch64::dsub;
4247 return std::make_pair(Opc, SubregIdx);
4251 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4253 const ComplexRendererFns &RenderFns)
const {
4254 assert(Opcode &&
"Expected an opcode?");
4256 "Function should only be used to produce selected instructions!");
4257 auto MI = MIRBuilder.
buildInstr(Opcode, DstOps, SrcOps);
4259 for (
auto &Fn : *RenderFns)
4266 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4270 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4271 auto Ty =
MRI.getType(
LHS.getReg());
4274 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit type only");
4275 bool Is32Bit =
Size == 32;
4278 if (
auto Fns = selectArithImmed(RHS))
4279 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {
LHS},
4283 if (
auto Fns = selectNegArithImmed(RHS))
4284 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {
LHS},
4288 if (
auto Fns = selectArithExtendedRegister(RHS))
4289 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {
LHS},
4293 if (
auto Fns = selectShiftedRegister(RHS))
4294 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {
LHS},
4296 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {
LHS,
RHS},
4304 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4305 {{AArch64::ADDXri, AArch64::ADDWri},
4306 {AArch64::ADDXrs, AArch64::ADDWrs},
4307 {AArch64::ADDXrr, AArch64::ADDWrr},
4308 {AArch64::SUBXri, AArch64::SUBWri},
4309 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4310 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4317 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4318 {{AArch64::ADDSXri, AArch64::ADDSWri},
4319 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4320 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4321 {AArch64::SUBSXri, AArch64::SUBSWri},
4322 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4323 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4330 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4331 {{AArch64::SUBSXri, AArch64::SUBSWri},
4332 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4333 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4334 {AArch64::ADDSXri, AArch64::ADDSWri},
4335 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4336 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4343 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4345 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4346 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4347 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4354 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4356 bool Is32Bit = (
MRI->getType(
LHS.getReg()).getSizeInBits() == 32);
4357 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4358 return emitInstr(OpcTable[Is32Bit], {Dst}, {
LHS,
RHS}, MIRBuilder);
4365 bool Is32Bit = (
MRI.getType(
LHS.getReg()).getSizeInBits() == 32);
4366 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4367 return emitADDS(
MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4373 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected register operands?");
4377 bool Is32Bit = (
RegSize == 32);
4378 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4379 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4380 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4384 int64_t
Imm = ValAndVReg->Value.getSExtValue();
4387 auto TstMI = MIRBuilder.
buildInstr(OpcTable[0][Is32Bit], {Ty}, {
LHS});
4394 if (
auto Fns = selectLogicalShiftedRegister(RHS))
4395 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {
LHS}, MIRBuilder, Fns);
4396 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {
LHS,
RHS}, MIRBuilder);
4399MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4402 assert(
LHS.isReg() &&
RHS.isReg() &&
"Expected LHS and RHS to be registers!");
4409 assert((
Size == 32 ||
Size == 64) &&
"Expected a 32-bit or 64-bit LHS/RHS?");
4411 if (
auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4413 auto Dst =
MRI.cloneVirtualRegister(
LHS.getReg());
4414 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4417MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4421 LLT Ty =
MRI.getType(Dst);
4423 "Expected a 32-bit scalar register?");
4425 const Register ZReg = AArch64::WZR;
4430 return emitCSINC(Dst, ZReg, ZReg, InvCC1,
4436 emitCSINC(Def1Reg, ZReg, ZReg, InvCC1, MIRBuilder);
4437 emitCSINC(Def2Reg, ZReg, ZReg, InvCC2, MIRBuilder);
4438 auto OrMI = MIRBuilder.
buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4443MachineInstr *AArch64InstructionSelector::emitFPCompare(
4445 std::optional<CmpInst::Predicate> Pred)
const {
4447 LLT Ty =
MRI.getType(LHS);
4451 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4462 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4466 ShouldUseImm =
true;
4470 unsigned CmpOpcTbl[2][3] = {
4471 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4472 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4474 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4486MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4495 const LLT Op1Ty =
MRI.getType(Op1);
4496 const LLT Op2Ty =
MRI.getType(Op2);
4498 if (Op1Ty != Op2Ty) {
4499 LLVM_DEBUG(
dbgs() <<
"Could not do vector concat of differing vector tys");
4502 assert(Op1Ty.
isVector() &&
"Expected a vector for vector concat");
4505 LLVM_DEBUG(
dbgs() <<
"Vector concat not supported for full size vectors");
4521 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op1, MIRBuilder);
4523 emitScalarToVector(ScalarTy.
getSizeInBits(), DstRC, Op2, MIRBuilder);
4524 if (!WidenedOp1 || !WidenedOp2) {
4525 LLVM_DEBUG(
dbgs() <<
"Could not emit a vector from scalar value");
4530 unsigned InsertOpc, InsSubRegIdx;
4531 std::tie(InsertOpc, InsSubRegIdx) =
4535 Dst =
MRI.createVirtualRegister(DstRC);
4556 Size =
TRI.getRegSizeInBits(*RC);
4558 Size =
MRI.getType(Dst).getSizeInBits();
4560 assert(
Size <= 64 &&
"Expected 64 bits or less only!");
4561 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4562 unsigned Opc = OpcTable[
Size == 64];
4563 auto CSINC = MIRBuilder.
buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4571 unsigned Opcode =
I.getOpcode();
4575 bool NeedsNegatedCarry =
4576 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4586 if (SrcMI ==
I.getPrevNode()) {
4587 if (
auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4588 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4589 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4590 CarrySrcMI->isUnsigned() &&
4591 CarrySrcMI->getCarryOutReg() == CarryReg &&
4592 selectAndRestoreState(*SrcMI))
4597 Register DeadReg =
MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4599 if (NeedsNegatedCarry) {
4602 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4606 auto Fns = select12BitValueWithLeftShift(1);
4607 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4610bool AArch64InstructionSelector::selectOverflowOp(
MachineInstr &
I,
4612 auto &CarryMI = cast<GAddSubCarryOut>(
I);
4614 if (
auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&
I)) {
4616 emitCarryIn(
I, CarryInMI->getCarryInReg());
4620 auto OpAndCC = emitOverflowOp(
I.getOpcode(), CarryMI.getDstReg(),
4621 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4623 Register CarryOutReg = CarryMI.getCarryOutReg();
4626 if (!
MRI.use_nodbg_empty(CarryOutReg)) {
4632 emitCSINC(CarryOutReg, ZReg, ZReg,
4633 getInvertedCondCode(OpAndCC.second), MIB);
4636 I.eraseFromParent();
4640std::pair<MachineInstr *, AArch64CC::CondCode>
4641AArch64InstructionSelector::emitOverflowOp(
unsigned Opcode,
Register Dst,
4648 case TargetOpcode::G_SADDO:
4649 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4650 case TargetOpcode::G_UADDO:
4651 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4652 case TargetOpcode::G_SSUBO:
4653 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4654 case TargetOpcode::G_USUBO:
4655 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4656 case TargetOpcode::G_SADDE:
4657 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4658 case TargetOpcode::G_UADDE:
4659 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::HS);
4660 case TargetOpcode::G_SSUBE:
4661 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::VS);
4662 case TargetOpcode::G_USUBE:
4663 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder),
AArch64CC::LO);
4683 unsigned Depth = 0) {
4684 if (!
MRI.hasOneNonDBGUse(Val))
4688 if (isa<GAnyCmp>(ValDef)) {
4690 MustBeFirst =
false;
4696 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4697 bool IsOR = Opcode == TargetOpcode::G_OR;
4709 if (MustBeFirstL && MustBeFirstR)
4715 if (!CanNegateL && !CanNegateR)
4719 CanNegate = WillNegate && CanNegateL && CanNegateR;
4722 MustBeFirst = !CanNegate;
4724 assert(Opcode == TargetOpcode::G_AND &&
"Must be G_AND");
4727 MustBeFirst = MustBeFirstL || MustBeFirstR;
4734MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4739 LLT OpTy =
MRI.getType(LHS);
4741 std::optional<ValueAndVReg>
C;
4745 if (!
C ||
C->Value.sgt(31) ||
C->Value.slt(-31))
4746 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4747 else if (
C->Value.ule(31))
4748 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4750 CCmpOpc = OpTy.
getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4756 assert(STI.hasFullFP16() &&
"Expected Full FP16 for fp16 comparisons");
4757 CCmpOpc = AArch64::FCCMPHrr;
4760 CCmpOpc = AArch64::FCCMPSrr;
4763 CCmpOpc = AArch64::FCCMPDrr;
4773 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4774 CCmp.
addImm(
C->Value.getZExtValue());
4775 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4776 CCmp.
addImm(
C->Value.abs().getZExtValue());
4784MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4791 if (
auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4797 if (isa<GICmp>(Cmp)) {
4808 ExtraCmp = emitFPCompare(LHS, RHS, MIB,
CC);
4819 auto Dst =
MRI.cloneVirtualRegister(LHS);
4820 if (isa<GICmp>(Cmp))
4821 return emitSUBS(Dst,
Cmp->getOperand(2),
Cmp->getOperand(3), MIB);
4822 return emitFPCompare(
Cmp->getOperand(2).getReg(),
4823 Cmp->getOperand(3).getReg(), MIB);
4828 assert(
MRI.hasOneNonDBGUse(Val) &&
"Valid conjunction/disjunction tree");
4830 bool IsOR = Opcode == TargetOpcode::G_OR;
4836 assert(ValidL &&
"Valid conjunction/disjunction tree");
4843 assert(ValidR &&
"Valid conjunction/disjunction tree");
4848 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
4857 bool NegateAfterAll;
4858 if (Opcode == TargetOpcode::G_OR) {
4861 assert(CanNegateR &&
"at least one side must be negatable");
4862 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
4866 NegateAfterR =
true;
4869 NegateR = CanNegateR;
4870 NegateAfterR = !CanNegateR;
4873 NegateAfterAll = !Negate;
4875 assert(Opcode == TargetOpcode::G_AND &&
4876 "Valid conjunction/disjunction tree");
4877 assert(!Negate &&
"Valid conjunction/disjunction tree");
4881 NegateAfterR =
false;
4882 NegateAfterAll =
false;
4898MachineInstr *AArch64InstructionSelector::emitConjunction(
4900 bool DummyCanNegate;
4901 bool DummyMustBeFirst;
4908bool AArch64InstructionSelector::tryOptSelectConjunction(
GSelect &SelI,
4920bool AArch64InstructionSelector::tryOptSelect(
GSelect &
I) {
4944 if (!
MRI.hasOneNonDBGUse(CondDefReg)) {
4946 for (
const MachineInstr &UI :
MRI.use_nodbg_instructions(CondDefReg)) {
4949 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4955 unsigned CondOpc = CondDef->
getOpcode();
4956 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4957 if (tryOptSelectConjunction(
I, *CondDef))
4963 if (CondOpc == TargetOpcode::G_ICMP) {
4991 emitSelect(
I.getOperand(0).getReg(),
I.getOperand(2).getReg(),
4992 I.getOperand(3).getReg(), CondCode, MIB);
4993 I.eraseFromParent();
4997MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5001 "Unexpected MachineOperand");
5038 return emitCMN(LHS, RHSDef->
getOperand(2), MIRBuilder);
5049 LHSDef->
getOpcode() == TargetOpcode::G_AND) {
5052 if (!ValAndVReg || ValAndVReg->Value != 0)
5062bool AArch64InstructionSelector::selectShuffleVector(
5064 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5065 Register Src1Reg =
I.getOperand(1).getReg();
5066 const LLT Src1Ty =
MRI.getType(Src1Reg);
5067 Register Src2Reg =
I.getOperand(2).getReg();
5068 const LLT Src2Ty =
MRI.getType(Src2Reg);
5079 LLVM_DEBUG(
dbgs() <<
"Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5086 for (
int Val : Mask) {
5089 Val = Val < 0 ? 0 : Val;
5090 for (
unsigned Byte = 0;
Byte < BytesPerElt; ++
Byte) {
5108 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5115 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5119 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5125 .addReg(TBL1.getReg(0), 0, AArch64::dsub);
5126 RBI.constrainGenericRegister(
Copy.getReg(0), AArch64::FPR64RegClass,
MRI);
5127 I.eraseFromParent();
5135 auto TBL2 = MIB.
buildInstr(AArch64::TBLv16i8Two, {
I.getOperand(0)},
5138 I.eraseFromParent();
5142MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5152 DstReg =
MRI.createVirtualRegister(DstRC);
5154 unsigned EltSize =
MRI.getType(EltReg).getSizeInBits();
5157 if (RB.
getID() == AArch64::FPRRegBankID) {
5158 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5159 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5161 .
addUse(InsSub->getOperand(0).getReg())
5164 InsElt = MIRBuilder.
buildInstr(Opc, {*DstReg}, {SrcReg})
5173bool AArch64InstructionSelector::selectUSMovFromExtend(
5175 if (
MI.getOpcode() != TargetOpcode::G_SEXT &&
5176 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5177 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5179 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SEXT;
5180 const Register DefReg =
MI.getOperand(0).getReg();
5181 const LLT DstTy =
MRI.getType(DefReg);
5184 if (DstSize != 32 && DstSize != 64)
5188 MI.getOperand(1).getReg(),
MRI);
5194 const LLT &VecTy =
MRI.getType(Src0);
5197 const MachineInstr *ScalarToVector = emitScalarToVector(
5198 VecTy.
getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5199 assert(ScalarToVector &&
"Didn't expect emitScalarToVector to fail!");
5205 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5207 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5209 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5211 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5213 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5222 if (DstSize == 64 && !IsSigned) {
5223 Register NewReg =
MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5224 MIB.
buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5225 ExtI = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5228 .
addImm(AArch64::sub_32);
5229 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
5231 ExtI = MIB.
buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5234 MI.eraseFromParent();
5238MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5241 if (DstSize == 128) {
5242 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5244 Op = AArch64::MOVIv16b_ns;
5246 Op = AArch64::MOVIv8b_ns;
5253 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5260MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5265 if (DstSize == 128) {
5266 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5268 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5270 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5290MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5295 if (DstSize == 128) {
5296 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5298 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5300 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5326MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5330 if (DstSize == 128) {
5331 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5333 Op = AArch64::MOVIv2d_ns;
5335 Op = AArch64::MOVID;
5341 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5348MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5353 if (DstSize == 128) {
5354 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5356 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5358 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5378MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5382 bool IsWide =
false;
5383 if (DstSize == 128) {
5384 if (
Bits.getHiBits(64) !=
Bits.getLoBits(64))
5386 Op = AArch64::FMOVv4f32_ns;
5389 Op = AArch64::FMOVv2f32_ns;
5398 Op = AArch64::FMOVv2f64_ns;
5402 auto Mov = Builder.
buildInstr(
Op, {Dst}, {}).addImm(Val);
5407bool AArch64InstructionSelector::selectIndexedExtLoad(
5409 auto &ExtLd = cast<GIndexedAnyExtLoad>(
MI);
5411 Register WriteBack = ExtLd.getWritebackReg();
5414 LLT Ty =
MRI.getType(Dst);
5416 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5417 bool IsPre = ExtLd.isPre();
5418 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5419 bool InsertIntoXReg =
false;
5427 if (MemSizeBits == 8) {
5430 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5432 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5433 NewLdDstTy = IsDst64 ? s64 : s32;
5435 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5436 InsertIntoXReg = IsDst64;
5439 }
else if (MemSizeBits == 16) {
5442 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5444 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5445 NewLdDstTy = IsDst64 ? s64 : s32;
5447 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5448 InsertIntoXReg = IsDst64;
5451 }
else if (MemSizeBits == 32) {
5453 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5456 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5457 InsertIntoXReg = IsDst64;
5464 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5472 .addImm(Cst->getSExtValue());
5477 if (InsertIntoXReg) {
5479 auto SubToReg = MIB.
buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5482 .
addImm(AArch64::sub_32);
5483 RBI.constrainGenericRegister(SubToReg.getReg(0), AArch64::GPR64RegClass,
5489 MI.eraseFromParent();
5494bool AArch64InstructionSelector::selectIndexedLoad(
MachineInstr &
MI,
5496 auto &Ld = cast<GIndexedLoad>(
MI);
5498 Register WriteBack = Ld.getWritebackReg();
5501 assert(
MRI.getType(Dst).getSizeInBits() <= 128 &&
5502 "Unexpected type for indexed load");
5503 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5505 if (MemSize <
MRI.getType(Dst).getSizeInBytes())
5506 return selectIndexedExtLoad(
MI,
MRI);
5510 static constexpr unsigned GPROpcodes[] = {
5511 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5513 static constexpr unsigned FPROpcodes[] = {
5514 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5516 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5517 Opc = FPROpcodes[
Log2_32(MemSize)];
5519 Opc = GPROpcodes[
Log2_32(MemSize)];
5521 static constexpr unsigned GPROpcodes[] = {
5522 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5524 static constexpr unsigned FPROpcodes[] = {
5525 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5526 AArch64::LDRDpost, AArch64::LDRQpost};
5527 if (RBI.getRegBank(Dst,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5528 Opc = FPROpcodes[
Log2_32(MemSize)];
5530 Opc = GPROpcodes[
Log2_32(MemSize)];
5536 MIB.
buildInstr(Opc, {WriteBack, Dst}, {
Base}).addImm(Cst->getSExtValue());
5539 MI.eraseFromParent();
5543bool AArch64InstructionSelector::selectIndexedStore(
GIndexedStore &
I,
5549 LLT ValTy =
MRI.getType(Val);
5554 static constexpr unsigned GPROpcodes[] = {
5555 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5557 static constexpr unsigned FPROpcodes[] = {
5558 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5561 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5566 static constexpr unsigned GPROpcodes[] = {
5567 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5569 static constexpr unsigned FPROpcodes[] = {
5570 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5571 AArch64::STRDpost, AArch64::STRQpost};
5573 if (RBI.getRegBank(Val,
MRI,
TRI)->getID() == AArch64::FPRRegBankID)
5583 MIB.
buildInstr(Opc, {Dst}, {Val,
Base}).addImm(Cst->getSExtValue());
5584 Str.cloneMemRefs(
I);
5586 I.eraseFromParent();
5594 LLT DstTy =
MRI.getType(Dst);
5597 if (DstSize == 128) {
5599 MIRBuilder.
buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5604 if (DstSize == 64) {
5607 .
buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5610 .addReg(Mov.getReg(0), 0, AArch64::dsub);
5611 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass,
MRI);
5643 if (
auto *NewOp = TryMOVIWithBits(DefBits))
5647 auto TryWithFNeg = [&](
APInt DefBits,
int NumBits,
5651 APInt NegBits(DstSize, 0);
5652 unsigned NumElts = DstSize / NumBits;
5653 for (
unsigned i = 0; i < NumElts; i++)
5654 NegBits |= Neg << (NumBits * i);
5655 NegBits = DefBits ^ NegBits;
5659 if (
auto *NewOp = TryMOVIWithBits(NegBits)) {
5660 Register NewDst =
MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5662 return MIRBuilder.
buildInstr(NegOpc, {Dst}, {NewDst});
5667 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5668 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5669 (STI.hasFullFP16() &&
5670 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5676 LLVM_DEBUG(
dbgs() <<
"Could not generate cp load for constant vector!");
5680 auto Copy = MIRBuilder.
buildCopy(Dst, CPLoad->getOperand(0));
5681 RBI.constrainGenericRegister(
5682 Dst, *
MRI.getRegClass(CPLoad->getOperand(0).getReg()),
MRI);
5686bool AArch64InstructionSelector::tryOptConstantBuildVec(
5688 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5690 assert(DstSize <= 128 &&
"Unexpected build_vec type!");
5696 for (
unsigned Idx = 1;
Idx <
I.getNumOperands(); ++
Idx) {
5702 const_cast<ConstantInt *
>(OpMI->getOperand(1).getCImm()));
5703 else if ((OpMI =
getOpcodeDef(TargetOpcode::G_FCONSTANT,
5704 I.getOperand(
Idx).getReg(),
MRI)))
5706 const_cast<ConstantFP *
>(OpMI->getOperand(1).getFPImm()));
5711 if (!emitConstantVector(
I.getOperand(0).getReg(), CV, MIB,
MRI))
5713 I.eraseFromParent();
5717bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5723 Register Dst =
I.getOperand(0).getReg();
5724 Register EltReg =
I.getOperand(1).getReg();
5725 LLT EltTy =
MRI.getType(EltReg);
5733 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5741 getRegClassForTypeOnBank(
MRI.getType(Dst), DstRB);
5746 auto SubregToReg = MIB.
buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5750 I.eraseFromParent();
5752 return RBI.constrainGenericRegister(Dst, *DstRC,
MRI);
5755bool AArch64InstructionSelector::selectBuildVector(
MachineInstr &
I,
5757 assert(
I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5760 const LLT DstTy =
MRI.getType(
I.getOperand(0).getReg());
5761 const LLT EltTy =
MRI.getType(
I.getOperand(1).getReg());
5764 if (tryOptConstantBuildVec(
I, DstTy,
MRI))
5766 if (tryOptBuildVecToSubregToReg(
I,
MRI))
5769 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5776 I.getOperand(1).getReg(), MIB);
5786 for (
unsigned i = 2, e = DstSize / EltSize + 1; i <
e; ++i) {
5789 Register OpReg =
I.getOperand(i).getReg();
5791 if (!getOpcodeDef<GImplicitDef>(OpReg,
MRI)) {
5792 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5799 if (DstSize < 128) {
5802 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5805 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5813 if (
SubReg != AArch64::ssub &&
SubReg != AArch64::dsub) {
5814 LLVM_DEBUG(
dbgs() <<
"Unsupported destination size! (" << DstSize
5820 Register DstReg =
I.getOperand(0).getReg();
5822 MIB.
buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0,
SubReg);
5825 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5843 if (PrevMI == ScalarToVec && DstReg.
isVirtual()) {
5845 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec,
MRI,
TRI));
5846 RBI.constrainGenericRegister(DstReg, *RC,
MRI);
5850 I.eraseFromParent();
5854bool AArch64InstructionSelector::selectVectorLoadIntrinsic(
unsigned Opc,
5857 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5858 assert(Opc &&
"Expected an opcode?");
5859 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5861 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
5864 "Destination must be 64 bits or 128 bits?");
5865 unsigned SubReg =
Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5866 auto Ptr =
I.getOperand(
I.getNumOperands() - 1).getReg();
5867 assert(
MRI.getType(
Ptr).isPointer() &&
"Expected a pointer type?");
5869 Load.cloneMemRefs(
I);
5871 Register SelectedLoadDst =
Load->getOperand(0).getReg();
5872 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
5873 auto Vec = MIB.
buildInstr(TargetOpcode::COPY, {
I.getOperand(
Idx)}, {})
5874 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
5883bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5885 assert(
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5886 assert(Opc &&
"Expected an opcode?");
5887 assert(NumVecs > 1 && NumVecs < 5 &&
"Only support 2, 3, or 4 vectors");
5889 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
5892 auto FirstSrcRegIt =
I.operands_begin() + NumVecs + 1;
5894 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.
begin(),
5895 [](
auto MO) { return MO.getReg(); });
5899 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5914 .
addImm(LaneNo->getZExtValue())
5916 Load.cloneMemRefs(
I);
5918 Register SelectedLoadDst =
Load->getOperand(0).getReg();
5919 unsigned SubReg = AArch64::qsub0;
5920 for (
unsigned Idx = 0;
Idx < NumVecs; ++
Idx) {
5921 auto Vec = MIB.
buildInstr(TargetOpcode::COPY,
5922 {Narrow ?
DstOp(&AArch64::FPR128RegClass)
5925 .addReg(SelectedLoadDst, 0,
SubReg +
Idx);
5930 !emitNarrowVector(
I.getOperand(
Idx).getReg(), WideReg, MIB,
MRI))
5936void AArch64InstructionSelector::selectVectorStoreIntrinsic(
MachineInstr &
I,
5940 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
5944 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
5945 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
5954bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
5957 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
5961 std::transform(
I.operands_begin() + 1,
I.operands_begin() + 1 + NumVecs,
5962 Regs.
begin(), [](
auto MO) { return MO.getReg(); });
5966 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5979 .
addImm(LaneNo->getZExtValue())
5986bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5989 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6000 case Intrinsic::aarch64_ldxp:
6001 case Intrinsic::aarch64_ldaxp: {
6003 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6004 {
I.getOperand(0).
getReg(),
I.getOperand(1).getReg()},
6010 case Intrinsic::aarch64_neon_ld1x2: {
6011 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6014 Opc = AArch64::LD1Twov8b;
6016 Opc = AArch64::LD1Twov16b;
6018 Opc = AArch64::LD1Twov4h;
6020 Opc = AArch64::LD1Twov8h;
6022 Opc = AArch64::LD1Twov2s;
6024 Opc = AArch64::LD1Twov4s;
6026 Opc = AArch64::LD1Twov2d;
6027 else if (Ty ==
S64 || Ty == P0)
6028 Opc = AArch64::LD1Twov1d;
6031 selectVectorLoadIntrinsic(Opc, 2,
I);
6034 case Intrinsic::aarch64_neon_ld1x3: {
6035 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6038 Opc = AArch64::LD1Threev8b;
6040 Opc = AArch64::LD1Threev16b;
6042 Opc = AArch64::LD1Threev4h;
6044 Opc = AArch64::LD1Threev8h;
6046 Opc = AArch64::LD1Threev2s;
6048 Opc = AArch64::LD1Threev4s;
6050 Opc = AArch64::LD1Threev2d;
6051 else if (Ty ==
S64 || Ty == P0)
6052 Opc = AArch64::LD1Threev1d;
6055 selectVectorLoadIntrinsic(Opc, 3,
I);
6058 case Intrinsic::aarch64_neon_ld1x4: {
6059 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6062 Opc = AArch64::LD1Fourv8b;
6064 Opc = AArch64::LD1Fourv16b;
6066 Opc = AArch64::LD1Fourv4h;
6068 Opc = AArch64::LD1Fourv8h;
6070 Opc = AArch64::LD1Fourv2s;
6072 Opc = AArch64::LD1Fourv4s;
6074 Opc = AArch64::LD1Fourv2d;
6075 else if (Ty ==
S64 || Ty == P0)
6076 Opc = AArch64::LD1Fourv1d;
6079 selectVectorLoadIntrinsic(Opc, 4,
I);
6082 case Intrinsic::aarch64_neon_ld2: {
6083 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6086 Opc = AArch64::LD2Twov8b;
6088 Opc = AArch64::LD2Twov16b;
6090 Opc = AArch64::LD2Twov4h;
6092 Opc = AArch64::LD2Twov8h;
6094 Opc = AArch64::LD2Twov2s;
6096 Opc = AArch64::LD2Twov4s;
6098 Opc = AArch64::LD2Twov2d;
6099 else if (Ty ==
S64 || Ty == P0)
6100 Opc = AArch64::LD1Twov1d;
6103 selectVectorLoadIntrinsic(Opc, 2,
I);
6106 case Intrinsic::aarch64_neon_ld2lane: {
6107 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6110 Opc = AArch64::LD2i8;
6112 Opc = AArch64::LD2i16;
6114 Opc = AArch64::LD2i32;
6117 Opc = AArch64::LD2i64;
6120 if (!selectVectorLoadLaneIntrinsic(Opc, 2,
I))
6124 case Intrinsic::aarch64_neon_ld2r: {
6125 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6128 Opc = AArch64::LD2Rv8b;
6130 Opc = AArch64::LD2Rv16b;
6132 Opc = AArch64::LD2Rv4h;
6134 Opc = AArch64::LD2Rv8h;
6136 Opc = AArch64::LD2Rv2s;
6138 Opc = AArch64::LD2Rv4s;
6140 Opc = AArch64::LD2Rv2d;
6141 else if (Ty ==
S64 || Ty == P0)
6142 Opc = AArch64::LD2Rv1d;
6145 selectVectorLoadIntrinsic(Opc, 2,
I);
6148 case Intrinsic::aarch64_neon_ld3: {
6149 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6152 Opc = AArch64::LD3Threev8b;
6154 Opc = AArch64::LD3Threev16b;
6156 Opc = AArch64::LD3Threev4h;
6158 Opc = AArch64::LD3Threev8h;
6160 Opc = AArch64::LD3Threev2s;
6162 Opc = AArch64::LD3Threev4s;
6164 Opc = AArch64::LD3Threev2d;
6165 else if (Ty ==
S64 || Ty == P0)
6166 Opc = AArch64::LD1Threev1d;
6169 selectVectorLoadIntrinsic(Opc, 3,
I);
6172 case Intrinsic::aarch64_neon_ld3lane: {
6173 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6176 Opc = AArch64::LD3i8;
6178 Opc = AArch64::LD3i16;
6180 Opc = AArch64::LD3i32;
6183 Opc = AArch64::LD3i64;
6186 if (!selectVectorLoadLaneIntrinsic(Opc, 3,
I))
6190 case Intrinsic::aarch64_neon_ld3r: {
6191 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6194 Opc = AArch64::LD3Rv8b;
6196 Opc = AArch64::LD3Rv16b;
6198 Opc = AArch64::LD3Rv4h;
6200 Opc = AArch64::LD3Rv8h;
6202 Opc = AArch64::LD3Rv2s;
6204 Opc = AArch64::LD3Rv4s;
6206 Opc = AArch64::LD3Rv2d;
6207 else if (Ty ==
S64 || Ty == P0)
6208 Opc = AArch64::LD3Rv1d;
6211 selectVectorLoadIntrinsic(Opc, 3,
I);
6214 case Intrinsic::aarch64_neon_ld4: {
6215 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6218 Opc = AArch64::LD4Fourv8b;
6220 Opc = AArch64::LD4Fourv16b;
6222 Opc = AArch64::LD4Fourv4h;
6224 Opc = AArch64::LD4Fourv8h;
6226 Opc = AArch64::LD4Fourv2s;
6228 Opc = AArch64::LD4Fourv4s;
6230 Opc = AArch64::LD4Fourv2d;
6231 else if (Ty ==
S64 || Ty == P0)
6232 Opc = AArch64::LD1Fourv1d;
6235 selectVectorLoadIntrinsic(Opc, 4,
I);
6238 case Intrinsic::aarch64_neon_ld4lane: {
6239 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6242 Opc = AArch64::LD4i8;
6244 Opc = AArch64::LD4i16;
6246 Opc = AArch64::LD4i32;
6249 Opc = AArch64::LD4i64;
6252 if (!selectVectorLoadLaneIntrinsic(Opc, 4,
I))
6256 case Intrinsic::aarch64_neon_ld4r: {
6257 LLT Ty =
MRI.getType(
I.getOperand(0).getReg());
6260 Opc = AArch64::LD4Rv8b;
6262 Opc = AArch64::LD4Rv16b;
6264 Opc = AArch64::LD4Rv4h;
6266 Opc = AArch64::LD4Rv8h;
6268 Opc = AArch64::LD4Rv2s;
6270 Opc = AArch64::LD4Rv4s;
6272 Opc = AArch64::LD4Rv2d;
6273 else if (Ty ==
S64 || Ty == P0)
6274 Opc = AArch64::LD4Rv1d;
6277 selectVectorLoadIntrinsic(Opc, 4,
I);
6280 case Intrinsic::aarch64_neon_st1x2: {
6281 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6284 Opc = AArch64::ST1Twov8b;
6286 Opc = AArch64::ST1Twov16b;
6288 Opc = AArch64::ST1Twov4h;
6290 Opc = AArch64::ST1Twov8h;
6292 Opc = AArch64::ST1Twov2s;
6294 Opc = AArch64::ST1Twov4s;
6296 Opc = AArch64::ST1Twov2d;
6297 else if (Ty ==
S64 || Ty == P0)
6298 Opc = AArch64::ST1Twov1d;
6301 selectVectorStoreIntrinsic(
I, 2, Opc);
6304 case Intrinsic::aarch64_neon_st1x3: {
6305 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6308 Opc = AArch64::ST1Threev8b;
6310 Opc = AArch64::ST1Threev16b;
6312 Opc = AArch64::ST1Threev4h;
6314 Opc = AArch64::ST1Threev8h;
6316 Opc = AArch64::ST1Threev2s;
6318 Opc = AArch64::ST1Threev4s;
6320 Opc = AArch64::ST1Threev2d;
6321 else if (Ty ==
S64 || Ty == P0)
6322 Opc = AArch64::ST1Threev1d;
6325 selectVectorStoreIntrinsic(
I, 3, Opc);
6328 case Intrinsic::aarch64_neon_st1x4: {
6329 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6332 Opc = AArch64::ST1Fourv8b;
6334 Opc = AArch64::ST1Fourv16b;
6336 Opc = AArch64::ST1Fourv4h;
6338 Opc = AArch64::ST1Fourv8h;
6340 Opc = AArch64::ST1Fourv2s;
6342 Opc = AArch64::ST1Fourv4s;
6344 Opc = AArch64::ST1Fourv2d;
6345 else if (Ty ==
S64 || Ty == P0)
6346 Opc = AArch64::ST1Fourv1d;
6349 selectVectorStoreIntrinsic(
I, 4, Opc);
6352 case Intrinsic::aarch64_neon_st2: {
6353 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6356 Opc = AArch64::ST2Twov8b;
6358 Opc = AArch64::ST2Twov16b;
6360 Opc = AArch64::ST2Twov4h;
6362 Opc = AArch64::ST2Twov8h;
6364 Opc = AArch64::ST2Twov2s;
6366 Opc = AArch64::ST2Twov4s;
6368 Opc = AArch64::ST2Twov2d;
6369 else if (Ty ==
S64 || Ty == P0)
6370 Opc = AArch64::ST1Twov1d;
6373 selectVectorStoreIntrinsic(
I, 2, Opc);
6376 case Intrinsic::aarch64_neon_st3: {
6377 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6380 Opc = AArch64::ST3Threev8b;
6382 Opc = AArch64::ST3Threev16b;
6384 Opc = AArch64::ST3Threev4h;
6386 Opc = AArch64::ST3Threev8h;
6388 Opc = AArch64::ST3Threev2s;
6390 Opc = AArch64::ST3Threev4s;
6392 Opc = AArch64::ST3Threev2d;
6393 else if (Ty ==
S64 || Ty == P0)
6394 Opc = AArch64::ST1Threev1d;
6397 selectVectorStoreIntrinsic(
I, 3, Opc);
6400 case Intrinsic::aarch64_neon_st4: {
6401 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6404 Opc = AArch64::ST4Fourv8b;
6406 Opc = AArch64::ST4Fourv16b;
6408 Opc = AArch64::ST4Fourv4h;
6410 Opc = AArch64::ST4Fourv8h;
6412 Opc = AArch64::ST4Fourv2s;
6414 Opc = AArch64::ST4Fourv4s;
6416 Opc = AArch64::ST4Fourv2d;
6417 else if (Ty ==
S64 || Ty == P0)
6418 Opc = AArch64::ST1Fourv1d;
6421 selectVectorStoreIntrinsic(
I, 4, Opc);
6424 case Intrinsic::aarch64_neon_st2lane: {
6425 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6428 Opc = AArch64::ST2i8;
6430 Opc = AArch64::ST2i16;
6432 Opc = AArch64::ST2i32;
6435 Opc = AArch64::ST2i64;
6438 if (!selectVectorStoreLaneIntrinsic(
I, 2, Opc))
6442 case Intrinsic::aarch64_neon_st3lane: {
6443 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6446 Opc = AArch64::ST3i8;
6448 Opc = AArch64::ST3i16;
6450 Opc = AArch64::ST3i32;
6453 Opc = AArch64::ST3i64;
6456 if (!selectVectorStoreLaneIntrinsic(
I, 3, Opc))
6460 case Intrinsic::aarch64_neon_st4lane: {
6461 LLT Ty =
MRI.getType(
I.getOperand(1).getReg());
6464 Opc = AArch64::ST4i8;
6466 Opc = AArch64::ST4i16;
6468 Opc = AArch64::ST4i32;
6471 Opc = AArch64::ST4i64;
6474 if (!selectVectorStoreLaneIntrinsic(
I, 4, Opc))
6478 case Intrinsic::aarch64_mops_memset_tag: {
6491 Register DstDef =
I.getOperand(0).getReg();
6493 Register DstUse =
I.getOperand(2).getReg();
6494 Register ValUse =
I.getOperand(3).getReg();
6495 Register SizeUse =
I.getOperand(4).getReg();
6502 auto Memset = MIB.
buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6503 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6510 I.eraseFromParent();
6514bool AArch64InstructionSelector::selectIntrinsic(
MachineInstr &
I,
6516 unsigned IntrinID = cast<GIntrinsic>(
I).getIntrinsicID();
6521 case Intrinsic::aarch64_crypto_sha1h: {
6522 Register DstReg =
I.getOperand(0).getReg();
6523 Register SrcReg =
I.getOperand(2).getReg();
6526 if (
MRI.getType(DstReg).getSizeInBits() != 32 ||
6527 MRI.getType(SrcReg).getSizeInBits() != 32)
6532 if (RBI.getRegBank(SrcReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID) {
6533 SrcReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6537 RBI.constrainGenericRegister(
I.getOperand(2).getReg(),
6538 AArch64::GPR32RegClass,
MRI);
6541 if (RBI.getRegBank(DstReg,
MRI,
TRI)->getID() != AArch64::FPRRegBankID)
6542 DstReg =
MRI.createVirtualRegister(&AArch64::FPR32RegClass);
6545 auto SHA1Inst = MIB.
buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
6549 if (DstReg !=
I.getOperand(0).getReg()) {
6553 RBI.constrainGenericRegister(
I.getOperand(0).getReg(),
6554 AArch64::GPR32RegClass,
MRI);
6557 I.eraseFromParent();
6560 case Intrinsic::ptrauth_resign: {
6561 Register DstReg =
I.getOperand(0).getReg();
6562 Register ValReg =
I.getOperand(2).getReg();
6563 uint64_t AUTKey =
I.getOperand(3).getImm();
6564 Register AUTDisc =
I.getOperand(4).getReg();
6565 uint64_t PACKey =
I.getOperand(5).getImm();
6566 Register PACDisc =
I.getOperand(6).getReg();
6570 std::tie(AUTConstDiscC, AUTAddrDisc) =
6575 std::tie(PACConstDiscC, PACAddrDisc) =
6578 MIB.
buildCopy({AArch64::X16}, {ValReg});
6579 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6590 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6591 I.eraseFromParent();
6594 case Intrinsic::ptrauth_auth: {
6595 Register DstReg =
I.getOperand(0).getReg();
6596 Register ValReg =
I.getOperand(2).getReg();
6597 uint64_t AUTKey =
I.getOperand(3).getImm();
6598 Register AUTDisc =
I.getOperand(4).getReg();
6602 std::tie(AUTConstDiscC, AUTAddrDisc) =
6605 MIB.
buildCopy({AArch64::X16}, {ValReg});
6606 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6614 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6615 I.eraseFromParent();
6618 case Intrinsic::frameaddress:
6619 case Intrinsic::returnaddress: {
6623 unsigned Depth =
I.getOperand(2).getImm();
6624 Register DstReg =
I.getOperand(0).getReg();
6625 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass,
MRI);
6627 if (
Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6628 if (!MFReturnAddr) {
6633 MF,
TII, AArch64::LR, AArch64::GPR64RegClass,
I.getDebugLoc());
6636 if (STI.hasPAuth()) {
6637 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
6644 I.eraseFromParent();
6651 Register NextFrame =
MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
6653 MIB.
buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
6655 FrameAddr = NextFrame;
6658 if (IntrinID == Intrinsic::frameaddress)
6663 if (STI.hasPAuth()) {
6664 Register TmpReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
6665 MIB.
buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
6666 MIB.
buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
6675 I.eraseFromParent();
6678 case Intrinsic::aarch64_neon_tbl2:
6679 SelectTable(
I,
MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two,
false);
6681 case Intrinsic::aarch64_neon_tbl3:
6682 SelectTable(
I,
MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
6685 case Intrinsic::aarch64_neon_tbl4:
6686 SelectTable(
I,
MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four,
false);
6688 case Intrinsic::aarch64_neon_tbx2:
6689 SelectTable(
I,
MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two,
true);
6691 case Intrinsic::aarch64_neon_tbx3:
6692 SelectTable(
I,
MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three,
true);
6694 case Intrinsic::aarch64_neon_tbx4:
6695 SelectTable(
I,
MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four,
true);
6697 case Intrinsic::swift_async_context_addr:
6706 I.eraseFromParent();
6741bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6743 Register DefReg =
I.getOperand(0).getReg();
6746 Register AddrDisc =
I.getOperand(3).getReg();
6747 uint64_t Disc =
I.getOperand(4).getImm();
6755 if (!isUInt<16>(Disc))
6757 "constant discriminator in ptrauth global out of range [0, 0xffff]");
6760 if (!STI.isTargetELF() && !STI.isTargetMachO())
6770 if (!
MRI.hasOneDef(OffsetReg))
6773 if (OffsetMI.
getOpcode() != TargetOpcode::G_CONSTANT)
6799 unsigned OpFlags = STI.ClassifyGlobalReference(GV,
TM);
6802 "unsupported non-GOT op flags on ptrauth global reference");
6804 "unsupported non-GOT reference to weak ptrauth global");
6807 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6814 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
6815 MIB.
buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
6816 MIB.
buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6819 .
addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
6823 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
6824 I.eraseFromParent();
6836 "unsupported non-zero offset in weak ptrauth global reference");
6841 MIB.
buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
6842 .addGlobalAddress(GV,
Offset)
6845 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
MRI);
6847 I.eraseFromParent();
6851void AArch64InstructionSelector::SelectTable(
MachineInstr &
I,
6853 unsigned NumVec,
unsigned Opc1,
6854 unsigned Opc2,
bool isExt) {
6855 Register DstReg =
I.getOperand(0).getReg();
6860 for (
unsigned i = 0; i < NumVec; i++)
6861 Regs.
push_back(
I.getOperand(i + 2 + isExt).getReg());
6864 Register IdxReg =
I.getOperand(2 + NumVec + isExt).getReg();
6872 I.eraseFromParent();
6876AArch64InstructionSelector::selectShiftA_32(
const MachineOperand &Root)
const {
6878 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6879 return std::nullopt;
6880 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6885AArch64InstructionSelector::selectShiftB_32(
const MachineOperand &Root)
const {
6887 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6888 return std::nullopt;
6894AArch64InstructionSelector::selectShiftA_64(
const MachineOperand &Root)
const {
6896 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6897 return std::nullopt;
6898 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6903AArch64InstructionSelector::selectShiftB_64(
const MachineOperand &Root)
const {
6905 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6906 return std::nullopt;
6917AArch64InstructionSelector::select12BitValueWithLeftShift(
6920 if (Immed >> 12 == 0) {
6922 }
else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6924 Immed = Immed >> 12;
6926 return std::nullopt;
6939AArch64InstructionSelector::selectArithImmed(
MachineOperand &Root)
const {
6946 if (MaybeImmed == std::nullopt)
6947 return std::nullopt;
6948 return select12BitValueWithLeftShift(*MaybeImmed);
6954AArch64InstructionSelector::selectNegArithImmed(
MachineOperand &Root)
const {
6958 return std::nullopt;
6960 if (MaybeImmed == std::nullopt)
6961 return std::nullopt;
6968 return std::nullopt;
6973 if (
MRI.getType(Root.
getReg()).getSizeInBits() == 32)
6976 Immed = ~Immed + 1ULL;
6978 if (Immed & 0xFFFFFFFFFF000000ULL)
6979 return std::nullopt;
6981 Immed &= 0xFFFFFFULL;
6982 return select12BitValueWithLeftShift(Immed);
6999std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7001 if (
MI.getOpcode() == AArch64::G_SHL) {
7005 MI.getOperand(2).getReg(),
MRI)) {
7006 const APInt ShiftVal = ValAndVeg->Value;
7009 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7012 return std::nullopt;
7020bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7022 bool IsAddrOperand)
const {
7026 if (
MRI.hasOneNonDBGUse(DefReg) ||
7027 MI.getParent()->getParent()->getFunction().hasOptSize())
7030 if (IsAddrOperand) {
7032 if (
const auto Worth = isWorthFoldingIntoAddrMode(
MI,
MRI))
7036 if (
MI.getOpcode() == AArch64::G_PTR_ADD) {
7043 if (
const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst,
MRI))
7053 return all_of(
MRI.use_nodbg_instructions(DefReg),
7069AArch64InstructionSelector::selectExtendedSHL(
7071 unsigned SizeInBytes,
bool WantsExt)
const {
7072 assert(
Base.isReg() &&
"Expected base to be a register operand");
7073 assert(
Offset.isReg() &&
"Expected offset to be a register operand");
7078 unsigned OffsetOpc = OffsetInst->
getOpcode();
7079 bool LookedThroughZExt =
false;
7080 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7082 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7083 return std::nullopt;
7087 LookedThroughZExt =
true;
7089 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7090 return std::nullopt;
7093 int64_t LegalShiftVal =
Log2_32(SizeInBytes);
7094 if (LegalShiftVal == 0)
7095 return std::nullopt;
7096 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI,
true))
7097 return std::nullopt;
7108 if (OffsetOpc == TargetOpcode::G_SHL)
7109 return std::nullopt;
7115 return std::nullopt;
7120 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7124 if (OffsetOpc == TargetOpcode::G_MUL) {
7125 if (!llvm::has_single_bit<uint32_t>(ImmVal))
7126 return std::nullopt;
7132 if ((ImmVal & 0x7) != ImmVal)
7133 return std::nullopt;
7137 if (ImmVal != LegalShiftVal)
7138 return std::nullopt;
7140 unsigned SignExtend = 0;
7144 if (!LookedThroughZExt) {
7146 auto Ext = getExtendTypeForInst(*ExtInst,
MRI,
true);
7148 return std::nullopt;
7153 return std::nullopt;
7159 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
7169 MIB.addImm(SignExtend);
7183AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7186 return std::nullopt;
7203 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI,
true))
7204 return std::nullopt;
7210 return selectExtendedSHL(Root, PtrAdd->
getOperand(1),
7224AArch64InstructionSelector::selectAddrModeRegisterOffset(
7230 if (Gep->
getOpcode() != TargetOpcode::G_PTR_ADD)
7231 return std::nullopt;
7237 return std::nullopt;
7257AArch64InstructionSelector::selectAddrModeXRO(
MachineOperand &Root,
7258 unsigned SizeInBytes)
const {
7261 return std::nullopt;
7265 return std::nullopt;
7283 unsigned Scale =
Log2_32(SizeInBytes);
7284 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7288 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7289 ImmOff < (0x1000 << Scale))
7290 return std::nullopt;
7295 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7299 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7305 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7306 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7311 return std::nullopt;
7315 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7321 return selectAddrModeRegisterOffset(Root);
7331AArch64InstructionSelector::selectAddrModeWRO(
MachineOperand &Root,
7332 unsigned SizeInBytes)
const {
7337 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd,
MRI,
true))
7338 return std::nullopt;
7359 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->
getOperand(0),
7368 if (!isWorthFoldingIntoExtendedReg(*OffsetInst,
MRI,
true))
7369 return std::nullopt;
7373 getExtendTypeForInst(*OffsetInst,
MRI,
true);
7375 return std::nullopt;
7380 AArch64::GPR32RegClass, MIB);
7387 MIB.addImm(SignExtend);
7398AArch64InstructionSelector::selectAddrModeUnscaled(
MachineOperand &Root,
7399 unsigned Size)
const {
7404 return std::nullopt;
7406 if (!isBaseWithConstantOffset(Root,
MRI))
7407 return std::nullopt;
7412 if (!OffImm.
isReg())
7413 return std::nullopt;
7415 if (
RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7416 return std::nullopt;
7420 return std::nullopt;
7423 if (RHSC >= -256 && RHSC < 256) {
7430 return std::nullopt;
7434AArch64InstructionSelector::tryFoldAddLowIntoImm(
MachineInstr &RootDef,
7437 if (RootDef.
getOpcode() != AArch64::G_ADD_LOW)
7438 return std::nullopt;
7441 return std::nullopt;
7446 return std::nullopt;
7450 return std::nullopt;
7454 return std::nullopt;
7456 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.
getTarget());
7461 MIB.addGlobalAddress(GV,
Offset,
7471AArch64InstructionSelector::selectAddrModeIndexed(
MachineOperand &Root,
7472 unsigned Size)
const {
7477 return std::nullopt;
7480 if (RootDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7490 auto OpFns = tryFoldAddLowIntoImm(*RootDef,
Size,
MRI);
7495 if (isBaseWithConstantOffset(Root,
MRI)) {
7503 if ((RHSC & (
Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7504 if (LHSDef->
getOpcode() == TargetOpcode::G_FRAME_INDEX)
7519 if (selectAddrModeUnscaled(Root,
Size))
7520 return std::nullopt;
7531 switch (
MI.getOpcode()) {
7534 case TargetOpcode::G_SHL:
7536 case TargetOpcode::G_LSHR:
7538 case TargetOpcode::G_ASHR:
7540 case TargetOpcode::G_ROTR:
7548AArch64InstructionSelector::selectShiftedRegister(
MachineOperand &Root,
7549 bool AllowROR)
const {
7551 return std::nullopt;
7560 return std::nullopt;
7562 return std::nullopt;
7563 if (!isWorthFoldingIntoExtendedReg(*ShiftInst,
MRI,
false))
7564 return std::nullopt;
7570 return std::nullopt;
7577 unsigned NumBits =
MRI.getType(ShiftReg).getSizeInBits();
7578 unsigned Val = *Immed & (NumBits - 1);
7587 unsigned Opc =
MI.getOpcode();
7590 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7592 if (Opc == TargetOpcode::G_SEXT)
7593 Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7595 Size =
MI.getOperand(2).getImm();
7596 assert(
Size != 64 &&
"Extend from 64 bits?");
7609 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7610 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7611 assert(
Size != 64 &&
"Extend from 64 bits?");
7626 if (Opc != TargetOpcode::G_AND)
7645Register AArch64InstructionSelector::moveScalarRegClass(
7648 auto Ty =
MRI.getType(Reg);
7657 return Copy.getReg(0);
7663AArch64InstructionSelector::selectArithExtendedRegister(
7666 return std::nullopt;
7675 return std::nullopt;
7677 if (!isWorthFoldingIntoExtendedReg(*RootDef,
MRI,
false))
7678 return std::nullopt;
7681 if (RootDef->
getOpcode() == TargetOpcode::G_SHL) {
7686 return std::nullopt;
7687 ShiftVal = *MaybeShiftVal;
7689 return std::nullopt;
7694 return std::nullopt;
7695 Ext = getExtendTypeForInst(*ExtDef,
MRI);
7697 return std::nullopt;
7701 Ext = getExtendTypeForInst(*RootDef,
MRI);
7703 return std::nullopt;
7712 if (isDef32(*ExtInst))
7713 return std::nullopt;
7720 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
7724 MIB.addImm(getArithExtendImm(Ext, ShiftVal));
7729AArch64InstructionSelector::selectExtractHigh(
MachineOperand &Root)
const {
7731 return std::nullopt;
7736 while (Extract && Extract->MI->
getOpcode() == TargetOpcode::G_BITCAST &&
7737 STI.isLittleEndian())
7741 return std::nullopt;
7743 if (Extract->MI->
getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7749 if (Extract->MI->
getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7754 LaneIdx->Value.getSExtValue() == 1) {
7760 return std::nullopt;
7767 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7768 "Expected G_CONSTANT");
7769 std::optional<int64_t> CstVal =
7771 assert(CstVal &&
"Expected constant value");
7775void AArch64InstructionSelector::renderLogicalImm32(
7777 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7778 "Expected G_CONSTANT");
7779 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7784void AArch64InstructionSelector::renderLogicalImm64(
7786 assert(
I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7787 "Expected G_CONSTANT");
7788 uint64_t CstVal =
I.getOperand(1).getCImm()->getZExtValue();
7796 assert(
MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7797 "Expected G_UBSANTRAP");
7798 MIB.
addImm(
MI.getOperand(0).getImm() | (
'U' << 8));
7804 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7805 "Expected G_FCONSTANT");
7813 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7814 "Expected G_FCONSTANT");
7822 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7823 "Expected G_FCONSTANT");
7828void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7830 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7831 "Expected G_FCONSTANT");
7839bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7841 if (!
MI.mayLoadOrStore())
7844 "Expected load/store to have only one mem op!");
7845 return (*
MI.memoperands_begin())->getSize() == NumBytes;
7848bool AArch64InstructionSelector::isDef32(
const MachineInstr &
MI)
const {
7850 if (
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() != 32)
7857 switch (
MI.getOpcode()) {
7860 case TargetOpcode::COPY:
7861 case TargetOpcode::G_BITCAST:
7862 case TargetOpcode::G_TRUNC:
7863 case TargetOpcode::G_PHI:
7873 assert(
MI.getOpcode() == TargetOpcode::G_PHI &&
"Expected a G_PHI");
7876 assert(DstRB &&
"Expected PHI dst to have regbank assigned");
7887 auto *OpDef =
MRI.getVRegDef(OpReg);
7888 const LLT &Ty =
MRI.getType(OpReg);
7894 if (InsertPt != OpDefBB.
end() && InsertPt->isPHI())
7898 MRI.setRegBank(Copy.getReg(0), *DstRB);
7899 MO.setReg(Copy.getReg(0));
7908 for (
auto &BB : MF) {
7909 for (
auto &
MI : BB) {
7910 if (
MI.getOpcode() == TargetOpcode::G_PHI)
7915 for (
auto *
MI : Phis) {
7937 bool HasGPROp =
false, HasFPROp =
false;
7941 const LLT &Ty =
MRI.getType(MO.getReg());
7951 if (RB->
getID() == AArch64::GPRRegBankID)
7957 if (HasGPROp && HasFPROp)
7967 return new AArch64InstructionSelector(
TM, Subtarget, RBI);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy)
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert an IR fp condition code to an AArch64 CC.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P)
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, MachineRegisterInfo &MRI, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/CMP operations that can be expressed as a conjunction.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file contains constants used for implementing Dwarf debug support.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const char LLVMTargetMachineRef TM
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
int getVarArgsStackIndex() const
int getVarArgsGPRIndex() const
unsigned getVarArgsGPRSize() const
This class provides the information for the target register banks.
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Class for arbitrary precision integers.
APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
bool isIntPredicate() const
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the sign bit is set.
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
const APInt & getUniqueInteger() const
If C is a constant integer then return its value, otherwise C must be a vector of constant integers,...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
This class represents an Operation in the Expression.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
Represents indexed stores.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
TypeSize getValue() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setAdjustsStack(bool V)
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
T get() const
Returns the value of the specified pointer type.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class represents the LLVM 'select' instruction.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size. Return false otherwise.
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount; shifter: 000 ==> lsl, 001 ==> lsr, 010 ==> asr, 011 ==> ror, 100 ==> msl.
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed as an argument (RegClass).
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT that fits in int64_t, returns it.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Returns true if the given block should be optimized for size.
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONSTANT, returns its value as an APInt and the original defining register.
AtomicOrdering
Atomic ordering for LLVM's memory model.
DWARFExpression::Operation Op
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its value as an APInt and the original defining register.
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Class which stores all the state required in a MachineIRBuilder.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.