267#define DEBUG_TYPE "frame-info"
270 cl::desc(
"enable use of redzone on AArch64"),
274 "stack-tagging-merge-settag",
284 cl::desc(
"Split allocation of ZPR & PPR objects"),
289 cl::desc(
"Emit homogeneous prologue and epilogue for the size "
290 "optimization (default = off)"));
302 "aarch64-disable-multivector-spill-fill",
311 bool IsTailCallReturn = (
MBB.end() !=
MBBI)
315 int64_t ArgumentPopSize = 0;
316 if (IsTailCallReturn) {
322 ArgumentPopSize = StackAdjust.
getImm();
331 return ArgumentPopSize;
374 if (AFI->hasCalculatedStackSizeSVE())
404bool AArch64FrameLowering::homogeneousPrologEpilog(
430 if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
437 unsigned NumGPRs = 0;
438 for (
unsigned I = 0; CSRegs[
I]; ++
I) {
440 if (Reg == AArch64::LR) {
441 assert(CSRegs[
I + 1] == AArch64::FP);
442 if (NumGPRs % 2 != 0)
454bool AArch64FrameLowering::producePairRegisters(
MachineFunction &MF)
const {
473 if (
MI.isDebugInstr() ||
MI.isPseudo() ||
474 MI.getOpcode() == AArch64::ADDXri ||
475 MI.getOpcode() == AArch64::ADDSXri)
500 bool IsWin64,
bool IsFunclet)
const {
502 "Tail call reserved stack must be aligned to 16 bytes");
503 if (!IsWin64 || IsFunclet) {
508 Attribute::SwiftAsync))
522 int FrameIndex =
H.CatchObj.FrameIndex;
523 if ((FrameIndex != INT_MAX) &&
524 CatchObjFrameIndices.
insert(FrameIndex)) {
525 FixedObjectSize =
alignTo(FixedObjectSize,
532 FixedObjectSize += 8;
534 return alignTo(FixedObjectSize, 16);
545 const unsigned RedZoneSize =
558 bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
562 return !(MFI.
hasCalls() ||
hasFP(MF) || NumBytes > RedZoneSize ||
583 RegInfo->hasStackRealignment(MF))
630 if (TT.isOSDarwin() || TT.isOSWindows())
668 unsigned Opc =
I->getOpcode();
669 bool IsDestroy =
Opc ==
TII->getCallFrameDestroyOpcode();
670 uint64_t CalleePopAmount = IsDestroy ?
I->getOperand(1).getImm() : 0;
673 int64_t Amount =
I->getOperand(0).getImm();
681 if (CalleePopAmount == 0) {
692 assert(Amount > -0xffffff && Amount < 0xffffff &&
"call frame too large");
703 "non-reserved call frame without var sized objects?");
712 }
else if (CalleePopAmount != 0) {
715 assert(CalleePopAmount < 0xffffff &&
"call frame too large");
727 const auto &
TRI = *Subtarget.getRegisterInfo();
733 CFIBuilder.buildDefCFA(AArch64::SP, 0);
736 if (MFI.shouldSignReturnAddress(MF)) {
737 if (MFI.branchProtectionPAuthLR()) {
738 CFIBuilder.buildNegateRAStateWithPC();
740 CFIBuilder.buildNegateRAState();
745 if (MFI.needsShadowCallStackPrologueEpilogue(MF))
746 CFIBuilder.buildSameValue(AArch64::X18);
749 const std::vector<CalleeSavedInfo> &CSI =
751 for (
const auto &Info : CSI) {
753 if (!
TRI.regNeedsCFI(Reg, Reg))
755 CFIBuilder.buildSameValue(Reg);
768 case AArch64::W##n: \
769 case AArch64::X##n: \
794 case AArch64::B##n: \
795 case AArch64::H##n: \
796 case AArch64::S##n: \
797 case AArch64::D##n: \
798 case AArch64::Q##n: \
799 return HasSVE ? AArch64::Z##n : AArch64::Q##n
836void AArch64FrameLowering::emitZeroCallUsedRegs(
BitVector RegsToZero,
847 const AArch64Subtarget &STI = MF.
getSubtarget<AArch64Subtarget>();
850 BitVector GPRsToZero(
TRI.getNumRegs());
851 BitVector FPRsToZero(
TRI.getNumRegs());
854 if (
TRI.isGeneralPurposeRegister(MF,
Reg)) {
857 GPRsToZero.set(XReg);
861 FPRsToZero.set(XReg);
868 for (MCRegister
Reg : GPRsToZero.set_bits())
872 for (MCRegister
Reg : FPRsToZero.set_bits())
876 for (MCRegister PReg :
877 {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
878 AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
879 AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
881 if (RegsToZero[PReg])
887bool AArch64FrameLowering::windowsRequiresStackProbe(
889 const AArch64Subtarget &Subtarget = MF.
getSubtarget<AArch64Subtarget>();
890 const AArch64FunctionInfo &MFI = *MF.
getInfo<AArch64FunctionInfo>();
894 StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
903 for (
unsigned i = 0; CSRegs[i]; ++i)
909 bool HasCall)
const {
919 const AArch64Subtarget &Subtarget = MF->
getSubtarget<AArch64Subtarget>();
921 LivePhysRegs LiveRegs(
TRI);
924 LiveRegs.addReg(AArch64::X16);
925 LiveRegs.addReg(AArch64::X17);
926 LiveRegs.addReg(AArch64::X18);
930 const MachineRegisterInfo &MRI = MF->
getRegInfo();
931 if (LiveRegs.available(MRI, AArch64::X9))
934 for (
unsigned Reg : AArch64::GPR64RegClass) {
935 if (LiveRegs.available(MRI,
Reg))
938 return AArch64::NoRegister;
957 if (!
LiveRegs.available(MRI, AArch64::X16) ||
958 !
LiveRegs.available(MRI, AArch64::X17))
965 MBB.isLiveIn(AArch64::NZCV))
969 if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister)
975 windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
976 if (findScratchNonCalleeSaveRegister(TmpMBB,
true) == AArch64::NoRegister)
985 F.needsUnwindTableEntry();
988bool AArch64FrameLowering::shouldSignReturnAddressEverywhere(
1004 unsigned Opc =
MBBI->getOpcode();
1008 unsigned ImmIdx =
MBBI->getNumOperands() - 1;
1009 int Imm =
MBBI->getOperand(ImmIdx).getImm();
1017 case AArch64::STR_ZXI:
1018 case AArch64::LDR_ZXI: {
1019 unsigned Reg0 =
RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1026 case AArch64::STR_PXI:
1027 case AArch64::LDR_PXI: {
1028 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1035 case AArch64::LDPDpost:
1038 case AArch64::STPDpre: {
1039 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1040 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(2).getReg());
1041 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFRegP_X))
1048 case AArch64::LDPXpost:
1051 case AArch64::STPXpre: {
1054 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1055 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFPLR_X))
1059 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveRegP_X))
1060 .
addImm(RegInfo->getSEHRegNum(Reg0))
1061 .
addImm(RegInfo->getSEHRegNum(Reg1))
1066 case AArch64::LDRDpost:
1069 case AArch64::STRDpre: {
1070 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1071 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveFReg_X))
1077 case AArch64::LDRXpost:
1080 case AArch64::STRXpre: {
1081 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1088 case AArch64::STPDi:
1089 case AArch64::LDPDi: {
1090 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1091 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1099 case AArch64::STPXi:
1100 case AArch64::LDPXi: {
1104 int SEHReg0 = RegInfo->getSEHRegNum(Reg0);
1105 int SEHReg1 = RegInfo->getSEHRegNum(Reg1);
1107 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
1111 else if (SEHReg0 >= 19 && SEHReg1 >= 19)
1118 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegIP))
1125 case AArch64::STRXui:
1126 case AArch64::LDRXui: {
1127 int Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1134 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegI))
1140 case AArch64::STRDui:
1141 case AArch64::LDRDui: {
1142 unsigned Reg = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1149 case AArch64::STPQi:
1150 case AArch64::LDPQi: {
1151 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(0).getReg());
1152 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1153 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegQP))
1160 case AArch64::LDPQpost:
1163 case AArch64::STPQpre: {
1164 unsigned Reg0 = RegInfo->getSEHRegNum(
MBBI->getOperand(1).getReg());
1165 unsigned Reg1 = RegInfo->getSEHRegNum(
MBBI->getOperand(2).getReg());
1166 MIB =
BuildMI(MF,
DL,
TII.get(AArch64::SEH_SaveAnyRegQPX))
1185 if (ST.isTargetDarwin())
1207 DL =
MBBI->getDebugLoc();
1209 TII->createPauthEpilogueInstr(
MBB,
DL);
1213 EmitSignRA(MF.
front());
1215 if (
MBB.isEHFuncletEntry())
1217 if (
MBB.isReturnBlock())
1273 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1278 if (MFI.isVariableSizedObjectIndex(FI)) {
1288 if (MFI.hasScalableStackID(FI)) {
1289 if (FPAfterSVECalleeSaves &&
1292 "split-sve-objects not supported with FPAfterSVECalleeSaves");
1300 AccessOffset = -PPRStackSize;
1301 return AccessOffset +
1306 bool IsFixed = MFI.isFixedObjectIndex(FI);
1311 if (!IsFixed && !IsCSR) {
1312 ScalableOffset = -SVEStackSize;
1313 }
else if (FPAfterSVECalleeSaves && IsCSR) {
1328 int64_t ObjectOffset)
const {
1332 bool IsWin64 = Subtarget.isCallingConvWin64(
F.getCallingConv(),
F.isVarArg());
1333 unsigned FixedObject =
1334 getFixedObjectSize(MF, AFI, IsWin64,
false);
1342 int64_t ObjectOffset)
const {
1353 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1354 ? getFPOffset(MF, ObjectOffset).getFixed()
1355 : getStackOffset(MF, ObjectOffset).getFixed();
1360 bool ForSimm)
const {
1362 int64_t ObjectOffset = MFI.getObjectOffset(FI);
1363 bool isFixed = MFI.isFixedObjectIndex(FI);
1366 FrameReg, PreferFP, ForSimm);
1372 bool ForSimm)
const {
1378 int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
1379 int64_t
Offset = getStackOffset(MF, ObjectOffset).getFixed();
1382 bool isSVE = MFI.isScalableStackID(StackID);
1386 StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
1397 PreferFP &= !SVEStackSize;
1405 }
else if (isCSR && RegInfo->hasStackRealignment(MF)) {
1409 assert(
hasFP(MF) &&
"Re-aligned stack must have frame pointer");
1411 }
else if (
hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
1416 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1417 PreferFP |=
Offset > -FPOffset && !SVEStackSize;
1419 if (FPOffset >= 0) {
1423 }
else if (MFI.hasVarSizedObjects()) {
1427 bool CanUseBP = RegInfo->hasBasePointer(MF);
1428 if (FPOffsetFits && CanUseBP)
1435 }
else if (MF.
hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1442 "Funclets should only be present on Win64");
1446 if (FPOffsetFits && PreferFP)
1453 ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
1454 "In the presence of dynamic stack pointer realignment, "
1455 "non-argument/CSR objects cannot be accessed through the frame pointer");
1472 FPOffset -= PPRStackSize;
1474 SPOffset -= PPRStackSize;
1479 if (FPAfterSVECalleeSaves) {
1490 RegInfo->hasStackRealignment(MF))) {
1491 FrameReg = RegInfo->getFrameRegister(MF);
1494 FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
1501 if (FPAfterSVECalleeSaves) {
1508 SVEAreaOffset = SVECalleeSavedStack;
1510 SVEAreaOffset = SVECalleeSavedStack - SVEStackSize;
1513 SVEAreaOffset = SVEStackSize;
1515 SVEAreaOffset = SVEStackSize - SVECalleeSavedStack;
1518 if (UseFP && !(isFixed || isCSR))
1519 SVEAreaOffset = -SVEStackSize;
1520 if (!UseFP && (isFixed || isCSR))
1521 SVEAreaOffset = SVEStackSize;
1525 FrameReg = RegInfo->getFrameRegister(MF);
1530 if (RegInfo->hasBasePointer(MF))
1531 FrameReg = RegInfo->getBaseRegister();
1533 assert(!MFI.hasVarSizedObjects() &&
1534 "Can't use SP when we have var sized objects.");
1535 FrameReg = AArch64::SP;
1563 Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
1569 unsigned SpillCount,
unsigned Reg1,
1570 unsigned Reg2,
bool NeedsWinCFI,
1579 if (Reg2 == AArch64::FP)
1589 if (
TRI->getEncodingValue(Reg2) ==
TRI->getEncodingValue(Reg1) + 1)
1590 return SpillExtendedVolatile
1591 ? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) ||
1592 (SpillCount % 2) == 0)
1597 if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
1598 (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR)
1608 unsigned SpillCount,
unsigned Reg1,
1609 unsigned Reg2,
bool UsesWinAAPCS,
1610 bool NeedsWinCFI,
bool NeedsFrameRecord,
1614 Reg1, Reg2, NeedsWinCFI,
TRI);
1618 if (NeedsFrameRecord)
1619 return Reg2 == AArch64::LR;
1631 enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG }
Type;
1632 const TargetRegisterClass *RC;
1634 RegPairInfo() =
default;
/// Returns true when this entry's second register (Reg2) is valid.
1636 bool isPaired()
const {
return Reg2.
isValid(); }
/// Returns true when Type is PPR or ZPR.
1638 bool isScalable()
const {
return Type == PPR ||
Type == ZPR; }
1644 for (
unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
1645 if (SavedRegs.
test(PReg)) {
1646 unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
1660 bool IsLocallyStreaming =
1666 return Subtarget.hasSVE2p1() ||
1667 (Subtarget.hasSME2() &&
1668 (!IsLocallyStreaming && Subtarget.
isStreaming()));
1676 bool NeedsFrameRecord) {
1693 (
Count & 1) == 0) &&
1694 "Odd number of callee-saved regs to spill!");
1696 int StackFillDir = -1;
1698 unsigned FirstReg = 0;
1706 FirstReg =
Count - 1;
1718 bool SpillExtendedVolatile =
1720 const auto &
Reg = CSI.getReg();
1721 return Reg >= AArch64::X0 &&
Reg <= AArch64::X18;
1724 int ZPRByteOffset = 0;
1725 int PPRByteOffset = 0;
1730 }
else if (!FPAfterSVECalleeSaves) {
1741 auto AlignOffset = [StackFillDir](
int Offset,
int Align) {
1742 if (StackFillDir < 0)
1748 for (
unsigned i = FirstReg; i <
Count; i += RegInc) {
1750 RPI.Reg1 = CSI[i].getReg();
1752 if (AArch64::GPR64RegClass.
contains(RPI.Reg1)) {
1753 RPI.Type = RegPairInfo::GPR;
1754 RPI.RC = &AArch64::GPR64RegClass;
1755 }
else if (AArch64::FPR64RegClass.
contains(RPI.Reg1)) {
1756 RPI.Type = RegPairInfo::FPR64;
1757 RPI.RC = &AArch64::FPR64RegClass;
1758 }
else if (AArch64::FPR128RegClass.
contains(RPI.Reg1)) {
1759 RPI.Type = RegPairInfo::FPR128;
1760 RPI.RC = &AArch64::FPR128RegClass;
1761 }
else if (AArch64::ZPRRegClass.
contains(RPI.Reg1)) {
1762 RPI.Type = RegPairInfo::ZPR;
1763 RPI.RC = &AArch64::ZPRRegClass;
1764 }
else if (AArch64::PPRRegClass.
contains(RPI.Reg1)) {
1765 RPI.Type = RegPairInfo::PPR;
1766 RPI.RC = &AArch64::PPRRegClass;
1767 }
else if (RPI.Reg1 == AArch64::VG) {
1768 RPI.Type = RegPairInfo::VG;
1769 RPI.RC = &AArch64::FIXED_REGSRegClass;
1774 int &ScalableByteOffset = RPI.Type == RegPairInfo::PPR && SplitPPRs
1779 if (HasCSHazardPadding &&
1782 ByteOffset += StackFillDir * StackHazardSize;
1786 int Scale =
TRI->getSpillSize(*RPI.RC);
1788 if (
unsigned(i + RegInc) <
Count && !HasCSHazardPadding) {
1789 MCRegister NextReg = CSI[i + RegInc].getReg();
1790 unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
1792 case RegPairInfo::GPR:
1793 if (AArch64::GPR64RegClass.
contains(NextReg) &&
1795 RPI.Reg1, NextReg, IsWindows,
1796 NeedsWinCFI, NeedsFrameRecord,
TRI))
1799 case RegPairInfo::FPR64:
1800 if (AArch64::FPR64RegClass.
contains(NextReg) &&
1802 RPI.Reg1, NextReg, IsWindows,
1803 NeedsWinCFI, NeedsFrameRecord,
TRI))
1806 case RegPairInfo::FPR128:
1807 if (AArch64::FPR128RegClass.
contains(NextReg))
1810 case RegPairInfo::PPR:
1812 case RegPairInfo::ZPR:
1814 ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
1817 int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
1822 case RegPairInfo::VG:
1833 assert((!RPI.isPaired() ||
1834 (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
1835 "Out of order callee saved regs!");
1837 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
1838 RPI.Reg1 == AArch64::LR) &&
1839 "FrameRecord must be allocated together with LR");
1842 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
1843 RPI.Reg2 == AArch64::LR) &&
1844 "FrameRecord must be allocated together with LR");
1852 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1853 RPI.Reg1 + 1 == RPI.Reg2))) &&
1854 "Callee-save registers not saved as adjacent register pair!");
1856 RPI.FrameIdx = CSI[i].getFrameIdx();
1859 RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
1863 if (RPI.isScalable() && ScalableByteOffset % Scale != 0)
1864 ScalableByteOffset = AlignOffset(ScalableByteOffset, Scale);
1868 if (!RPI.isScalable() && ByteOffset % Scale != 0)
1869 ByteOffset = AlignOffset(ByteOffset, Scale);
1871 int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1872 assert(OffsetPre % Scale == 0);
1874 if (RPI.isScalable())
1875 ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1877 ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
1882 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1883 (IsWindows && RPI.Reg2 == AArch64::LR)))
1884 ByteOffset += StackFillDir * 8;
1888 if (NeedGapToAlignStack && !IsWindows && !RPI.isScalable() &&
1889 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
1890 ByteOffset % 16 != 0) {
1891 ByteOffset += 8 * StackFillDir;
1897 NeedGapToAlignStack =
false;
1900 int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
1901 assert(OffsetPost % Scale == 0);
1904 int Offset = IsWindows ? OffsetPre : OffsetPost;
1909 ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
1910 (IsWindows && RPI.Reg2 == AArch64::LR)))
1912 RPI.Offset =
Offset / Scale;
1914 assert((!RPI.isPaired() ||
1915 (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
1916 (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
1917 "Offset out of bounds for LDP/STP immediate");
1919 auto isFrameRecord = [&] {
1921 return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
1922 : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
1930 return i > 0 && RPI.Reg1 == AArch64::FP &&
1931 CSI[i - 1].getReg() == AArch64::LR;
1936 if (NeedsFrameRecord && isFrameRecord())
1953 std::reverse(RegPairs.
begin(), RegPairs.
end());
1975 if (homogeneousPrologEpilog(MF)) {
1979 for (
auto &RPI : RegPairs) {
1985 MBB.addLiveIn(RPI.Reg1);
1986 if (RPI.isPaired() && !MRI.
isReserved(RPI.Reg2))
1987 MBB.addLiveIn(RPI.Reg2);
1991 bool PTrueCreated =
false;
2007 unsigned Size =
TRI->getSpillSize(*RPI.RC);
2008 Align Alignment =
TRI->getSpillAlign(*RPI.RC);
2010 case RegPairInfo::GPR:
2011 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
2013 case RegPairInfo::FPR64:
2014 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
2016 case RegPairInfo::FPR128:
2017 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
2019 case RegPairInfo::ZPR:
2020 StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
2022 case RegPairInfo::PPR:
2023 StrOpc = AArch64::STR_PXI;
2025 case RegPairInfo::VG:
2026 StrOpc = AArch64::STRXui;
2032 if (X0Scratch != AArch64::NoRegister)
2038 if (Reg1 == AArch64::VG) {
2040 Reg1 = findScratchNonCalleeSaveRegister(&
MBB,
true);
2041 assert(Reg1 != AArch64::NoRegister);
2051 return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
2052 AArch64::X0, LiveIn.PhysReg);
2060 RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
2062 TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));
2076 dbgs() <<
") -> fi#(" << RPI.FrameIdx;
2078 dbgs() <<
", " << RPI.FrameIdx + 1;
2083 !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
2084 "Windows unwdinding requires a consecutive (FP,LR) pair");
2088 unsigned FrameIdxReg1 = RPI.FrameIdx;
2089 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2095 if (RPI.isPaired() && RPI.isScalable()) {
2101 "Expects SVE2.1 or SME2 target and a predicate register");
2102#ifdef EXPENSIVE_CHECKS
2103 auto IsPPR = [](
const RegPairInfo &c) {
2104 return c.Reg1 == RegPairInfo::PPR;
2106 auto PPRBegin = std::find_if(RegPairs.
begin(), RegPairs.
end(), IsPPR);
2107 auto IsZPR = [](
const RegPairInfo &c) {
2108 return c.Type == RegPairInfo::ZPR;
2110 auto ZPRBegin = std::find_if(RegPairs.
begin(), RegPairs.
end(), IsZPR);
2111 assert(!(PPRBegin < ZPRBegin) &&
2112 "Expected callee save predicate to be handled first");
2114 if (!PTrueCreated) {
2115 PTrueCreated =
true;
2121 MBB.addLiveIn(Reg1);
2123 MBB.addLiveIn(Reg2);
2124 MIB.
addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
2141 MBB.addLiveIn(Reg1);
2142 if (RPI.isPaired()) {
2144 MBB.addLiveIn(Reg2);
2163 if (RPI.Type == RegPairInfo::ZPR) {
2167 }
else if (RPI.Type == RegPairInfo::PPR) {
2187 DL =
MBBI->getDebugLoc();
2190 if (homogeneousPrologEpilog(MF, &
MBB)) {
2193 for (
auto &RPI : RegPairs) {
2201 auto IsPPR = [](
const RegPairInfo &c) {
return c.Type == RegPairInfo::PPR; };
2203 auto PPREnd = std::find_if_not(PPRBegin, RegPairs.
end(), IsPPR);
2204 std::reverse(PPRBegin, PPREnd);
2205 auto IsZPR = [](
const RegPairInfo &c) {
return c.Type == RegPairInfo::ZPR; };
2207 auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.
end(), IsZPR);
2208 std::reverse(ZPRBegin, ZPREnd);
2210 bool PTrueCreated =
false;
2211 for (
const RegPairInfo &RPI : RegPairs) {
2224 unsigned Size =
TRI->getSpillSize(*RPI.RC);
2225 Align Alignment =
TRI->getSpillAlign(*RPI.RC);
2227 case RegPairInfo::GPR:
2228 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2230 case RegPairInfo::FPR64:
2231 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2233 case RegPairInfo::FPR128:
2234 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2236 case RegPairInfo::ZPR:
2237 LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
2239 case RegPairInfo::PPR:
2240 LdrOpc = AArch64::LDR_PXI;
2242 case RegPairInfo::VG:
2249 dbgs() <<
") -> fi#(" << RPI.FrameIdx;
2251 dbgs() <<
", " << RPI.FrameIdx + 1;
2258 unsigned FrameIdxReg1 = RPI.FrameIdx;
2259 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2266 if (RPI.isPaired() && RPI.isScalable()) {
2271 "Expects SVE2.1 or SME2 target and a predicate register");
2272#ifdef EXPENSIVE_CHECKS
2273 assert(!(PPRBegin < ZPRBegin) &&
2274 "Expected callee save predicate to be handled first");
2276 if (!PTrueCreated) {
2277 PTrueCreated =
true;
2282 MIB.
addReg( AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
2299 if (RPI.isPaired()) {
2326 return std::optional<int>(PSV->getFrameIndex());
2337 return std::nullopt;
2343 if (!
MI.mayLoadOrStore() ||
MI.getNumMemOperands() < 1)
2344 return std::nullopt;
2351 return AArch64::PPRRegClass.contains(
MI.getOperand(0).getReg());
2357void AArch64FrameLowering::determineStackHazardSlot(
2360 auto *AFI = MF.
getInfo<AArch64FunctionInfo>();
2361 if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
2375 return AArch64::FPR64RegClass.contains(Reg) ||
2376 AArch64::FPR128RegClass.contains(Reg) ||
2377 AArch64::ZPRRegClass.contains(Reg);
2380 return AArch64::PPRRegClass.contains(Reg);
2382 bool HasFPRStackObjects =
false;
2383 bool HasPPRStackObjects =
false;
2385 enum SlotType : uint8_t {
2396 for (
auto &
MBB : MF) {
2397 for (
auto &
MI :
MBB) {
2399 if (!FI || FI < 0 || FI >
int(SlotTypes.size()))
2406 ? SlotType::ZPRorFPR
2412 for (
int FI = 0; FI < int(SlotTypes.size()); ++FI) {
2413 HasFPRStackObjects |= SlotTypes[FI] == SlotType::ZPRorFPR;
2416 if (SlotTypes[FI] == SlotType::PPR) {
2418 HasPPRStackObjects =
true;
2423 if (HasFPRCSRs || HasFPRStackObjects) {
2426 << StackHazardSize <<
"\n");
2437 LLVM_DEBUG(
dbgs() <<
"Using SplitSVEObjects for SVE CC function\n");
2443 LLVM_DEBUG(
dbgs() <<
"Determining if SplitSVEObjects should be used in "
2444 "non-SVE CC function...\n");
2451 <<
"Calling convention is not supported with SplitSVEObjects\n");
2455 if (!HasPPRCSRs && !HasPPRStackObjects) {
2457 dbgs() <<
"Not using SplitSVEObjects as no PPRs are on the stack\n");
2461 if (!HasFPRCSRs && !HasFPRStackObjects) {
2464 <<
"Not using SplitSVEObjects as no FPRs or ZPRs are on the stack\n");
2468 [[maybe_unused]]
const AArch64Subtarget &Subtarget =
2469 MF.getSubtarget<AArch64Subtarget>();
2471 "Expected SVE to be available for PPRs");
2473 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2477 BitVector FPRZRegs(SavedRegs.
size());
2478 for (
size_t Reg = 0,
E = SavedRegs.
size(); HasFPRCSRs &&
Reg <
E; ++
Reg) {
2479 BitVector::reference RegBit = SavedRegs[
Reg];
2482 unsigned SubRegIdx = 0;
2484 SubRegIdx = AArch64::dsub;
2486 SubRegIdx = AArch64::zsub;
2493 TRI->getMatchingSuperReg(
Reg, SubRegIdx, &AArch64::ZPRRegClass);
2496 SavedRegs |= FPRZRegs;
2516 unsigned UnspilledCSGPR = AArch64::NoRegister;
2517 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2523 RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister() :
MCRegister();
2525 unsigned ExtraCSSpill = 0;
2526 bool HasUnpairedGPR64 =
false;
2527 bool HasPairZReg =
false;
2528 BitVector UserReservedRegs = RegInfo->getUserReservedRegs(MF);
2529 BitVector ReservedRegs = RegInfo->getReservedRegs(MF);
2532 for (
unsigned i = 0; CSRegs[i]; ++i) {
2536 if (Reg == BasePointerReg)
2541 if (UserReservedRegs[Reg]) {
2542 SavedRegs.
reset(Reg);
2546 bool RegUsed = SavedRegs.
test(Reg);
2548 const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
2549 if (RegIsGPR64 || AArch64::FPR64RegClass.
contains(Reg) ||
2550 AArch64::FPR128RegClass.
contains(Reg)) {
2553 if (HasUnpairedGPR64)
2554 PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
2556 PairedReg = CSRegs[i ^ 1];
2563 if (RegIsGPR64 && !AArch64::GPR64RegClass.
contains(PairedReg)) {
2564 PairedReg = AArch64::NoRegister;
2565 HasUnpairedGPR64 =
true;
2567 assert(PairedReg == AArch64::NoRegister ||
2568 AArch64::GPR64RegClass.
contains(Reg, PairedReg) ||
2569 AArch64::FPR64RegClass.
contains(Reg, PairedReg) ||
2570 AArch64::FPR128RegClass.
contains(Reg, PairedReg));
2573 if (AArch64::GPR64RegClass.
contains(Reg) && !ReservedRegs[Reg]) {
2574 UnspilledCSGPR = Reg;
2575 UnspilledCSGPRPaired = PairedReg;
2583 if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
2584 !SavedRegs.
test(PairedReg)) {
2585 SavedRegs.
set(PairedReg);
2586 if (AArch64::GPR64RegClass.
contains(PairedReg) &&
2587 !ReservedRegs[PairedReg])
2588 ExtraCSSpill = PairedReg;
2591 HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
2592 SavedRegs.
test(CSRegs[i ^ 1]));
2600 if (PnReg.isValid())
2606 SavedRegs.
set(AArch64::P8);
2611 "Predicate cannot be a reserved register");
2621 SavedRegs.
set(AArch64::X18);
2627 determineStackHazardSlot(MF, SavedRegs);
2630 unsigned CSStackSize = 0;
2631 unsigned ZPRCSStackSize = 0;
2632 unsigned PPRCSStackSize = 0;
2634 for (
unsigned Reg : SavedRegs.
set_bits()) {
2636 assert(RC &&
"expected register class!");
2637 auto SpillSize =
TRI->getSpillSize(*RC);
2638 bool IsZPR = AArch64::ZPRRegClass.contains(Reg);
2639 bool IsPPR = !IsZPR && AArch64::PPRRegClass.contains(Reg);
2641 ZPRCSStackSize += SpillSize;
2643 PPRCSStackSize += SpillSize;
2645 CSStackSize += SpillSize;
2651 unsigned NumSavedRegs = SavedRegs.
count();
2664 SavedRegs.
set(AArch64::LR);
2669 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2670 SavedRegs.
set(AArch64::FP);
2671 SavedRegs.
set(AArch64::LR);
2675 dbgs() <<
"*** determineCalleeSaves\nSaved CSRs:";
2676 for (
unsigned Reg : SavedRegs.
set_bits())
2682 auto [ZPRLocalStackSize, PPRLocalStackSize] =
2684 uint64_t SVELocals = ZPRLocalStackSize + PPRLocalStackSize;
2686 alignTo(ZPRCSStackSize + PPRCSStackSize + SVELocals, 16);
2687 bool CanEliminateFrame = (SavedRegs.
count() == 0) && !SVEStackSize;
2696 int64_t CalleeStackUsed = 0;
2699 if (FixedOff > CalleeStackUsed)
2700 CalleeStackUsed = FixedOff;
2704 bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
2705 CalleeStackUsed) > EstimatedStackSizeLimit;
2706 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2716 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2718 <<
" to get a scratch register.\n");
2719 SavedRegs.
set(UnspilledCSGPR);
2720 ExtraCSSpill = UnspilledCSGPR;
2725 if (producePairRegisters(MF)) {
2726 if (UnspilledCSGPRPaired == AArch64::NoRegister) {
2729 SavedRegs.
reset(UnspilledCSGPR);
2730 ExtraCSSpill = AArch64::NoRegister;
2733 SavedRegs.
set(UnspilledCSGPRPaired);
2742 unsigned Size =
TRI->getSpillSize(RC);
2743 Align Alignment =
TRI->getSpillAlign(RC);
2745 RS->addScavengingFrameIndex(FI);
2746 LLVM_DEBUG(
dbgs() <<
"No available CS registers, allocated fi#" << FI
2747 <<
" as the emergency spill slot.\n");
2752 CSStackSize += 8 * (SavedRegs.
count() - NumSavedRegs);
2761 << EstimatedStackSize + AlignedCSStackSize <<
" bytes.\n");
2765 "Should not invalidate callee saved info");
2776 std::vector<CalleeSavedInfo> &CSI)
const {
2785 std::reverse(CSI.begin(), CSI.end());
2805 find_if(CSI, [](
auto &Info) {
return Info.getReg() == AArch64::LR; });
2806 if (It != CSI.end())
2807 CSI.insert(It, VGInfo);
2809 CSI.push_back(VGInfo);
2813 int HazardSlotIndex = std::numeric_limits<int>::max();
2814 for (
auto &CS : CSI) {
2822 assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
2823 "Unexpected register order for hazard slot");
2825 LLVM_DEBUG(
dbgs() <<
"Created CSR Hazard at slot " << HazardSlotIndex
2831 unsigned Size = RegInfo->getSpillSize(*RC);
2832 Align Alignment(RegInfo->getSpillAlign(*RC));
2834 CS.setFrameIdx(FrameIdx);
2839 Reg == AArch64::FP) {
2849 HazardSlotIndex == std::numeric_limits<int>::max()) {
2851 LLVM_DEBUG(
dbgs() <<
"Created CSR Hazard at slot " << HazardSlotIndex
2878 int &Min,
int &Max) {
2879 Min = std::numeric_limits<int>::max();
2880 Max = std::numeric_limits<int>::min();
2886 for (
auto &CS : CSI) {
2887 if (AArch64::ZPRRegClass.
contains(CS.getReg()) ||
2888 AArch64::PPRRegClass.contains(CS.getReg())) {
2889 assert((Max == std::numeric_limits<int>::min() ||
2890 Max + 1 == CS.getFrameIdx()) &&
2891 "SVE CalleeSaves are not consecutive");
2892 Min = std::min(Min, CS.getFrameIdx());
2893 Max = std::max(Max, CS.getFrameIdx());
2896 return Min != std::numeric_limits<int>::max();
2909 uint64_t &ZPRStackTop = SVEStack.ZPRStackSize;
2917 "SVE vectors should never be passed on the stack by value, only by "
2921 auto AllocateObject = [&](
int FI) {
2930 if (Alignment >
Align(16))
2932 "Alignment of scalable vectors > 16 bytes is not yet supported");
2935 StackTop =
alignTo(StackTop, Alignment);
2937 assert(StackTop < (
uint64_t)std::numeric_limits<int64_t>::max() &&
2938 "SVE StackTop far too large?!");
2940 int64_t
Offset = -int64_t(StackTop);
2948 int MinCSFrameIndex, MaxCSFrameIndex;
2950 for (
int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI)
2963 int StackProtectorFI = -1;
2967 ObjectsToAllocate.
push_back(StackProtectorFI);
2983 for (
unsigned FI : ObjectsToAllocate)
2998 "Upwards growing stack unsupported");
3013 int64_t CurrentOffset =
3017 int FrameIndex =
H.CatchObj.FrameIndex;
3018 if ((FrameIndex != INT_MAX) && MFI.
getObjectOffset(FrameIndex) == 0) {
3029 int64_t UnwindHelpOffset =
alignTo(CurrentOffset + 8,
Align(16));
3030 assert(UnwindHelpOffset == getFixedObjectSize(MF, AFI,
true,
3032 "UnwindHelpOffset must be at the start of the fixed object area");
3035 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
3045 RS->enterBasicBlockEnd(
MBB);
3047 Register DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
3048 assert(DstReg &&
"There must be a free register after frame setup");
3059struct TagStoreInstr {
3067 MachineFunction *MF;
3068 MachineBasicBlock *
MBB;
3069 MachineRegisterInfo *MRI;
3078 StackOffset FrameRegOffset;
3082 std::optional<int64_t> FrameRegUpdate;
3084 unsigned FrameRegUpdateFlags;
3094 TagStoreEdit(MachineBasicBlock *
MBB,
bool ZeroData)
3095 :
MBB(
MBB), ZeroData(ZeroData) {
3101 void addInstruction(TagStoreInstr
I) {
3103 TagStores.
back().Offset + TagStores.
back().Size ==
I.Offset) &&
3104 "Non-adjacent tag store instructions.");
/// Removes every entry from TagStores.
3107 void clear() { TagStores.
clear(); }
3112 const AArch64FrameLowering *TFI,
bool TryMergeSPUpdate);
3119 const int64_t kMinOffset = -256 * 16;
3120 const int64_t kMaxOffset = 255 * 16;
3123 int64_t BaseRegOffsetBytes = FrameRegOffset.
getFixed();
3124 if (BaseRegOffsetBytes < kMinOffset ||
3125 BaseRegOffsetBytes + (
Size -
Size % 32) > kMaxOffset ||
3129 BaseRegOffsetBytes % 16 != 0) {
3134 BaseRegOffsetBytes = 0;
3139 int64_t InstrSize = (
Size > 16) ? 32 : 16;
3142 ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
3144 assert(BaseRegOffsetBytes % 16 == 0);
3148 .
addImm(BaseRegOffsetBytes / 16)
3152 if (BaseRegOffsetBytes == 0)
3154 BaseRegOffsetBytes += InstrSize;
3173 int64_t LoopSize =
Size;
3176 if (FrameRegUpdate && *FrameRegUpdate)
3177 LoopSize -= LoopSize % 32;
3179 TII->get(ZeroData ? AArch64::STZGloop_wback
3180 : AArch64::STGloop_wback))
3187 LoopI->
setFlags(FrameRegUpdateFlags);
3189 int64_t ExtraBaseRegUpdate =
3190 FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.
getFixed() -
Size) : 0;
3191 LLVM_DEBUG(
dbgs() <<
"TagStoreEdit::emitLoop: LoopSize=" << LoopSize
3192 <<
", Size=" <<
Size
3193 <<
", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
3194 <<
", FrameRegUpdate=" << FrameRegUpdate
3195 <<
", FrameRegOffset.getFixed()="
3196 << FrameRegOffset.
getFixed() <<
"\n");
3197 if (LoopSize <
Size) {
3201 int64_t STGOffset = ExtraBaseRegUpdate + 16;
3202 assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
3203 "STG immediate out of range");
3205 TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
3212 }
else if (ExtraBaseRegUpdate) {
3214 int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
3215 assert(AddSubOffset <= 4095 &&
"ADD/SUB immediate out of range");
3218 TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
3231 int64_t
Size, int64_t *TotalOffset) {
3233 if ((
MI.getOpcode() == AArch64::ADDXri ||
3234 MI.getOpcode() == AArch64::SUBXri) &&
3235 MI.getOperand(0).getReg() ==
Reg &&
MI.getOperand(1).getReg() ==
Reg) {
3237 int64_t
Offset =
MI.getOperand(2).getImm() << Shift;
3238 if (
MI.getOpcode() == AArch64::SUBXri)
3249 const int64_t kMaxOffset = 4080 - 16;
3251 const int64_t kMinOffset = -4095;
3252 if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
3253 PostOffset % 16 == 0) {
3264 for (
auto &TS : TSE) {
3268 if (
MI->memoperands_empty()) {
3272 MemRefs.
append(
MI->memoperands_begin(),
MI->memoperands_end());
3278 bool TryMergeSPUpdate) {
3279 if (TagStores.
empty())
3281 TagStoreInstr &FirstTagStore = TagStores[0];
3282 TagStoreInstr &LastTagStore = TagStores[TagStores.
size() - 1];
3283 Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
3284 DL = TagStores[0].MI->getDebugLoc();
3288 *MF, FirstTagStore.Offset,
false ,
3292 FrameRegUpdate = std::nullopt;
3294 mergeMemRefs(TagStores, CombinedMemRefs);
3297 dbgs() <<
"Replacing adjacent STG instructions:\n";
3298 for (
const auto &Instr : TagStores) {
3307 if (TagStores.
size() < 2)
3309 emitUnrolled(InsertI);
3312 int64_t TotalOffset = 0;
3313 if (TryMergeSPUpdate) {
3319 if (InsertI !=
MBB->
end() &&
3320 canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.
getFixed() +
Size,
3322 UpdateInstr = &*InsertI++;
3328 if (!UpdateInstr && TagStores.
size() < 2)
3332 FrameRegUpdate = TotalOffset;
3333 FrameRegUpdateFlags = UpdateInstr->
getFlags();
3340 for (
auto &TS : TagStores)
3341 TS.MI->eraseFromParent();
3345 int64_t &
Size,
bool &ZeroData) {
3349 unsigned Opcode =
MI.getOpcode();
3350 ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
3351 Opcode == AArch64::STZ2Gi);
3353 if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
3354 if (!
MI.getOperand(0).isDead() || !
MI.getOperand(1).isDead())
3356 if (!
MI.getOperand(2).isImm() || !
MI.getOperand(3).isFI())
3359 Size =
MI.getOperand(2).getImm();
3363 if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
3365 else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
3370 if (
MI.getOperand(0).getReg() != AArch64::SP || !
MI.getOperand(1).isFI())
3374 16 *
MI.getOperand(2).getImm();
3378static size_t countAvailableScavengerSlots(
LivePhysRegs &LiveRegs,
3383 return LiveRegs.available(MRI,
Reg);
3386 size_t NumEmergencySlots = 0;
3388 NumEmergencySlots =
RS->getNumScavengingFrameIndices();
3390 return FreeGPRs + NumEmergencySlots;
3409 if (!isMergeableStackTaggingInstruction(
MI,
Offset,
Size, FirstZeroData))
3415 constexpr int kScanLimit = 10;
3418 NextI !=
E &&
Count < kScanLimit; ++NextI) {
3427 if (isMergeableStackTaggingInstruction(
MI,
Offset,
Size, ZeroData)) {
3428 if (ZeroData != FirstZeroData)
3436 if (!
MI.isTransient())
3445 if (
MI.mayLoadOrStore() ||
MI.hasUnmodeledSideEffects() ||
MI.isCall())
3461 LiveRegs.addLiveOuts(*
MBB);
3466 LiveRegs.stepBackward(*
I);
3469 if (LiveRegs.contains(AArch64::NZCV))
3480 dbgs() <<
"Failed to merge MTE stack tagging instructions into loop "
3481 <<
"due to high register pressure.\n");
3486 [](
const TagStoreInstr &
Left,
const TagStoreInstr &
Right) {
3491 int64_t CurOffset = Instrs[0].Offset;
3492 for (
auto &Instr : Instrs) {
3493 if (CurOffset >
Instr.Offset)
3500 TagStoreEdit TSE(
MBB, FirstZeroData);
3501 std::optional<int64_t> EndOffset;
3502 for (
auto &Instr : Instrs) {
3503 if (EndOffset && *EndOffset !=
Instr.Offset) {
3505 TSE.emitCode(InsertI, TFI,
false);
3509 TSE.addInstruction(Instr);
3528 II = tryMergeAdjacentSTG(
II,
this, RS);
3535 shouldSignReturnAddressEverywhere(MF))
3544 bool IgnoreSPUpdates)
const {
3546 if (IgnoreSPUpdates) {
3549 FrameReg = AArch64::SP;
3559 FrameReg = AArch64::SP;
3584 bool IsValid =
false;
3586 int ObjectIndex = 0;
3588 int GroupIndex = -1;
3590 bool ObjectFirst =
false;
3593 bool GroupFirst =
false;
3598 enum { AccessFPR = 1, AccessHazard = 2, AccessGPR = 4 };
3602 SmallVector<int, 8> CurrentMembers;
3603 int NextGroupIndex = 0;
3604 std::vector<FrameObject> &Objects;
3607 GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
3608 void AddMember(
int Index) { CurrentMembers.
push_back(Index); }
3609 void EndCurrentGroup() {
3610 if (CurrentMembers.
size() > 1) {
3615 for (
int Index : CurrentMembers) {
3616 Objects[
Index].GroupIndex = NextGroupIndex;
3622 CurrentMembers.clear();
3626bool FrameObjectCompare(
const FrameObject &
A,
const FrameObject &
B) {
3648 return std::make_tuple(!
A.IsValid,
A.Accesses,
A.ObjectFirst,
A.GroupFirst,
3649 A.GroupIndex,
A.ObjectIndex) <
3650 std::make_tuple(!
B.IsValid,
B.Accesses,
B.ObjectFirst,
B.GroupFirst,
3651 B.GroupIndex,
B.ObjectIndex);
3660 ObjectsToAllocate.
empty())
3665 for (
auto &Obj : ObjectsToAllocate) {
3666 FrameObjects[Obj].IsValid =
true;
3667 FrameObjects[Obj].ObjectIndex = Obj;
3672 GroupBuilder GB(FrameObjects);
3673 for (
auto &
MBB : MF) {
3674 for (
auto &
MI :
MBB) {
3675 if (
MI.isDebugInstr())
3680 if (FI && *FI >= 0 && *FI < (
int)FrameObjects.size()) {
3683 FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
3685 FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
3690 switch (
MI.getOpcode()) {
3691 case AArch64::STGloop:
3692 case AArch64::STZGloop:
3696 case AArch64::STZGi:
3697 case AArch64::ST2Gi:
3698 case AArch64::STZ2Gi:
3711 FrameObjects[FI].IsValid)
3719 GB.AddMember(TaggedFI);
3721 GB.EndCurrentGroup();
3724 GB.EndCurrentGroup();
3729 FrameObject::AccessHazard;
3731 for (
auto &Obj : FrameObjects)
3732 if (!Obj.Accesses ||
3733 Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
3734 Obj.Accesses = FrameObject::AccessGPR;
3743 FrameObjects[*TBPI].ObjectFirst =
true;
3744 FrameObjects[*TBPI].GroupFirst =
true;
3745 int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
3746 if (FirstGroupIndex >= 0)
3747 for (FrameObject &Object : FrameObjects)
3748 if (Object.GroupIndex == FirstGroupIndex)
3749 Object.GroupFirst =
true;
3755 for (
auto &Obj : FrameObjects) {
3759 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3763 dbgs() <<
"Final frame order:\n";
3764 for (
auto &Obj : FrameObjects) {
3767 dbgs() <<
" " << Obj.ObjectIndex <<
": group " << Obj.GroupIndex;
3768 if (Obj.ObjectFirst)
3769 dbgs() <<
", first";
3771 dbgs() <<
", group-first";
3782AArch64FrameLowering::inlineStackProbeLoopExactMultiple(
3793 MF.
insert(MBBInsertPoint, LoopMBB);
3795 MF.
insert(MBBInsertPoint, ExitMBB);
3830 MBB.addSuccessor(LoopMBB);
3834 return ExitMBB->
begin();
3837void AArch64FrameLowering::inlineStackProbeFixed(
3842 const AArch64InstrInfo *
TII =
3844 AArch64FunctionInfo *AFI = MF.
getInfo<AArch64FunctionInfo>();
3849 int64_t ProbeSize = MF.
getInfo<AArch64FunctionInfo>()->getStackProbeSize();
3850 int64_t NumBlocks = FrameSize / ProbeSize;
3851 int64_t ResidualSize = FrameSize % ProbeSize;
3853 LLVM_DEBUG(
dbgs() <<
"Stack probing: total " << FrameSize <<
" bytes, "
3854 << NumBlocks <<
" blocks of " << ProbeSize
3855 <<
" bytes, plus " << ResidualSize <<
" bytes\n");
3860 for (
int i = 0; i < NumBlocks; ++i) {
3866 EmitAsyncCFI && !HasFP, CFAOffset);
3879 }
else if (NumBlocks != 0) {
3885 EmitAsyncCFI && !HasFP, CFAOffset);
3887 MBBI = inlineStackProbeLoopExactMultiple(
MBBI, ProbeSize, ScratchReg);
3889 if (EmitAsyncCFI && !HasFP) {
3892 .buildDefCFARegister(AArch64::SP);
3896 if (ResidualSize != 0) {
3902 EmitAsyncCFI && !HasFP, CFAOffset);
3923 SmallVector<MachineInstr *, 4> ToReplace;
3924 for (MachineInstr &
MI :
MBB)
3925 if (
MI.getOpcode() == AArch64::PROBED_STACKALLOC ||
3926 MI.getOpcode() == AArch64::PROBED_STACKALLOC_VAR)
3929 for (MachineInstr *
MI : ToReplace) {
3930 if (
MI->getOpcode() == AArch64::PROBED_STACKALLOC) {
3931 Register ScratchReg =
MI->getOperand(0).getReg();
3932 int64_t FrameSize =
MI->getOperand(1).getImm();
3934 MI->getOperand(3).getImm());
3935 inlineStackProbeFixed(
MI->getIterator(), ScratchReg, FrameSize,
3938 assert(
MI->getOpcode() == AArch64::PROBED_STACKALLOC_VAR &&
3939 "Stack probe pseudo-instruction expected");
3940 const AArch64InstrInfo *
TII =
3941 MI->getMF()->getSubtarget<AArch64Subtarget>().getInstrInfo();
3942 Register TargetReg =
MI->getOperand(0).getReg();
3943 (void)
TII->probedStackAlloc(
MI->getIterator(), TargetReg,
true);
3945 MI->eraseFromParent();
3965 return std::make_tuple(
start(),
Idx) <
3966 std::make_tuple(Rhs.
start(), Rhs.
Idx);
3996 << (
Offset.getFixed() < 0 ?
"" :
"+") <<
Offset.getFixed();
3997 if (
Offset.getScalable())
3998 OS << (
Offset.getScalable() < 0 ?
"" :
"+") <<
Offset.getScalable()
4009void AArch64FrameLowering::emitRemarks(
4012 auto *AFI = MF.
getInfo<AArch64FunctionInfo>();
4017 const uint64_t HazardSize =
4020 if (HazardSize == 0)
4028 std::vector<StackAccess> StackAccesses(MFI.
getNumObjects());
4030 size_t NumFPLdSt = 0;
4031 size_t NumNonFPLdSt = 0;
4034 for (
const MachineBasicBlock &
MBB : MF) {
4035 for (
const MachineInstr &
MI :
MBB) {
4036 if (!
MI.mayLoadOrStore() ||
MI.getNumMemOperands() < 1)
4038 for (MachineMemOperand *MMO :
MI.memoperands()) {
4045 StackAccesses[ArrIdx].Idx = FrameIdx;
4046 StackAccesses[ArrIdx].Offset =
4057 StackAccesses[ArrIdx].AccessTypes |= RegTy;
4068 if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
4079 if (StackAccesses.front().isMixed())
4080 MixedObjects.push_back(&StackAccesses.front());
4082 for (
auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
4084 const auto &
First = *It;
4085 const auto &Second = *(It + 1);
4087 if (Second.isMixed())
4088 MixedObjects.push_back(&Second);
4090 if ((
First.isSME() && Second.isCPU()) ||
4091 (
First.isCPU() && Second.isSME())) {
4092 uint64_t Distance =
static_cast<uint64_t
>(Second.start() -
First.end());
4093 if (Distance < HazardSize)
4098 auto EmitRemark = [&](llvm::StringRef Str) {
4100 auto R = MachineOptimizationRemarkAnalysis(
4101 "sme",
"StackHazard", MF.getFunction().getSubprogram(), &MF.front());
4102 return R <<
formatv(
"stack hazard in '{0}': ", MF.getName()).str() << Str;
4106 for (
const auto &
P : HazardPairs)
4107 EmitRemark(
formatv(
"{0} is too close to {1}", *
P.first, *
P.second).str());
4109 for (
const auto *Obj : MixedObjects)
4111 formatv(
"{0} accessed by both GP and FP instructions", *Obj).str());
static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB)
static const unsigned DefaultSafeSPDisplacement
This is the biggest offset to the stack pointer we can encode in aarch64 instructions (without using ...
static RegState getPrologueDeath(MachineFunction &MF, unsigned Reg)
static bool produceCompactUnwindFrame(const AArch64FrameLowering &, MachineFunction &MF)
static cl::opt< bool > StackTaggingMergeSetTag("stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden)
bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget, MachineFunction &MF)
static std::optional< int > getLdStFrameID(const MachineInstr &MI, const MachineFrameInfo &MFI)
static cl::opt< bool > SplitSVEObjects("aarch64-split-sve-objects", cl::desc("Split allocation of ZPR & PPR objects"), cl::init(true), cl::Hidden)
static cl::opt< bool > StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming", cl::init(false), cl::Hidden)
void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI, SmallVectorImpl< RegPairInfo > &RegPairs, bool NeedsFrameRecord)
static cl::opt< bool > OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden)
static cl::opt< bool > DisableMultiVectorSpillFill("aarch64-disable-multivector-spill-fill", cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden)
static bool invalidateRegisterPairing(bool SpillExtendedVolatile, unsigned SpillCount, unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord, const TargetRegisterInfo *TRI)
Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
cl::opt< bool > EnableHomogeneousPrologEpilog("homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)"))
static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL, const MachineFunction &MF)
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile, unsigned SpillCount, unsigned Reg1, unsigned Reg2, bool NeedsWinCFI, const TargetRegisterInfo *TRI)
static SVEStackSizes determineSVEStackSizes(MachineFunction &MF, AssignObjectOffsets AssignOffsets)
Process all the SVE stack objects and determine the SVE stack size and offsets for each object.
static bool isTargetWindows(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max)
Returns true if there are any SVE callee saves.
static cl::opt< unsigned > StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), cl::Hidden)
static MCRegister getRegisterOrZero(MCRegister Reg, bool HasSVE)
static unsigned getStackHazardSize(const MachineFunction &MF)
MCRegister findFreePredicateReg(BitVector &SavedRegs)
static bool isPPRAccess(const MachineInstr &MI)
static std::optional< int > getMMOFrameID(MachineMemOperand *MMO, const MachineFrameInfo &MFI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declaration of the AArch64PrologueEmitter and AArch64EpilogueEmitter classes,...
static const int kSetTagLoopThreshold
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
#define CASE(ATTRNAME, AANAME,...)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
const HexagonInstrInfo * TII
static std::string getTypeString(Type *T)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
void emitEpilogue()
Emit the epilogue.
StackOffset getSVEStackSize(const MachineFunction &MF) const
Returns the size of the entire SVE stackframe (PPRs + ZPRs).
StackOffset getZPRStackSize(const MachineFunction &MF) const
Returns the size of the entire ZPR stackframe (calleesaves + spills).
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool enableStackSlotScavenging(const MachineFunction &MF) const override
Returns true if the stack slot holes in the fixed and callee-save stack area should be used when allo...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool enableFullCFIFixup(const MachineFunction &MF) const override
enableFullCFIFixup - Returns true if we may need to fix the unwind information such that it is accura...
StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI) const override
getFrameIndexReferenceFromSP - This method returns the offset from the stack pointer to the slot of t...
bool enableCFIFixup(const MachineFunction &MF) const override
Returns true if we may need to fix the unwind information for the function.
StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const override
getNonLocalFrameIndexReference - This method returns the offset used to reference a frame index locat...
TargetStackID::Value getStackIDForScalableVectors() const override
Returns the StackID that scalable vectors should be associated with.
friend class AArch64PrologueEmitter
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
friend class AArch64EpilogueEmitter
void resetCFIToInitialState(MachineBasicBlock &MBB) const override
Emit CFI instructions that recreate the state of the unwind information upon function entry.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
bool hasSVECalleeSavesAboveFrameRecord(const MachineFunction &MF) const
StackOffset resolveFrameOffsetReference(const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, TargetStackID::Value StackID, Register &FrameReg, bool PreferFP, bool ForSimm) const
bool canUseRedZone(const MachineFunction &MF) const
Can this function use the red zone for local allocations.
bool needsWinCFI(const MachineFunction &MF) const
bool isFPReserved(const MachineFunction &MF) const
Should the Frame Pointer be reserved for the current function?
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const
Funclets only need to account for space for the callee saved registers, as the locals are accounted f...
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack frame.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
StackOffset getPPRStackSize(const MachineFunction &MF) const
Returns the size of the entire PPR stackframe (calleesaves + spills + hazard padding).
int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB) const
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
For Win64 AArch64 EH, the offset to the Unwind object is from the SP before the update.
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve the parent's frame pointer...
bool requiresSaveVG(const MachineFunction &MF) const
void emitPacRetPlusLeafHardening(MachineFunction &MF) const
Harden the entire function with pac-ret.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
unsigned getPPRCalleeSavedStackSize() const
void setHasStackFrame(bool s)
void setSwiftAsyncContextFrameIdx(int FI)
unsigned getTailCallReservedStack() const
unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const
void setCalleeSaveBaseToFrameRecordOffset(int Offset)
bool hasStackProbing() const
unsigned getArgumentStackToRestore() const
void setCalleeSaveStackHasFreeSpace(bool s)
int getCalleeSaveBaseToFrameRecordOffset() const
SignReturnAddress getSignReturnAddressCondition() const
bool hasStreamingModeChanges() const
void setPredicateRegForFillSpill(unsigned Reg)
int getStackHazardSlotIndex() const
void setCalleeSavedStackSize(unsigned Size)
void setSplitSVEObjects(bool s)
bool hasStackFrame() const
void setStackSizeSVE(uint64_t ZPR, uint64_t PPR)
std::optional< int > getTaggedBasePointerIndex() const
SMEAttrs getSMEFnAttrs() const
uint64_t getLocalStackSize() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
unsigned getVarArgsGPRSize() const
uint64_t getStackSizePPR() const
bool hasSwiftAsyncContext() const
bool hasStackHazardSlotIndex() const
void setStackHazardSlotIndex(int Index)
unsigned getZPRCalleeSavedStackSize() const
void setStackHazardCSRSlotIndex(int Index)
unsigned getPredicateRegForFillSpill() const
void setSVECalleeSavedStackSize(unsigned ZPR, unsigned PPR)
bool hasCalculatedStackSizeSVE() const
uint64_t getStackSizeZPR() const
bool hasSVEStackSize() const
bool isStackHazardIncludedInCalleeSaveArea() const
unsigned getSVECalleeSavedStackSize() const
bool hasSplitSVEObjects() const
bool needsAsyncDwarfUnwindInfo(const MachineFunction &MF) const
bool hasCalleeSaveStackFreeSpace() const
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
void emitPrologue()
Emit the prologue.
bool isTargetWindows() const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64InstrInfo * getInstrInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isTargetMachO() const
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
bool isStreaming() const
Returns true if the function has a streaming body.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
unsigned getRedZoneSize(const Function &F) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
BitVector & reset()
Reset all bits in the bitvector.
size_type count() const
Returns the number of bits which are set.
BitVector & set()
Set all bits in the bitvector.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
Returns the number of bits in this bitvector.
Helper class for creating CFI instructions and inserting them into MIR.
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
AttributeList getAttributes() const
Return the attribute list for this Function.
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
bool usesWindowsCFI() const
Wrapper class representing physical registers. Should be passed by value.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MachineInstr & instr_back()
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
reverse_iterator rbegin()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
bool isCalleeSavedObjectIndex(int ObjectIdx) const
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasScalableStackID(int ObjectIdx) const
int getStackProtectorIndex() const
Return the index for the stack protector object.
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isMaxCallFrameSizeComputed() const
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment, TargetStackID::Value StackID=TargetStackID::Default)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getNumObjects() const
Return the number of objects.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasStackProtectorIndex() const
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
void setIsCalleeSavedObjectIndex(int ObjectIdx, bool IsCalleeSaved)
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool hasEHFunclets() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
void setFlags(unsigned flags)
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI bool isLiveIn(Register Reg) const
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasStreamingInterface() const
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingBody() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
int64_t getFixed() const
Returns the fixed component of the stack.
int64_t getScalable() const
Returns the scalable component of the stack.
static StackOffset get(int64_t Fixed, int64_t Scalable)
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
virtual bool enableCFIFixup(const MachineFunction &MF) const
Returns true if we may need to fix the unwind information for the function.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
LLVM_ABI bool FramePointerIsReserved(const MachineFunction &MF) const
FramePointerIsReserved - This returns true if the frame pointer must always either point to a new fra...
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Triple - Helper class for working with autoconf configuration names.
bool isOSBinFormatMachO() const
Tests whether the environment is MachO.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
const unsigned StackProbeMaxLoopUnroll
Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxUnprobedStack
Maximum allowed number of unprobed bytes above SP at an ABI boundary.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AArch64_SVE_VectorCall
Used between AArch64 SVE functions.
@ PreserveMost
Used for runtime calls that preserve most registers.
@ CXX_FAST_TLS
Used for access functions.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ PreserveNone
Used for runtime calls that preserve no general registers.
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
@ C
The default llvm calling convention, compatible with C.
@ ScalablePredicateVector
initializer< Ty > init(const Ty &Val)
NodeAddr< InstrNode * > Instr
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
RegState
Flags to represent properties of register accesses.
@ Define
Register definition.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
FunctionAddr VTableAddr Count
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool operator<(const StackAccess &Rhs) const
void print(raw_ostream &OS) const
std::string getTypeString() const
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Pair of physical register and lane mask.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray