using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> ReverseCSRRestoreSeq("reverse-csr-restore-seq",
                                          cl::desc("reverse the CSR restore sequence"),
                                          cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
  bool IsTailCallReturn = false;

  unsigned RetOpcode = MBBI->getOpcode();
  IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                     RetOpcode == AArch64::TCRETURNri ||
                     RetOpcode == AArch64::TCRETURNriBTI;

  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {

    ArgumentPopSize = StackAdjust.getImm();
  }

  return ArgumentPopSize;
bool AArch64FrameLowering::homogeneousPrologEpilog(MachineFunction &MF,
                                                   MachineBasicBlock *Exit) const {

bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
    if (MI.isDebugInstr() || MI.isPseudo() ||
        MI.getOpcode() == AArch64::ADDXri ||
        MI.getOpcode() == AArch64::ADDSXri)
  if (!IsWin64 || IsFunclet) {
    return alignTo(AFI->getTailCallReservedStack(), 16);
  }

  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
  return alignTo(VarArgsArea + UnwindHelpObject, 16);
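// On Win64 the fixed-object area computed above, allocated next to SP on
// entry, covers the spilled vararg GPRs plus, when EH funclets are present,
// an 8-byte UnwindHelp slot; the total is kept 16-byte aligned.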
  const unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
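// The check above restricts red-zone use (128 bytes below SP by default) to
// leaf functions with small frames: any call, frame pointer, oversized
// frame, or SVE stack disqualifies it.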
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  int64_t Amount = I->getOperand(0).getImm();

  if (CalleePopAmount == 0) {

    assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");

  } else if (CalleePopAmount != 0) {

    assert(CalleePopAmount < 0xffffff && "call frame too large");
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

  for (const auto &Info : CSI) {

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

  for (const auto &Info : CSI) {

    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);

  if (MFI.shouldSignReturnAddress(MF)) {

  const std::vector<CalleeSavedInfo> &CSI =
      MF.getFrameInfo().getCalleeSavedInfo();
  for (const auto &Info : CSI) {

  for (const auto &Info : CSI) {
void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \

  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
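// The macros above fold every view of a register (W/X for GPRs; B/H/S/D/Q
// for FPRs) down to the widest register that must be cleared when zeroing
// call-used registers: the X form for GPRs, and the Z form when SVE is
// available so the full vector is zeroed, otherwise Q.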
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const {

  bool HasSVE = STI.hasSVE();

      GPRsToZero.set(XReg);
    } else if (AArch64::FPR128RegClass.contains(Reg) ||

      FPRsToZero.set(XReg);
         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
          AArch64::P15}) {
      if (RegsToZero[PReg])
  for (unsigned i = 0; CSRegs[i]; ++i)

  for (unsigned Reg : AArch64::GPR64RegClass) {

  return AArch64::NoRegister;
  if (!RegInfo->hasStackRealignment(*MF))

  unsigned StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
         F.needsUnwindTableEntry();
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(

  if (homogeneousPrologEpilog(MF))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->hasStackRealignment(MF))
    return false;

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
    MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
    return false;

  while (LastI != Begin) {

    if (LastI->isTransient())

    switch (LastI->getOpcode()) {
    case AArch64::STGloop:
    case AArch64::STZGloop:
    case AArch64::STGOffset:
    case AArch64::STZGOffset:
    case AArch64::ST2GOffset:
    case AArch64::STZ2GOffset:
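// The MTE tag-store opcodes listed above end a block whose SP bump is better
// merged into the tag stores by tryMergeAdjacentSTG than folded into the
// callee-save restores here.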
  unsigned Opc = MBBI->getOpcode();

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();

  case AArch64::LDPDpost:
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))

  case AArch64::LDPXpost:
  case AArch64::STPXpre: {

    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))

  case AArch64::LDRDpost:
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))

  case AArch64::LDRXpost:
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::STPXi:
  case AArch64::LDPXi: {

    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)

        .addImm(RegInfo->getSEHRegNum(Reg0))
        .addImm(RegInfo->getSEHRegNum(Reg1))

  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {

  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    int CFAOffset = 0) {

  switch (MBBI->getOpcode()) {

  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;

  auto SEH = std::next(MBBI);
  if (AArch64InstrInfo::isSEHInstruction(*SEH))
    SEH->eraseFromParent();

  int64_t MinOffset, MaxOffset;
  AArch64InstrInfo::getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);

  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {

                    false, false, nullptr, EmitCFI,

    return std::prev(MBBI);
  }

  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);
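// The conversion above folds the callee-save spill/restore into the SP
// update: e.g. "stp x29, x30, [sp]" plus "sub sp, sp, #N" becomes the single
// pre-indexed "stp x29, x30, [sp, #-N]!", provided the scaled offset fits
// the immediate range reported by getMemOpInfo.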
  unsigned Opc = MI.getOpcode();

  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:

  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");

  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
  assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
         "Expecting a SEH instruction");
  switch (I->getOpcode()) {

  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:

      [](const auto &Info) { return Info.getReg() == AArch64::LR; }) &&
    bool NeedsUnwindInfo) {

  if (NeedsUnwindInfo) {

    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
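// The escape bytes above encode DW_CFA_val_expression for x18 (DWARF
// register 18): when unwinding past this frame, the shadow-call-stack
// pointer is recomputed as x18 - 8, undoing the post-incrementing
// "str x30, [x18], #8" push performed by this prologue.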
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;

      MFnI.needsDwarfUnwindInfo(MF));

  if (MFnI.shouldSignReturnAddress(MF)) {
    if (MFnI.shouldSignWithBKey()) {

            TII->get(MFnI.shouldSignWithBKey() ? AArch64::PACIBSP
                                               : AArch64::PACIASP))

  } else if (NeedsWinCFI) {

  if (EmitCFI && MFnI.isMTETagged()) {

    assert(!HasFP && "unexpected function without stack frame but with FP");
    assert(!SVEStackSize &&
           "unexpected function without stack frame but with SVE objects");

    ++NumRedZoneFunctions;
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

  } else if (HomPrologEpilog) {

    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,

    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

                                        NeedsWinCFI, &HasWinCFI);
  if (!IsFunclet && HasFP) {

    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)

          .addUse(HaveInitialContext ? AArch64::X22 : AArch64::XZR)

    if (HomPrologEpilog) {

  if (NeedsWinCFI && HasWinCFI) {

    NeedsWinCFI = false;

    const int OffsetToFirstCalleeSaveFromFP =

    Register FramePtr = RegInfo->getFrameRegister(MF);
    unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);

        nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));

    emitCalleeSavedGPRLocations(MBB, MBBI);
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
  int64_t RealignmentPadding =

    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;

    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot exceed 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;

    if ((NumWords & 0xFFFF0000) != 0) {

          .addImm((NumWords & 0xFFFF0000) >> 16)

    if (RealignmentPadding > 0) {

          .addImm(RealignmentPadding)
  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};

    CalleeSavesBegin = MBBI;

    CalleeSavesEnd = MBBI;

    AllocateAfter = SVEStackSize - AllocateBefore;

  emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
                  -AllocateBefore, TII,

                  EmitCFI && !HasFP && AllocateBefore,

  emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

                  nullptr, EmitCFI && !HasFP && AllocateAfter,
  unsigned scratchSPReg = AArch64::SP;

  if (NeedsRealignment) {

    assert(scratchSPReg != AArch64::NoRegister);

  emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,

                  false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,

  if (NeedsRealignment) {

    assert(scratchSPReg != AArch64::SP);

  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {

  if (NeedsWinCFI && HasWinCFI) {

  if (IsFunclet && F.hasPersonalityFn()) {
    bool NeedsWinCFI, bool *HasWinCFI) {

  if (!MFI.shouldSignReturnAddress(MF))

    DL = MBBI->getDebugLoc();

  if (Subtarget.hasPAuth() &&
      MBBI != MBB.end() && MBBI->getOpcode() == AArch64::RET_ReallyLR &&

        TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))

        TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
  switch (MI.getOpcode()) {

  bool HasWinCFI = false;
  bool IsFunclet = false;

    DL = MBBI->getDebugLoc();

            TII->get(AArch64::SEH_EpilogEnd))

  int64_t AfterCSRPopSize = ArgumentStackToRestore;

  if (homogeneousPrologEpilog(MF, &MBB)) {

    auto HomogeneousEpilog = std::prev(LastPopI);
    if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
      LastPopI = HomogeneousEpilog;

    assert(AfterCSRPopSize == 0);
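// Mirroring the prologue, the epilogue below first tries to fold the
// local-stack bump into the callee-save reloads; failing that, it converts
// the last reload into a post-indexed load so the SP restore and the
// register pops become one instruction.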
  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {

    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
           AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);

    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);

    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,

      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;

  while (LastPopI != Begin) {

  } else if (CombineSPBump)

                    NeedsWinCFI, &HasWinCFI);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    if (EmitCFI && hasFP(MF)) {

      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;

    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&

    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;

    emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,

                    false, false, nullptr, EmitCFI && !hasFP(MF),

                    false, nullptr, EmitCFI && !hasFP(MF),

                    false, nullptr, EmitCFI && !hasFP(MF),

  emitCalleeSavedSVERestores(MBB, RestoreEnd);
  if (RedZone && AfterCSRPopSize == 0)

  bool NoCalleeSaveRestore = PrologueSaveSize == 0;
  int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
  if (NoCalleeSaveRestore)
    StackRestoreBytes += AfterCSRPopSize;

  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,

  if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {

  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,

  } else if (NumBytes)

  if (EmitCFI && hasFP(MF)) {

    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);

  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");

                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
                                 int64_t ObjectOffset) {

  unsigned FixedObject =

                                    int64_t ObjectOffset) {

  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

                                                     bool ForSimm) const {

  bool isFixed = MFI.isFixedObjectIndex(FI);

    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {
  PreferFP &= !SVEStackSize;

  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {

    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");

  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {

    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset && !SVEStackSize;

    if (MFI.hasVarSizedObjects()) {

      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP)

    } else if (FPOffset >= 0) {

    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {

             "Funclets should only be present on Win64");

      if (FPOffsetFits && PreferFP)

  assert(((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

                          RegInfo->hasStackRealignment(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);

    FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
                                           : (unsigned)AArch64::SP;

    if (UseFP && !(isFixed || isCSR))
      ScalableOffset = -SVEStackSize;
    if (!UseFP && (isFixed || isCSR))
      ScalableOffset = SVEStackSize;

    FrameReg = RegInfo->getFrameRegister(MF);

  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();

    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
          Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&

                                      bool NeedsWinCFI, bool IsFirst,

  if (Reg2 == AArch64::FP)

  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)

                                       bool UsesWinAAPCS, bool NeedsWinCFI,
                                       bool NeedsFrameRecord, bool IsFirst,

  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;
struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  int FrameIdx;
  int Offset;
  enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  unsigned getScale() const {

  bool isScalable() const { return Type == PPR || Type == ZPR; }
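// computeCalleeSaveRegisterPairs (below) walks the callee-saved list and
// greedily forms LDP/STP-able pairs, assigning each pair (or lone register)
// its slot offset; scalable (SVE) saves are tracked on a separate byte
// offset because they are addressed in vector-granule multiples.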
    bool NeedsFrameRecord) {

  unsigned Count = CSI.size();

          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");

  int StackFillDir = -1;

  unsigned FirstReg = 0;

    FirstReg = Count - 1;

  for (unsigned i = FirstReg; i < Count; i += RegInc) {

    RPI.Reg1 = CSI[i].getReg();

    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::ZPR;
    else if (AArch64::PPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::PPR;

    if (unsigned(i + RegInc) < Count) {
      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;

      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
            !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,

      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&

      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))

      case RegPairInfo::PPR:
      case RegPairInfo::ZPR:

    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");
2619 assert((!
RPI.isPaired() || !NeedsFrameRecord ||
RPI.Reg2 != AArch64::FP ||
2620 RPI.Reg1 == AArch64::LR) &&
2621 "FrameRecord must be allocated together with LR");
2624 assert((!
RPI.isPaired() || !NeedsFrameRecord ||
RPI.Reg1 != AArch64::FP ||
2625 RPI.Reg2 == AArch64::LR) &&
2626 "FrameRecord must be allocated together with LR");
2633 ((
RPI.Reg1 == AArch64::LR &&
RPI.Reg2 == AArch64::FP) ||
2634 RPI.Reg1 + 1 ==
RPI.Reg2))) &&
2635 "Callee-save registers not saved as adjacent register pair!");
2637 RPI.FrameIdx = CSI[
i].getFrameIdx();
2640 RPI.FrameIdx = CSI[
i + RegInc].getFrameIdx();
2642 int Scale =
RPI.getScale();
2644 int OffsetPre =
RPI.isScalable() ? ScalableByteOffset : ByteOffset;
2645 assert(OffsetPre % Scale == 0);
2647 if (
RPI.isScalable())
2648 ScalableByteOffset += StackFillDir * Scale;
2650 ByteOffset += StackFillDir * (
RPI.isPaired() ? 2 * Scale : Scale);
2655 RPI.Reg2 == AArch64::FP)
2656 ByteOffset += StackFillDir * 8;
2659 "Paired spill/fill instructions don't exist for SVE vectors");
2663 if (NeedGapToAlignStack && !NeedsWinCFI &&
2664 !
RPI.isScalable() &&
RPI.Type != RegPairInfo::FPR128 &&
2665 !
RPI.isPaired() && ByteOffset % 16 != 0) {
2666 ByteOffset += 8 * StackFillDir;
2671 MFI.setObjectAlignment(
RPI.FrameIdx,
Align(16));
2672 NeedGapToAlignStack =
false;
2675 int OffsetPost =
RPI.isScalable() ? ScalableByteOffset : ByteOffset;
2676 assert(OffsetPost % Scale == 0);
2679 int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
2684 RPI.Reg2 == AArch64::FP)
2688 assert(((!
RPI.isScalable() &&
RPI.Offset >= -64 &&
RPI.Offset <= 63) ||
2689 (
RPI.isScalable() &&
RPI.Offset >= -256 &&
RPI.Offset <= 255)) &&
2690 "Offset out of bounds for LDP/STP immediate");
    if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
                              RPI.Reg2 == AArch64::FP) ||
                             (IsWindows && RPI.Reg1 == AArch64::FP &&
                              RPI.Reg2 == AArch64::LR)))

    RegPairs.push_back(RPI);

    MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
  if (homogeneousPrologEpilog(MF)) {

    for (auto &RPI : RegPairs) {

    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      StrOpc = AArch64::STR_ZXI;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      Alignment = Align(2);
      break;

      dbgs() << ") -> fi#(" << RPI.FrameIdx;
      if (RPI.isPaired())
        dbgs() << ", " << RPI.FrameIdx + 1;

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired()) {

    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
    DL = MBBI->getDebugLoc();

    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      LdrOpc = AArch64::LDR_ZXI;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Alignment = Align(2);
      break;

      dbgs() << ") -> fi#(" << RPI.FrameIdx;
      if (RPI.isPaired())
        dbgs() << ", " << RPI.FrameIdx + 1;

    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired()) {
  for (const RegPairInfo &RPI : reverse(RegPairs))
    if (RPI.isScalable())

  if (homogeneousPrologEpilog(MF, &MBB)) {

    for (auto &RPI : RegPairs) {

    for (const RegPairInfo &RPI : reverse(RegPairs)) {
      if (RPI.isScalable())

    for (const RegPairInfo &RPI : RegPairs) {
      if (RPI.isScalable())
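// determineCalleeSaves (below) decides which callee-saved registers to
// spill. Beyond the registers already marked used, it may force-spill FP/LR
// for the frame record and one extra unused GPR (or reserve a scavenging
// slot) so that large stack offsets can always be materialized.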
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

          : (unsigned)AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    if (Reg == BasePointerReg)

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
    if (AArch64::GPR64RegClass.contains(Reg) ||
        AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg))
      PairedReg = CSRegs[i ^ 1];

        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;

    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&

        ExtraCSSpill = PairedReg;

    SavedRegs.set(AArch64::X18);

  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;

    if (AArch64::PPRRegClass.contains(Reg) ||
        AArch64::ZPRRegClass.contains(Reg))

  unsigned NumSavedRegs = SavedRegs.count();

    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);

  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  bool BigStack = SVEStackSize ||
                  (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;

    AFI->setHasStackFrame(true);

    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {

                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);

      if (producePairRegisters(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = UnspilledCSGPR;

      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");

  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);

  if (hasFP(MF) && AFI->hasSwiftAsyncContext())

                    << EstimatedStackSize + AlignedCSStackSize

          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");

  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
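// Spill-slot assignment (below) keeps callee-save frame indices contiguous
// and tracks the min/max indices so the GPR and SVE save areas can later be
// laid out, and unwound, as single blocks.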
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

  for (auto &CS : CSI) {

    CS.setFrameIdx(FrameIdx);

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

        Reg == AArch64::FP) {

      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;
                                  int &Min, int &Max) {

  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {

              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");

      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
                                          int &MinCSFrameIndex,
                                          int &MaxCSFrameIndex,
                                          bool AssignOffsets) {

           "SVE vectors should never be passed on the stack by value, only by "
           "reference.");

  auto Assign = [&MFI](int FI, int64_t Offset) {

  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {

  int StackProtectorFI = -1;

      ObjectsToAllocate.push_back(StackProtectorFI);

    if (I == StackProtectorFI)

    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)

    ObjectsToAllocate.push_back(I);

  for (unsigned FI : ObjectsToAllocate) {

    if (Alignment > Align(16))
      report_fatal_error(
          "Alignment of scalable vectors > 16 bytes is not yet supported");
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(

  int MinCSFrameIndex, MaxCSFrameIndex;

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(

         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);

  int64_t FixedObject =

  assert(DstReg && "There must be a free register after frame setup");
struct TagStoreInstr {

class TagStoreEdit {

  std::optional<int64_t> FrameRegUpdate;

  unsigned FrameRegUpdateFlags;

      : MBB(MBB), ZeroData(ZeroData) {

  void addInstruction(TagStoreInstr I) {
    assert((TagStores.empty() ||
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
    TagStores.push_back(I);
  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;
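// STG/ST2G take a signed 9-bit immediate scaled by 16, so a single
// instruction can address [-256*16, 255*16] bytes around the base register;
// anything outside that window forces the base into a scratch register.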
  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {

    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;

    int64_t InstrSize = (Size > 16) ? 32 : 16;

            ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
            : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);

            .addImm(BaseRegOffsetBytes / 16)

    if (BaseRegOffsetBytes == 0)

    BaseRegOffsetBytes += InstrSize;

  int64_t LoopSize = Size;

  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;

                     TII->get(ZeroData ? AArch64::STZGloop_wback
                                       : AArch64::STGloop_wback))

  LoopI->setFlags(FrameRegUpdateFlags);

  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  if (LoopSize < Size) {

    assert(Size - LoopSize == 16);

        TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))

        .addImm(1 + ExtraBaseRegUpdate / 16)

  } else if (ExtraBaseRegUpdate) {

        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
                              int64_t Size, int64_t *TotalOffset) {

  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {

    if (MI.getOpcode() == AArch64::SUBXri)

    const int64_t kMaxOffset =

    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {

  for (auto &TS : TSE) {

    if (MI->memoperands_empty()) {

    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
                           bool TryMergeSPUpdate) {
  if (TagStores.empty())

  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

  FrameRegOffset = TFI->resolveFrameOffsetReference(
      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,

  FrameRegUpdate = std::nullopt;

  mergeMemRefs(TagStores, CombinedMemRefs);

  LLVM_DEBUG(for (const auto &Instr
                  : TagStores) { dbgs() << "  " << *Instr.MI; });

  if (TagStores.size() < 2)
    emitUnrolled(InsertI);

  int64_t TotalOffset = 0;
  if (TryMergeSPUpdate) {

    if (InsertI != MBB->end() &&
        canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,

      UpdateInstr = &*InsertI++;

  if (!UpdateInstr && TagStores.size() < 2)

  FrameRegUpdate = TotalOffset;
  FrameRegUpdateFlags = UpdateInstr->getFlags();

  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
                                                  int64_t &Size,
                                                  bool &ZeroData) {

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
              Opcode == AArch64::STZ2GOffset);

  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())

    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())

    Size = MI.getOperand(2).getImm();

  if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)

  else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())

  Offset = 16 * MI.getOperand(2).getImm();
  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))

  constexpr int kScanLimit = 10;

       NextI != E && Count < kScanLimit; ++NextI) {

    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)

    if (!MI.isTransient())

    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())

      [](const TagStoreInstr &Left, const TagStoreInstr &Right) {

  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)

    CurOffset = Instr.Offset + Instr.Size;

  TagStoreEdit TSE(MBB, FirstZeroData);
  std::optional<int64_t> EndOffset;
  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {

      TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate=*/false);

    TSE.addInstruction(Instr);
    EndOffset = Instr.Offset + Instr.Size;
      II = tryMergeAdjacentSTG(II, this, RS);

                                     bool IgnoreSPUpdates) const {

  if (IgnoreSPUpdates) {

    FrameReg = AArch64::SP;

  FrameReg = AArch64::SP;
struct FrameObject {
  bool IsValid = false;

  int ObjectIndex = 0;

  int GroupIndex = -1;

  bool ObjectFirst = false;

  bool GroupFirst = false;

class GroupBuilder {

  int NextGroupIndex = 0;
  std::vector<FrameObject> &Objects;

  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
  void AddMember(int Index) { CurrentMembers.push_back(Index); }
  void EndCurrentGroup() {
    if (CurrentMembers.size() > 1) {

      for (int Index : CurrentMembers) {
        Objects[Index].GroupIndex = NextGroupIndex;

      CurrentMembers.clear();
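// FrameObjectCompare (below) sorts frame objects so that objects accessed
// together, such as tagged allocas covered by one STG sequence, land in the
// same contiguous group, with the object closest to the base pointer first.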
bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {

  return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
                         A.ObjectIndex) <
         std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
                         B.ObjectIndex);
  for (auto &Obj : ObjectsToAllocate) {
    FrameObjects[Obj].IsValid = true;
    FrameObjects[Obj].ObjectIndex = Obj;

  GroupBuilder GB(FrameObjects);
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())

      switch (MI.getOpcode()) {
      case AArch64::STGloop:
      case AArch64::STZGloop:

      case AArch64::STGOffset:
      case AArch64::STZGOffset:
      case AArch64::ST2GOffset:
      case AArch64::STZ2GOffset:

          FrameObjects[FI].IsValid)

        GB.AddMember(TaggedFI);

        GB.EndCurrentGroup();

    GB.EndCurrentGroup();

  FrameObjects[*TBPI].ObjectFirst = true;
  FrameObjects[*TBPI].GroupFirst = true;
  int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
  if (FirstGroupIndex >= 0)
    for (FrameObject &Object : FrameObjects)
      if (Object.GroupIndex == FirstGroupIndex)
        Object.GroupFirst = true;

  for (auto &Obj : FrameObjects) {

    ObjectsToAllocate[i++] = Obj.ObjectIndex;

    dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
    if (Obj.ObjectFirst)
      dbgs() << ", first";

      dbgs() << ", group-first";