using namespace llvm;

#define DEBUG_TYPE "frame-info"
    cl::desc("enable use of redzone on AArch64"),

    cl::desc("reverse the CSR restore sequence"),

    "stack-tagging-merge-settag",

    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
  bool IsTailCallReturn = false;
  unsigned RetOpcode = MBBI->getOpcode();
  IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                     RetOpcode == AArch64::TCRETURNri ||
                     RetOpcode == AArch64::TCRETURNriBTI;

  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    ArgumentPopSize = StackAdjust.getImm();

  return ArgumentPopSize;
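
// Illustrative note (not from this file): for a guaranteed tail call the
// TCRETURN* pseudo carries the callee's stack-argument adjustment as an
// immediate operand (StackAdjust above), so a tail call that pops 16 bytes
// of stack arguments yields ArgumentPopSize == 16, which the epilogue folds
// into its final SP restore.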
bool AArch64FrameLowering::homogeneousPrologEpilog(

bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
    if (MI.isDebugInstr() || MI.isPseudo() ||
        MI.getOpcode() == AArch64::ADDXri ||
        MI.getOpcode() == AArch64::ADDSXri)
  if (!IsWin64 || IsFunclet) {

  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
  return alignTo(VarArgsArea + UnwindHelpObject, 16);
  const unsigned RedZoneSize =

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
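
// Orientation note (hedged): the red zone is a small area below SP that a
// leaf function may use without moving SP at all; on targets that enable it
// (e.g. Darwin) it is 128 bytes, so a function qualifies only if it makes no
// calls, needs no frame pointer, and its locals fit in that window.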
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  int64_t Amount = I->getOperand(0).getImm();

  if (CalleePopAmount == 0) {

    assert(Amount > -0xffffff && Amount < 0xffffff &&
           "call frame too large");
  } else if (CalleePopAmount != 0) {

    assert(CalleePopAmount < 0xffffff && "call frame too large");
                                     int NumBytes, int NumVGScaledBytes,
                                     unsigned VG,

    Expr.push_back(dwarf::DW_OP_consts);
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);

  if (NumVGScaledBytes) {
    Expr.push_back((uint8_t)dwarf::DW_OP_consts);

    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);

    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
    Expr.push_back((uint8_t)dwarf::DW_OP_plus);

    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
            << std::abs(NumVGScaledBytes) << " * VG";
  int64_t NumBytes, NumVGScaledBytes;

  std::string CommentBuffer = "sp";

  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + 31));

  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
  int64_t NumBytes, NumVGScaledBytes;
                                        OffsetFromDefCFA, NumBytes, NumVGScaledBytes);

  if (!NumVGScaledBytes)

  std::string CommentBuffer;

  CfaExpr.push_back(dwarf::DW_CFA_expression);
  for (const auto &Info : CSI) {
  for (unsigned i = 0; CSRegs[i]; ++i)

  for (unsigned Reg : AArch64::GPR64RegClass) {

  return AArch64::NoRegister;
  if (!RegInfo->hasStackRealignment(*MF))

                                        uint64_t StackSizeInBytes) {
  unsigned StackProbeSize = 4096;
  if (F.hasFnAttribute("stack-probe-size"))
    F.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");

         F.needsUnwindTableEntry();
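
// Illustrative note (assumption): both attributes are set in IR, e.g.
//   define void @f() "stack-probe-size"="8192" { ... }
// raises the probe threshold, while "no-stack-arg-probe" disables the
// chunked probing allocation entirely.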
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(

  if (homogeneousPrologEpilog(MF))

  if (MFI.hasVarSizedObjects())

  if (RegInfo->hasStackRealignment(MF))

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
  while (LastI != Begin) {

    if (LastI->isTransient())

    switch (LastI->getOpcode()) {
    case AArch64::STGloop:
    case AArch64::STZGloop:
    case AArch64::STGOffset:
    case AArch64::STZGOffset:
    case AArch64::ST2GOffset:
    case AArch64::STZ2GOffset:
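
// Orientation note (hedged): these are the MTE stack-tagging stores. An
// epilogue that re-tags locals with STG/ST2G addresses them SP-relative,
// so the CSR pop and the local-stack bump cannot be combined across them
// without invalidating their offsets.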
  unsigned Opc = MBBI->getOpcode();

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();

  case AArch64::LDPDpost:
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
  case AArch64::LDPXpost:
  case AArch64::STPXpre: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
              .addImm(RegInfo->getSEHRegNum(Reg0))
              .addImm(RegInfo->getSEHRegNum(Reg1))

  case AArch64::LDRDpost:
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::LDRXpost:
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());

  case AArch64::LDPXi: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
              .addImm(RegInfo->getSEHRegNum(Reg0))
              .addImm(RegInfo->getSEHRegNum(Reg1))

  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());

  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
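
// Mapping sketch (hedged, condensed from the cases above): every prologue
// save form gets a matching Windows SEH unwind pseudo, e.g.
//   stp x29, x30, [sp, #-16]!  ->  SEH_SaveFPLR_X -16
//   stp x19, x20, [sp, #16]    ->  SEH_SaveRegP 19, 20, 16
//   str d8, [sp, #8]           ->  SEH_SaveFReg 8, 8
// so the unwinder can replay (or reverse) the prologue exactly.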
                           unsigned LocalStackSize) {

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {

  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {

  while (MBBI->getOpcode() == AArch64::STRXpost ||
         MBBI->getOpcode() == AArch64::LDRXpre ||
         MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
    if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
      assert(MBBI->getOperand(0).getReg() != AArch64::SP);

  switch (MBBI->getOpcode()) {

    NewOpc = AArch64::STPXpre;
    NewOpc = AArch64::STPDpre;
    NewOpc = AArch64::STPQpre;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    NewOpc = AArch64::LDPXpost;
    NewOpc = AArch64::LDPDpost;
    NewOpc = AArch64::LDPQpost;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;

    auto SEH = std::next(MBBI);
      SEH->eraseFromParent();
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / Scale);
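
// Illustrative example (assumption): with CSStackSizeInc == -16, the first
// callee-save store
//   stp x29, x30, [sp]
// is rewritten to the pre-indexed form that also allocates the save area:
//   stp x29, x30, [sp, #-16]!
// where the new immediate is -16 / Scale == -2, since STP immediates for
// X-register pairs are scaled by 8 bytes.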
                                       uint64_t LocalStackSize,

  unsigned Opc = MI.getOpcode();

  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
      Opc == AArch64::CFI_INSTRUCTION) {
    if (Opc != AArch64::CFI_INSTRUCTION)
      assert(MI.getOperand(0).getReg() != AArch64::SP);
  case AArch64::STRXui:
  case AArch64::STRDui:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");

  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
         "Expecting a SEH instruction");
  switch (I->getOpcode()) {

  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:
  bool needsFrameMoves =
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;

  if (MFnI.shouldSignReturnAddress()) {
    if (MFnI.shouldSignWithBKey()) {

    assert(!HasFP && "unexpected function without stack frame but with FP");
           "unexpected function without stack frame but with SVE objects");

    ++NumRedZoneFunctions;
  if (!NeedsWinCFI && needsFrameMoves) {

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
  } else if (HomPrologEpilog) {
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
    NumBytes -= PrologueSaveSize;

  assert(NumBytes >= 0 && "Negative stack allocation size!?");

                                NeedsWinCFI, &HasWinCFI);
  if (!IsFunclet && HasFP) {

    if (HomPrologEpilog) {

    uint64_t NumWords = NumBytes >> 4;

    if (NumBytes >= (1 << 28))
          "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;

    if ((NumWords & 0xFFFF0000) != 0) {
          .addImm((NumWords & 0xFFFF0000) >> 16)
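
  // Orientation note (hedged): on Windows, large allocations go through
  // __chkstk, which expects the requested size in x15 in 16-byte units;
  // hence NumWords = NumBytes >> 4, materialized 16 bits at a time with
  // MOVZ/MOVK before the call.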
  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};

    CalleeSavesBegin = MBBI;

    CalleeSavesEnd = MBBI;

    AllocateAfter = SVEStackSize - AllocateBefore;

                  -AllocateBefore, TII,

                  -AllocateAfter, TII,

  const bool NeedsRealignment =
      !IsFunclet && RegInfo->hasStackRealignment(MF);
  unsigned scratchSPReg = AArch64::SP;

  if (NeedsRealignment) {
    assert(scratchSPReg != AArch64::NoRegister);

  if (NeedsRealignment) {
    assert(NrBitsToZero > 1);
    assert(scratchSPReg != AArch64::SP);

    uint32_t andMaskEncoded = (1 << 12)                         // = N
                              | ((64 - NrBitsToZero) << 6)      // immr
                              | ((64 - NrBitsToZero - 1) << 0); // imms

        .addImm(andMaskEncoded)
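
  // Encoding sketch (hedged): ANDXri takes a logical immediate in N:immr:imms
  // form. N=1 with immr = 64-NrBitsToZero and imms = 64-NrBitsToZero-1
  // encodes the 64-bit mask ~((1 << NrBitsToZero) - 1), so
  //   and sp, scratch, #mask
  // rounds the scratch register down to the requested alignment.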
  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {

  if (NeedsWinCFI && HasWinCFI) {

  if (IsFunclet && F.hasPersonalityFn()) {
  if (needsFrameMoves) {

    const int OffsetToFirstCalleeSaveFromFP =

    Register FramePtr = RegInfo->getFrameRegister(MF);

      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);

      CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
  if (!MFI.shouldSignReturnAddress())

  DL = MBBI->getDebugLoc();

      MBBI->getOpcode() == AArch64::RET_ReallyLR) {
            TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
            TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
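
// Orientation note (hedged): when the return address is signed, a plain
// "ret" can be fused with the authentication into RETAA/RETAB; otherwise an
// explicit AUTIASP/AUTIBSP must execute before returning, undoing the
// PACIASP/PACIBSP emitted in the prologue.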
  switch (MI.getOpcode()) {

  bool HasWinCFI = false;
  bool IsFunclet = false;

    DL = MBBI->getDebugLoc();
  uint64_t AfterCSRPopSize = ArgumentPopSize;

  if (homogeneousPrologEpilog(MF, &MBB)) {

    auto HomogeneousEpilog = std::prev(LastPopI);
    if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
      LastPopI = HomogeneousEpilog;

    assert(AfterCSRPopSize == 0);

  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
  if (!CombineSPBump && PrologueSaveSize != 0) {
      Pop = std::prev(Pop);
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    if (OffsetOp.getImm() == 0)
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
      AfterCSRPopSize += PrologueSaveSize;

    while (LastPopI != Begin) {
  } else if (CombineSPBump)
                    NeedsWinCFI, &HasWinCFI);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
              TII->get(AArch64::SEH_EpilogEnd))

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;

    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&

    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;
    if (RedZone && AfterCSRPopSize == 0)

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

    bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;

              TII->get(AArch64::SEH_EpilogEnd))

        MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
  } else if (NumBytes)
  if (AfterCSRPopSize) {

    while (FirstSPPopI != Begin) {
      auto Prev = std::prev(FirstSPPopI);
      if (Prev->getOpcode() != AArch64::LDRXpre ||
          Prev->getOperand(0).getReg() == AArch64::SP)
                                             int64_t ObjectOffset) {

  unsigned FixedObject =

                                             int64_t ObjectOffset) {

  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

                                                 bool ForSimm) const {

  bool isFixed = MFI.isFixedObjectIndex(FI);

    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {
  PreferFP &= !SVEStackSize;

  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {

    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {

    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset;
    if (MFI.hasVarSizedObjects()) {

      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP)

    } else if (FPOffset >= 0) {

    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {

             "Funclets should only be present on Win64");

      if (FPOffsetFits && PreferFP)

         ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");
                      RegInfo->hasStackRealignment(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);

      FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
                                             : (unsigned)AArch64::SP;

    if (UseFP && !(isFixed || isCSR))
      ScalableOffset = -SVEStackSize;
    if (!UseFP && (isFixed || isCSR))
      ScalableOffset = SVEStackSize;

    FrameReg = RegInfo->getFrameRegister(MF);

    if (RegInfo->hasBasePointer(MF))
      FrameReg = RegInfo->getBaseRegister();

      assert(!MFI.hasVarSizedObjects() &&
             "Can't use SP when we have var sized objects.");
      FrameReg = AArch64::SP;
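
// Layout note (hedged): the SVE area sits between the fixed-size callee-save
// area and the ordinary locals, so an FP-relative access to a non-fixed,
// non-CSR object must cross the scalable region downward (subtract
// SVEStackSize), while an SP/BP-relative access to a fixed or CSR object
// must cross it upward (add SVEStackSize).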
          Attrs.hasAttrSomewhere(Attribute::SwiftError));

                                   bool NeedsWinCFI, bool IsFirst) {

  if (Reg2 == AArch64::FP)

  if (Reg2 == Reg1 + 1)

  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)

                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst) {

  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;
struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;

  enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  unsigned getScale() const {

  bool isScalable() const { return Type == PPR || Type == ZPR; }
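
// Usage sketch (hedged, inferred from the spill code below): getScale()
// is the byte size one saved register occupies -- 8 for GPR/FPR64, 16 for
// FPR128 and ZPR, 2 for PPR -- and pair offsets are kept in these units so
// they match the scaled immediates of the corresponding LDP/STP or LDR/STR
// instructions.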
                                      bool &NeedShadowCallStackProlog,
                                      bool NeedsFrameRecord) {

  unsigned Count = CSI.size();

          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");

  int StackFillDir = -1;

  unsigned FirstReg = 0;

    FirstReg = Count - 1;

  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RPI.Reg1 = CSI[i].getReg();

      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
      RPI.Type = RegPairInfo::ZPR;
      RPI.Type = RegPairInfo::PPR;
    if (unsigned(i + RegInc) < Count) {
      unsigned NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;

      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst))
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
      case RegPairInfo::PPR:
      case RegPairInfo::ZPR:

    if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
      NeedShadowCallStackProlog = true;
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

            ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
             RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");
    RPI.FrameIdx = CSI[i].getFrameIdx();
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

    int Scale = RPI.getScale();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);

    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * Scale;

      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

           "Paired spill/fill instructions don't exist for SVE vectors");

        !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
      ByteOffset += 8 * StackFillDir;
      assert(ByteOffset % 16 == 0);

      MFI.setObjectAlignment(RPI.FrameIdx, Align(16));

    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);
    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

    assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");

    if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
                              RPI.Reg2 == AArch64::FP) ||
                             (IsWindows && RPI.Reg1 == AArch64::FP &&
                              RPI.Reg2 == AArch64::LR)))

    RegPairs.push_back(RPI);

    MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
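
// Bounds note (hedged): LDP/STP take a signed 7-bit scaled immediate, hence
// the [-64, 63] check; the SVE LDR/STR fill/spill forms take a signed 9-bit
// scaled immediate, hence [-256, 255].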
  bool NeedShadowCallStackProlog = false;
                                 NeedShadowCallStackProlog, hasFP(MF));

  if (NeedShadowCallStackProlog) {

    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,

        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f,

        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
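
// Decoding sketch (hedged): this CFI records that the caller's x18 equals
// the current x18 minus 8 -- DW_CFA_val_expression with the expression
// "DW_OP_breg18 -8" (the -8 is a single-byte SLEB128) -- undoing the
// shadow-call-stack push "str x30, [x18], #8" for the unwinder.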
  if (homogeneousPrologEpilog(MF)) {

    for (auto &RPI : RegPairs) {

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;

    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      Alignment = Align(8);
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      Alignment = Align(8);
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      Alignment = Align(16);
    case RegPairInfo::ZPR:
      StrOpc = AArch64::STR_ZXI;
      Alignment = Align(16);
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      Alignment = Align(2);
      dbgs() << ") -> fi#(" << RPI.FrameIdx;
      if (RPI.isPaired())
        dbgs() << ", " << RPI.FrameIdx + 1;

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired()) {

    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
    DL = MI->getDebugLoc();

  bool NeedShadowCallStackProlog = false;
                                 NeedShadowCallStackProlog, hasFP(MF));

  auto EmitMI = [&](const RegPairInfo &RPI) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Alignment = Align(8);
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Alignment = Align(8);
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Alignment = Align(16);
    case RegPairInfo::ZPR:
      LdrOpc = AArch64::LDR_ZXI;
      Alignment = Align(16);
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Alignment = Align(2);
      dbgs() << ") -> fi#(" << RPI.FrameIdx;
      if (RPI.isPaired())
        dbgs() << ", " << RPI.FrameIdx + 1;

    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {

    if (RPI.isPaired()) {
    for (const RegPairInfo &RPI : reverse(RegPairs))
      if (RPI.isScalable())

    for (const RegPairInfo &RPI : reverse(RegPairs))
      if (!RPI.isScalable())

  } else if (homogeneousPrologEpilog(MF, &MBB)) {

    for (auto &RPI : RegPairs) {

    for (const RegPairInfo &RPI : RegPairs)
      if (!RPI.isScalable())

  if (NeedShadowCallStackProlog) {
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

          : (unsigned)AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    if (Reg == BasePointerReg)

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
        AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg))
      PairedReg = CSRegs[i ^ 1];

        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;

    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
        ExtraCSSpill = PairedReg;

    SavedRegs.set(AArch64::X18);
  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;

        AArch64::ZPRRegClass.contains(Reg))
      SVECSStackSize += RegSize;

      CSStackSize += RegSize;

  unsigned NumSavedRegs = SavedRegs.count();

    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);

  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  bool BigStack = SVEStackSize ||
                  (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;

    AFI->setHasStackFrame(true);
    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);

      if (producePairRegisters(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = UnspilledCSGPR;

      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");

  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
  uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
                    << EstimatedStackSize + AlignedCSStackSize

          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");

  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
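
// Orientation note (hedged): when the estimated frame is big enough that
// SP-relative offsets may not fit an instruction's immediate field, the pass
// secures a scratch register up front -- preferring an unspilled callee-save
// GPR and falling back to an emergency spill slot for the register
// scavenger.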
    std::vector<CalleeSavedInfo> &CSI) const {

                                     int &Min, int &Max) {

  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {

              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");

      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
                                     int &MinCSFrameIndex,
                                     int &MaxCSFrameIndex,
                                     bool AssignOffsets) {

         "SVE vectors should never be passed on the stack by value, only by "

  auto Assign = [&MFI](int FI, int64_t Offset) {

  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {

    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)

    ObjectsToAllocate.push_back(I);

  for (unsigned FI : ObjectsToAllocate) {

    if (Alignment > Align(16))
        "Alignment of scalable vectors > 16 bytes is not yet supported");
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
  int MinCSFrameIndex, MaxCSFrameIndex;

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(

         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);

  int64_t FixedObject =

  unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
  assert(DstReg && "There must be a free register after frame setup");
struct TagStoreInstr {

class TagStoreEdit {

  unsigned FrameRegUpdateFlags;

      : MBB(MBB), ZeroData(ZeroData) {

  void addInstruction(TagStoreInstr I) {
    assert((TagStores.empty() ||
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
    TagStores.push_back(I);
  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;

  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {

    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;

    int64_t InstrSize = (Size > 16) ? 32 : 16;

            ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
            : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);

            .addImm(BaseRegOffsetBytes / 16)

    if (BaseRegOffsetBytes == 0)

    BaseRegOffsetBytes += InstrSize;
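
  // Granule note (hedged): STG/STZG tag one 16-byte MTE granule and
  // ST2G/STZ2G tag two, so the unrolled sequence emits 32-byte stores while
  // at least 32 bytes remain and a final 16-byte store for an odd granule.
  // Offsets are signed 9-bit immediates in 16-byte units, hence the
  // [-256*16, 255*16] window around the base register.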
  int64_t LoopSize = Size;

  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;

             TII->get(ZeroData ? AArch64::STZGloop_wback
                               : AArch64::STGloop_wback))

  LoopI->setFlags(FrameRegUpdateFlags);

  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  if (LoopSize < Size) {

               TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))

          .addImm(1 + ExtraBaseRegUpdate / 16)

  } else if (ExtraBaseRegUpdate) {

        TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
                              int64_t Size, int64_t *TotalOffset) {

  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {

    if (MI.getOpcode() == AArch64::SUBXri)

    const int64_t kMaxOffset =

    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
  for (auto &TS : TSE) {

    if (MI->memoperands_empty()) {

    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());

  if (TagStores.empty())

  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,

  FrameRegUpdate = None;

  mergeMemRefs(TagStores, CombinedMemRefs);

  for (const auto &Instr : TagStores) { dbgs() << "  " << *Instr.MI; });
  if (TagStores.size() < 2)

    emitUnrolled(InsertI);

  int64_t TotalOffset;

  if (InsertI != MBB->end() &&
      canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,

    UpdateInstr = &*InsertI++;

  if (!UpdateInstr && TagStores.size() < 2)

  FrameRegUpdate = TotalOffset;
  FrameRegUpdateFlags = UpdateInstr->getFlags();

  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
                                                  int64_t &Size,
                                                  bool &ZeroData) {

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
              Opcode == AArch64::STZ2GOffset);

  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())

    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())

    Size = MI.getOperand(2).getImm();

  if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
  else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())

       16 * MI.getOperand(2).getImm();
  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))

  constexpr int kScanLimit = 10;

       NextI != E && Count < kScanLimit; ++NextI) {

    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)

    if (!MI.isTransient())

    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
      [](const TagStoreInstr &Left, const TagStoreInstr &Right) {

  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)
    CurOffset = Instr.Offset + Instr.Size;

  TagStoreEdit TSE(MBB, FirstZeroData);

  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {

      TSE.emitCode(InsertI, TFI, false);

    TSE.addInstruction(Instr);
    EndOffset = Instr.Offset + Instr.Size;

  TSE.emitCode(InsertI, TFI, true);
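
// Usage sketch (hedged): tryMergeAdjacentSTG collects runs of settag
// instructions whose [Offset, Offset + Size) ranges are contiguous, flushes
// the accumulated TagStoreEdit whenever the run breaks, and lets only the
// final emitCode attempt to fold a following SP update into the last
// tag store's writeback.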
      II = tryMergeAdjacentSTG(II, this, RS);

                                     bool IgnoreSPUpdates) const {

  if (IgnoreSPUpdates) {

    FrameReg = AArch64::SP;
struct FrameObject {
  bool IsValid = false;

  int ObjectIndex = 0;

  int GroupIndex = -1;

  bool ObjectFirst = false;

  bool GroupFirst = false;
class GroupBuilder {
  int NextGroupIndex = 0;
  std::vector<FrameObject> &Objects;

  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
  void AddMember(int Index) { CurrentMembers.push_back(Index); }
  void EndCurrentGroup() {
    if (CurrentMembers.size() > 1) {

      for (int Index : CurrentMembers) {
        Objects[Index].GroupIndex = NextGroupIndex;

      CurrentMembers.clear();
bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {

  return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
                         A.ObjectIndex) <
         std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
                         B.ObjectIndex);
  for (auto &Obj : ObjectsToAllocate) {
    FrameObjects[Obj].IsValid = true;
    FrameObjects[Obj].ObjectIndex = Obj;

  GroupBuilder GB(FrameObjects);
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())

      switch (MI.getOpcode()) {
      case AArch64::STGloop:
      case AArch64::STZGloop:

      case AArch64::STGOffset:
      case AArch64::STZGOffset:
      case AArch64::ST2GOffset:
      case AArch64::STZ2GOffset:

          FrameObjects[FI].IsValid)

        GB.AddMember(TaggedFI);

        GB.EndCurrentGroup();

    GB.EndCurrentGroup();
    FrameObjects[*TBPI].ObjectFirst = true;
    FrameObjects[*TBPI].GroupFirst = true;
    int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
    if (FirstGroupIndex >= 0)
      for (FrameObject &Object : FrameObjects)
        if (Object.GroupIndex == FirstGroupIndex)
          Object.GroupFirst = true;
  for (auto &Obj : FrameObjects) {

    ObjectsToAllocate[i++] = Obj.ObjectIndex;

    dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
    if (Obj.ObjectFirst)
      dbgs() << ", first";

      dbgs() << ", group-first";