#define DEBUG_TYPE "frame-info"

    cl::desc("enable use of redzone on AArch64"),

    "stack-tagging-merge-settag",

    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

    "aarch64-disable-multivector-spill-fill",

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    ArgumentPopSize = StackAdjust.getImm();
  return ArgumentPopSize;
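// homogeneousPrologEpilog: returns true when the prologue/epilogue can be
// lowered via the outlined homogeneous helpers (the size optimization gated
// by -homogeneous-prolog-epilog); it bails out on Swift async contexts,
// streaming-mode changes, and odd GPR counts, as the checks below show.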
bool AArch64FrameLowering::homogeneousPrologEpilog(
  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
    if (AArch64::GPR64RegClass.contains(Reg))
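// producePairRegisters: callee-saved registers are forced into adjacent
// pairs in some configurations (e.g. compact unwind or homogeneous
// prologue/epilogue lowering).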
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {

  if (MI.isDebugInstr() || MI.isPseudo() ||
      MI.getOpcode() == AArch64::ADDXri ||
      MI.getOpcode() == AArch64::ADDSXri)

  if (!IsWin64 || IsFunclet) {
          Attribute::SwiftAsync))
  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
      alignTo(VarArgsArea + UnwindHelpObject, 16);
  const unsigned RedZoneSize =
  bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||

  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
    int64_t Amount = I->getOperand(0).getImm();
    if (CalleePopAmount == 0) {
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
             "non-reserved call frame without var sized objects?");
  } else if (CalleePopAmount != 0) {
    assert(CalleePopAmount < 0xffffff && "call frame too large");
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();
  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))

void AArch64FrameLowering::emitCalleeSavedSVELocations(
  for (const auto &Info : CSI) {
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");

  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);
      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));
  if (MFI.shouldSignReturnAddress(MF)) {
    auto CFIInst = MFI.branchProtectionPAuthLR()
  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
        TRI.getDwarfRegNum(AArch64::X18, true));
  const std::vector<CalleeSavedInfo> &CSI =
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
        TRI.getDwarfRegNum(Reg, true));

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!Info.isRestored())
        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));

void AArch64FrameLowering::emitCalleeSavedGPRRestores(
void AArch64FrameLowering::emitCalleeSavedSVERestores(

  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
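// Allocate AllocSize bytes of stack (plus optional realignment padding),
// emitting stack probes for large allocations and CFI/WinCFI as requested.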
void AArch64FrameLowering::allocateStackSpace(
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {
  const uint64_t AndMask = ~(MaxAlign - 1);
  Register TargetReg = RealignmentPadding
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    assert(ScratchReg != AArch64::NoRegister);
    if (FollowupAllocs) {
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
    assert(ScratchReg != AArch64::NoRegister);
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
  assert(TargetReg != AArch64::NoRegister);
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {
  if (RealignmentPadding)

  case AArch64::W##n: \
  case AArch64::X##n: \

  case AArch64::B##n: \
  case AArch64::H##n: \
  case AArch64::S##n: \
  case AArch64::D##n: \
  case AArch64::Q##n: \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
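// Zero call-used registers on return, as requested by the
// zero-call-used-regs function attribute.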
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
    DL = MBBI->getDebugLoc();
    if (TRI.isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(XReg);
      FPRsToZero.set(XReg);
        {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
         AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
         AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
      if (RegsToZero[PReg])

  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  for (unsigned Reg : AArch64::GPR64RegClass) {
  return AArch64::NoRegister;

         StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());

         F.needsUnwindTableEntry();
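// Decide whether the callee-save spills and the local stack allocation can
// be folded into a single SP adjustment (not possible with homogeneous
// prolog/epilog, variable-sized objects, or stack realignment).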
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
  if (homogeneousPrologEpilog(MF))
  if (MFI.hasVarSizedObjects())
  if (RegInfo->hasStackRealignment(MF))

bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
  while (LastI != Begin) {
    if (LastI->isTransient())
    switch (LastI->getOpcode()) {
    case AArch64::STGloop:
    case AArch64::STZGloop:
    case AArch64::STZGi:
    case AArch64::ST2Gi:
    case AArch64::STZ2Gi:

  unsigned Opc = MBBI->getOpcode();
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();
  case AArch64::LDPDpost:
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
  case AArch64::LDPXpost:
  case AArch64::STPXpre: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
  case AArch64::LDRDpost:
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
  case AArch64::LDRXpost:
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
  case AArch64::STPQi:
  case AArch64::LDPQi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
  case AArch64::LDPQpost:
  case AArch64::STPQpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))

                           unsigned LocalStackSize) {
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);

  if (ST.isTargetDarwin())
  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
      Opc == AArch64::UBFMXri)
  if (Opc == AArch64::ORRXrr)
  if (Opc == AArch64::BL) {
    auto Op1 = MBBI->getOperand(0);
    return Op1.isSymbol() &&
           (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    int CFAOffset = 0) {
  switch (MBBI->getOpcode()) {
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    auto SEH = std::next(MBBI);
      SEH->eraseFromParent();
  int64_t MinOffset, MaxOffset;
      NewOpc, Scale, Width, MinOffset, MaxOffset);
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
                    false, false, nullptr, EmitCFI,
    return std::prev(MBBI);
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
    MIB.add(MBBI->getOperand(OpndIdx));
  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);

  unsigned Opc = MI.getOpcode();
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
         "Expecting a SEH instruction");

  switch (I->getOpcode()) {
  case AArch64::PTRUE_C_B:
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:

                                        bool NeedsUnwindInfo) {
  if (NeedsUnwindInfo) {
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f,
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));

  const int OffsetToFirstCalleeSaveFromFP =
  unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
      nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
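// Prologue emission: spill callee-saves, establish the frame pointer and the
// Swift async context when needed, allocate (and probe) the local and SVE
// areas, and emit DWARF CFI or Windows SEH unwind info as required.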
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;
  while (NonFrameStart != End &&
  if (NonFrameStart != MBB.end()) {
    if (NonFrameStart == MBB.end())
    for (auto &Op : MI.operands())
      if (Op.isReg() && Op.isDef())
        assert(!LiveRegs.contains(Op.getReg()) &&
               "live register clobbered by inserted prologue instructions");

  if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
                                MFnI.needsDwarfUnwindInfo(MF));
  if (MFnI.shouldSignReturnAddress(MF)) {
  if (EmitCFI && MFnI.isMTETagged()) {

    assert(!HasFP && "unexpected function without stack frame but with FP");
    assert(!SVEStackSize &&
           "unexpected function without stack frame but with SVE objects");
      ++NumRedZoneFunctions;

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
  } else if (HomPrologEpilog) {
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
    NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (CombineSPBump &&
                                        NeedsWinCFI, &HasWinCFI);

  if (!IsFunclet && HasFP) {
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
    if (HomPrologEpilog) {
    if (NeedsWinCFI && HasWinCFI) {
      NeedsWinCFI = false;
      emitCalleeSavedGPRLocations(MBB, MBBI);

  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
  const int64_t RealignmentPadding =
    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
    if (NumBytes >= (1 << 28))
                         "unwinding purposes");
    uint32_t LowNumWords = NumWords & 0xFFFF;
    if ((NumWords & 0xFFFF0000) != 0) {
          .addImm((NumWords & 0xFFFF0000) >> 16)
    if (RealignmentPadding > 0) {
      if (RealignmentPadding >= 4096) {
            .addImm(RealignmentPadding)
          .addImm(RealignmentPadding)

  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
    CalleeSavesBegin = MBBI;
    CalleeSavesEnd = MBBI;
    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
    allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                       nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
    CFAOffset += SVECalleeSavesSize;
    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
           "Cannot use redzone with stack realignment");
    allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                       NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,

  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
  if (NeedsWinCFI && HasWinCFI) {
  if (IsFunclet && F.hasPersonalityFn()) {
  if (EmitCFI && !EmitAsyncCFI) {
        *RegInfo, AArch64::SP, AArch64::SP, TotalSize,
    emitCalleeSavedGPRLocations(MBB, MBBI);
    emitCalleeSavedSVELocations(MBB, MBBI);

  switch (MI.getOpcode()) {
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
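// Epilogue emission mirrors the prologue: deallocate the locals and the SVE
// area, restore callee-saves, and pop any argument stack the caller expects
// to be removed.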
  bool HasWinCFI = false;
  bool IsFunclet = false;
    DL = MBBI->getDebugLoc();
    BuildMI(MBB, MBB.getFirstTerminator(), DL,
            TII->get(AArch64::PAUTH_EPILOGUE))
        .setMIFlag(MachineInstr::FrameDestroy);
            TII->get(AArch64::SEH_EpilogEnd))

  int64_t AfterCSRPopSize = ArgumentStackToRestore;
  if (homogeneousPrologEpilog(MF, &MBB)) {
    auto HomogeneousEpilog = std::prev(LastPopI);
    if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
      LastPopI = HomogeneousEpilog;
    assert(AfterCSRPopSize == 0);

  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
      Pop = std::prev(Pop);
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;

  while (LastPopI != Begin) {
  } else if (CombineSPBump)
                                        NeedsWinCFI, &HasWinCFI);
    EpilogStartI = LastPopI;

  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
    if (EmitCFI && hasFP(MF)) {
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&
    DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
    DeallocateAfter = CalleeSavedSizeAsOffset;
          MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
          false, false, nullptr, EmitCFI && !hasFP(MF),
          false, nullptr, EmitCFI && !hasFP(MF),
          false, nullptr, EmitCFI && !hasFP(MF),
    emitCalleeSavedSVERestores(MBB, RestoreEnd);

    if (RedZone && AfterCSRPopSize == 0)
    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;
        MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
    if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
        MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
  } else if (NumBytes)

  if (EmitCFI && hasFP(MF)) {
    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");
        false, NeedsWinCFI, &HasWinCFI, EmitCFI,
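// Frame index resolution: choose SP, FP, or the base register and compute
// the object offset, taking the scalable SVE area into account.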
  if (MFI.isVariableSizedObjectIndex(FI)) {
  bool IsFixed = MFI.isFixedObjectIndex(FI);
  if (!IsFixed && !IsCSR)
    ScalableOffset = -SVEStackSize;

                               int64_t ObjectOffset) {
  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject =

                               int64_t ObjectOffset) {

  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

                                                     bool ForSimm) const {
  bool isFixed = MFI.isFixedObjectIndex(FI);

    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {
  PreferFP &= !SVEStackSize;
  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset && !SVEStackSize;
    if (FPOffset >= 0) {
    } else if (MFI.hasVarSizedObjects()) {
      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP)
    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
             "Funclets should only be present on Win64");
      if (FPOffsetFits && PreferFP)

  assert(((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame pointer");

        RegInfo->hasStackRealignment(MF))) {
      FrameReg = RegInfo->getFrameRegister(MF);
      FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
    if (UseFP && !(isFixed || isCSR))
      ScalableOffset = -SVEStackSize;
    if (!UseFP && (isFixed || isCSR))
      ScalableOffset = SVEStackSize;
    FrameReg = RegInfo->getFrameRegister(MF);
    if (RegInfo->hasBasePointer(MF))
      FrameReg = RegInfo->getBaseRegister();
      assert(!MFI.hasVarSizedObjects() &&
             "Can't use SP when we have var sized objects.");
      FrameReg = AArch64::SP;

          Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&

                                              bool NeedsWinCFI, bool IsFirst,
  if (Reg2 == AArch64::FP)
  if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)

                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst,
  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;

  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  unsigned getScale() const {

  bool isScalable() const { return Type == PPR || Type == ZPR; }

  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
    if (SavedRegs.test(PReg)) {
      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
  return AArch64::NoRegister;

  bool IsLocallyStreaming =
  return Subtarget.hasSVE2p1() ||
         (Subtarget.hasSME2() &&
          (!IsLocallyStreaming && Subtarget.isStreaming()));
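// computeCalleeSaveRegisterPairs: group callee-saved registers into
// LDP/STP-able pairs, assign frame indexes and offsets, and keep FP/LR
// adjacent so they form the frame record.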
                                      bool NeedsFrameRecord) {
  unsigned Count = CSI.size();
         "Odd number of callee-saved regs to spill!");
  int StackFillDir = -1;
  unsigned FirstReg = 0;
    FirstReg = Count - 1;

  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RPI.Reg1 = CSI[i].getReg();
    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::ZPR;
    else if (AArch64::PPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::PPR;
    else if (RPI.Reg1 == AArch64::VG)
      RPI.Type = RegPairInfo::VG;

      ByteOffset += StackFillDir * StackHazardSize;
    int Scale = RPI.getScale();
      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
      case RegPairInfo::PPR:
      case RegPairInfo::ZPR:
            ((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) {
          int Offset = (ScalableByteOffset + StackFillDir * 2 * Scale) / Scale;
      case RegPairInfo::VG:

    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");
    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");
    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");
            ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
             RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");
    RPI.FrameIdx = CSI[i].getFrameIdx();
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);
    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      ByteOffset += StackFillDir * 8;
    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
        ByteOffset % 16 != 0) {
      ByteOffset += 8 * StackFillDir;
      NeedGapToAlignStack = false;
    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);
    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
    RPI.Offset = Offset / Scale;

    assert((!RPI.isPaired() ||
            (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");
    auto isFrameRecord = [&] {
        return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
                         : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
      return i > 0 && RPI.Reg1 == AArch64::FP &&
             CSI[i - 1].getReg() == AArch64::LR;
    if (NeedsFrameRecord && isFrameRecord())

  std::reverse(RegPairs.begin(), RegPairs.end());
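// Spill the callee-saved registers computed above, using paired stores where
// possible and ST1B multi-vector stores for paired ZPRs on SVE2.1/SME2
// targets.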
  MRI.freezeReservedRegs();
  if (homogeneousPrologEpilog(MF)) {
    for (auto &RPI : RegPairs) {
      if (!MRI.isReserved(RPI.Reg1))
      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))

  bool PTrueCreated = false;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      Alignment = Align(8);
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      Alignment = Align(8);
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      Alignment = Align(16);
    case RegPairInfo::ZPR:
      StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
      Alignment = Align(16);
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      Alignment = Align(2);
    case RegPairInfo::VG:
      StrOpc = AArch64::STRXui;
      Alignment = Align(8);
    unsigned X0Scratch = AArch64::NoRegister;
    if (Reg1 == AArch64::VG) {
      assert(Reg1 != AArch64::NoRegister);
      if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&
            return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
                AArch64::X0, LiveIn.PhysReg);
        if (X0Scratch != AArch64::NoRegister)
        const uint32_t *RegMask = TRI->getCallPreservedMask(

               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
    if (RPI.isPaired() && RPI.isScalable()) {
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      auto IsPPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::PPR;
      auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
      auto IsZPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::ZPR;
      auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
      if (!PTrueCreated) {
        PTrueCreated = true;
      if (!MRI.isReserved(Reg1))
      if (!MRI.isReserved(Reg2))
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));
      if (!MRI.isReserved(Reg1))
      if (RPI.isPaired()) {
        if (!MRI.isReserved(Reg2))
    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {
    if (X0Scratch != AArch64::NoRegister)
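// Restore callee-saved registers, mirroring the spill code; the PPR and ZPR
// ranges are reversed so that predicate restores are handled first.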
    DL = MBBI->getDebugLoc();
  if (homogeneousPrologEpilog(MF, &MBB)) {
    for (auto &RPI : RegPairs) {

  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
  auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
  std::reverse(PPRBegin, PPREnd);
  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
  auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
  std::reverse(ZPRBegin, ZPREnd);

  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : RegPairs) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Alignment = Align(8);
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Alignment = Align(8);
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Alignment = Align(16);
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      Alignment = Align(16);
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Alignment = Align(2);
    case RegPairInfo::VG:
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
    if (RPI.isPaired() && RPI.isScalable()) {
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
      if (!PTrueCreated) {
        PTrueCreated = true;
        MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
    if (RPI.isPaired()) {

      dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
    return std::optional<int>(PSV->getFrameIndex());
  return std::nullopt;

  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
    return std::nullopt;
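// Decide whether a stack hazard padding slot is needed by scanning for
// FPR/SVE callee-saves and for stack objects accessed by FP/vector loads and
// stores.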
void AArch64FrameLowering::determineStackHazardSlot(
  if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg) ||
           AArch64::PPRRegClass.contains(Reg);
  bool HasFPRStackObjects = false;
    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
            FrameObjects[*FI] |= 2;
            FrameObjects[*FI] |= 1;
    HasFPRStackObjects =
        any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });
  if (HasFPRCSRs || HasFPRStackObjects) {
                      << StackHazardSize << "\n");
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
  unsigned ExtraCSSpill = 0;
  bool HasUnpairedGPR64 = false;
  bool HasPairZReg = false;

  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];
    if (Reg == BasePointerReg)
    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg)) {
      if (HasUnpairedGPR64)
        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
        PairedReg = CSRegs[i ^ 1];
      if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
        PairedReg = AArch64::NoRegister;
        HasUnpairedGPR64 = true;
      assert(PairedReg == AArch64::NoRegister ||
             AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
             AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
             AArch64::FPR128RegClass.contains(Reg, PairedReg));

      if (AArch64::GPR64RegClass.contains(Reg) &&
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
        ExtraCSSpill = PairedReg;
    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
                    SavedRegs.test(CSRegs[i ^ 1]));

    if (PnReg != AArch64::NoRegister)
      SavedRegs.set(AArch64::P8);
             "Predicate cannot be a reserved register");
    SavedRegs.set(AArch64::X18);

  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;
  for (unsigned Reg : SavedRegs.set_bits()) {
    if (AArch64::PPRRegClass.contains(Reg) ||
        AArch64::ZPRRegClass.contains(Reg))
  if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())

  determineStackHazardSlot(MF, SavedRegs);
  if (AFI->hasStackHazardSlotIndex())
  unsigned NumSavedRegs = SavedRegs.count();
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
             for (unsigned Reg : SavedRegs.set_bits())
  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  int64_t CalleeStackUsed = 0;
    if (FixedOff > CalleeStackUsed)
      CalleeStackUsed = FixedOff;
  bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
                                   CalleeStackUsed) > EstimatedStackSizeLimit;
    AFI->setHasStackFrame(true);

    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      ExtraCSSpill = UnspilledCSGPR;
      if (producePairRegisters(MF)) {
        if (UnspilledCSGPRPaired == AArch64::NoRegister) {
          SavedRegs.reset(UnspilledCSGPR);
          ExtraCSSpill = AArch64::NoRegister;
          SavedRegs.set(UnspilledCSGPRPaired);

      unsigned Size = TRI->getSpillSize(RC);
      Align Alignment = TRI->getSpillAlign(RC);
      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");
  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
  if (hasFP(MF) && AFI->hasSwiftAsyncContext())
                    << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");
          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");
  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {
    std::reverse(CSI.begin(), CSI.end());
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    std::vector<CalleeSavedInfo> VGSaves;
    VGInfo.setRestored(false);
    VGSaves.push_back(VGInfo);
    if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
      VGSaves.push_back(VGInfo);
    bool InsertBeforeLR = false;
    for (unsigned I = 0; I < CSI.size(); I++)
      if (CSI[I].getReg() == AArch64::LR) {
        InsertBeforeLR = true;
        CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
    if (!InsertBeforeLR)
      CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());

  int HazardSlotIndex = std::numeric_limits<int>::max();
  for (auto &CS : CSI) {
      assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
             "Unexpected register order for hazard slot");
      LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
      if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
        MinCSFrameIndex = HazardSlotIndex;
      if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
        MaxCSFrameIndex = HazardSlotIndex;

    CS.setFrameIdx(FrameIdx);
    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

        Reg == AArch64::FP) {
      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;

      HazardSlotIndex == std::numeric_limits<int>::max()) {
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;

                              int &Min, int &Max) {
  Min = std::numeric_limits<int>::max();
  Max = std::numeric_limits<int>::min();
  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {
      assert((Max == std::numeric_limits<int>::min() ||
              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");
      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
  return Min != std::numeric_limits<int>::max();

                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex,
                                       bool AssignOffsets) {
         "SVE vectors should never be passed on the stack by value, only by "
  auto Assign = [&MFI](int FI, int64_t Offset) {
  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
  int StackProtectorFI = -1;
      ObjectsToAllocate.push_back(StackProtectorFI);
    if (I == StackProtectorFI)
    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
  for (unsigned FI : ObjectsToAllocate) {
    if (Alignment > Align(16))
          "Alignment of scalable vectors > 16 bytes is not yet supported");

int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
  int MinCSFrameIndex, MaxCSFrameIndex;

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(

         "Upwards growing stack unsupported");
  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);

  int64_t FixedObject =

  assert(DstReg && "There must be a free register after frame setup");
struct TagStoreInstr {
  std::optional<int64_t> FrameRegUpdate;
  unsigned FrameRegUpdateFlags;
      : MBB(MBB), ZeroData(ZeroData) {
  void addInstruction(TagStoreInstr I) {
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
  void clear() { TagStores.clear(); }

  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;
  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
    if (BaseRegOffsetBytes < kMinOffset ||
        BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
        BaseRegOffsetBytes % 16 != 0) {
      Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
      BaseReg = ScratchReg;
      BaseRegOffsetBytes = 0;
    int64_t InstrSize = (Size > 16) ? 32 : 16;
            ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
            : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
    assert(BaseRegOffsetBytes % 16 == 0);
            .addImm(BaseRegOffsetBytes / 16)
    if (BaseRegOffsetBytes == 0)
    BaseRegOffsetBytes += InstrSize;

          : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  int64_t LoopSize = Size;
  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;
                   TII->get(ZeroData ? AArch64::STZGloop_wback
                                     : AArch64::STGloop_wback))
  LoopI->setFlags(FrameRegUpdateFlags);
  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize
                    << ", Size=" << Size
                    << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate
                    << ", FrameRegUpdate=" << FrameRegUpdate
                    << ", FrameRegOffset.getFixed()="
                    << FrameRegOffset.getFixed() << "\n");
  if (LoopSize < Size) {
    int64_t STGOffset = ExtraBaseRegUpdate + 16;
    assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 &&
           "STG immediate out of range");
            TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
  } else if (ExtraBaseRegUpdate) {
    int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate);
    assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range");
            TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))

                               int64_t Size, int64_t *TotalOffset) {
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
    const int64_t kMaxOffset = 4080 - 16;
    const int64_t kMinOffset = -4095;
    if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset &&
        PostOffset % 16 == 0) {

  for (auto &TS : TSE) {
    if (MI->memoperands_empty()) {
    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());

                            bool TryMergeSPUpdate) {
  if (TagStores.empty())
  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();
          *MF, FirstTagStore.Offset, false, false, Reg,
  FrameRegUpdate = std::nullopt;
  mergeMemRefs(TagStores, CombinedMemRefs);
    dbgs() << "Replacing adjacent STG instructions:\n";
    for (const auto &Instr : TagStores) {
  if (TagStores.size() < 2)
    emitUnrolled(InsertI);
  int64_t TotalOffset = 0;
  if (TryMergeSPUpdate) {
    if (InsertI != MBB->end() &&
        canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
      UpdateInstr = &*InsertI++;
  if (!UpdateInstr && TagStores.size() < 2)
    FrameRegUpdate = TotalOffset;
    FrameRegUpdateFlags = UpdateInstr->getFlags();
  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();

                                                  int64_t &Size,
                                                  bool &ZeroData) {
  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
              Opcode == AArch64::STZ2Gi);
  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
    Size = MI.getOperand(2).getImm();
  if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
  else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
      16 * MI.getOperand(2).getImm();

  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
  constexpr int kScanLimit = 10;
       NextI != E && Count < kScanLimit; ++NextI) {