#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool> StackTaggingMergeSetTag(
    "stack-tagging-merge-settag",
    cl::desc("merge settag instruction in function epilog"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableHomogeneousPrologEpilog(
    "homogeneous-prolog-epilog", cl::Hidden,
    cl::desc("Emit homogeneous prologue and epilogue for the size "
             "optimization (default = off)"));

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
  int64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    ArgumentPopSize = StackAdjust.getImm();
  }
  return ArgumentPopSize;
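// homogeneousPrologEpilog(): whether the outlined, size-optimized save and
// restore helper sequences can be used. The helpers store registers strictly
// in pairs ending with the (LR, FP) frame record, so anything that perturbs
// the layout (Swift async context, streaming-mode changes, an odd number of
// callee-saved GPRs) disables them.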
bool AArch64FrameLowering::homogeneousPrologEpilog(
    MachineFunction &MF, MachineBasicBlock *Exit) const {
  if (AFI->hasSwiftAsyncContext() || AFI->hasStreamingModeChanges())
    return false;

  unsigned NumGPRs = 0;
  for (unsigned I = 0; CSRegs[I]; ++I) {
    Register Reg = CSRegs[I];
    if (Reg == AArch64::LR) {
      assert(CSRegs[I + 1] == AArch64::FP);
      if (NumGPRs % 2 != 0)
        return false;
      break;
    }
    if (AArch64::GPR64RegClass.contains(Reg))
      ++NumGPRs;
  }
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {

  if (MI.isDebugInstr() || MI.isPseudo() ||
      MI.getOpcode() == AArch64::ADDXri ||
      MI.getOpcode() == AArch64::ADDSXri)
  if (!IsWin64 || IsFunclet)
    return AFI->getTailCallReservedStack();

  if (AFI->getTailCallReservedStack() != 0 &&
      !MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftAsync))
    report_fatal_error("cannot generate ABI-changing tail call for Win64");

  // To support EH funclets we allocate an UnwindHelp object.
  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
  return alignTo(VarArgsArea + UnwindHelpObject, 16);
  const unsigned RedZoneSize =
      Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());

  // Without NEON/SVE, a Q-register copy is lowered through memory below SP,
  // which rules out the red zone.
  bool LowerQRegCopyThroughMem = Subtarget.hasFPARMv8() &&
                                 !Subtarget.isNeonAvailable();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
           getSVEStackSize(MF) || LowerQRegCopyThroughMem);
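// eliminateCallFramePseudoInstr(): ADJCALLSTACKDOWN/UP pseudos are turned
// into direct SP adjustments when the call frame is not reserved, or into a
// post-call re-allocation when the callee pops its own arguments.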
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  int64_t Amount = I->getOperand(0).getImm();

  if (CalleePopAmount == 0) {
    // In-function stack adjustments for calls are limited to 24 bits because
    // no scratch register is guaranteed to be available.
    assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
    assert(MFI.hasVarSizedObjects() &&
           "non-reserved call frame without var sized objects?");
  } else if (CalleePopAmount != 0) {
    // The callee pops arguments; re-allocate them on the reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
  }
void AArch64FrameLowering::emitCalleeSavedGPRLocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  bool LocallyStreaming =
      Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface();

  for (const auto &Info : CSI) {
    unsigned FrameIdx = Info.getFrameIdx();
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");

    int64_t DwarfReg = TRI.getDwarfRegNum(Info.getReg(), true);
    // The VG location is emitted before each streaming-mode change; only
    // locally-streaming functions need its non-streaming location here.
    if ((LocallyStreaming && FrameIdx == AFI->getStreamingVGIdx()) ||
        (!LocallyStreaming &&
         DwarfReg == TRI.getDwarfRegNum(AArch64::VG, true)))
      continue;
void AArch64FrameLowering::emitCalleeSavedSVELocations(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

  for (const auto &Info : CSI) {
    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
  const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION);

  // Reset the CFA to `SP + 0`.
  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
      nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0));

  // Flip the RA sign state.
  if (MFI.shouldSignReturnAddress(MF)) {

  // Shadow call stack uses X18, reset it.
  if (MFI.needsShadowCallStackPrologueEpilogue(MF))
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(AArch64::X18, true));

  // Emit .cfi_same_value for callee-saved registers.
  const std::vector<CalleeSavedInfo> &CSI =
      MF.getFrameInfo().getCalleeSavedInfo();
  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!TRI.regNeedsCFI(Reg, Reg))
      continue;
    insertCFISameValue(CFIDesc, MF, MBB, InsertPt,
                       TRI.getDwarfRegNum(Reg, true));
  }

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    if (!Info.isRestored())
      continue;
    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
        nullptr, TRI.getDwarfRegNum(Info.getReg(), true)));
  }
void AArch64FrameLowering::emitCalleeSavedGPRRestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

void AArch64FrameLowering::emitCalleeSavedSVERestores(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {

static int64_t upperBound(StackOffset Size) {
  static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
  return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
}
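// upperBound() feeds the stack-probing logic below: vscale is at most 16
// (2048-bit maximum SVE register width / 128-bit granule), so each scalable
// byte occupies at most 16 real bytes, giving a worst-case byte count for a
// mixed fixed+scalable allocation size.
//
// allocateStackSpace(): allocate AllocSize below SP, emitting stack probes
// when the target requires them, and realigning SP afterwards when
// RealignmentPadding is nonzero.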
void AArch64FrameLowering::allocateStackSpace(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    int64_t RealignmentPadding, StackOffset AllocSize, bool NeedsWinCFI,
    bool *HasWinCFI, bool EmitCFI, StackOffset InitialOffset,
    bool FollowupAllocs) const {
  const uint64_t AndMask = ~(MaxAlign - 1);

  // No stack probing required: a single SP decrement, through a scratch
  // register when realignment is needed.
  Register TargetReg = RealignmentPadding
                           ? findScratchNonCalleeSaveRegister(&MBB)
                           : AArch64::SP;
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {

  // Fixed-length allocation with probing.
  if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
    assert(ScratchReg != AArch64::NoRegister);
    // The fixed allocation may leave unprobed bytes at the top of the stack;
    // issue an extra probe if more allocations follow.
    if (FollowupAllocs) {

  // If the (unknown) allocation size cannot exceed the probe size, decrement
  // the stack pointer right away.
  if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
    Register ScratchReg = RealignmentPadding
                              ? findScratchNonCalleeSaveRegister(&MBB)
                              : AArch64::SP;
    assert(ScratchReg != AArch64::NoRegister);
    emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, &TII,
                    MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                    EmitCFI, InitialOffset);
    if (RealignmentPadding) {
    if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
                              AArch64::StackProbeMaxUnprobedStack) {

  // Otherwise emit a variable-length probing loop.
  assert(TargetReg != AArch64::NoRegister);
  emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, &TII,
                  MachineInstr::FrameSetup, false, NeedsWinCFI, HasWinCFI,
                  EmitCFI, InitialOffset);
  if (RealignmentPadding) {

  if (RealignmentPadding)
    AFI.setStackRealigned(true);
  case AArch64::W##n:                                                          \
  case AArch64::X##n:                                                          \
    return AArch64::X##n

  case AArch64::B##n:                                                          \
  case AArch64::H##n:                                                          \
  case AArch64::S##n:                                                          \
  case AArch64::D##n:                                                          \
  case AArch64::Q##n:                                                          \
    return HasSVE ? AArch64::Z##n : AArch64::Q##n
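// These CASE macros map a 32-bit GPR to its full X form, and an FP/vector
// register to the widest form that must be cleared (the Z register when SVE
// is available, otherwise Q), for use by emitZeroCallUsedRegs below.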
void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                                MachineBasicBlock &MBB) const {
  bool HasSVE = STI.hasSVE();

  if (TRI.isGeneralPurposeRegister(MF, Reg)) {
    Register XReg = getRegisterOrZero(Reg, HasSVE);
    if (XReg != AArch64::NoRegister)
      GPRsToZero.set(XReg);
  } else {
    Register XReg = getRegisterOrZero(Reg, HasSVE);
    if (XReg != AArch64::NoRegister)
      FPRsToZero.set(XReg);
  }
    for (MCRegister PReg :
         {AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, AArch64::P4,
          AArch64::P5, AArch64::P6, AArch64::P7, AArch64::P8, AArch64::P9,
          AArch64::P10, AArch64::P11, AArch64::P12, AArch64::P13, AArch64::P14,
          AArch64::P15}) {
      if (RegsToZero[PReg])
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
         StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());

  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
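// shouldCombineCSRLocalStackBump(): when the local area is small enough, the
// SP adjustment for locals is folded into the pre/post-increment of the
// first/last callee-save store or load instead of a separate SUB/ADD.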
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, uint64_t StackBumpBytes) const {
  if (homogeneousPrologEpilog(MF))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->hasStackRealignment(MF))
    return false;
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
    MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
    return false;

  // Disable the combined SP bump if the last instruction is an MTE tag store;
  // merging the SP adjustment into those stores is almost always better.
  while (LastI != Begin) {
    --LastI;
    if (LastI->isTransient())
      continue;
    break;
  }
  switch (LastI->getOpcode()) {
  case AArch64::STGloop:
  case AArch64::STZGloop:
  case AArch64::STGi:
  case AArch64::STZGi:
  case AArch64::ST2Gi:
  case AArch64::STZ2Gi:
    return false;
  default:
    return true;
  }
  unsigned Opc = MBBI->getOpcode();

  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();
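// InsertSEH(): mirror each prologue/epilogue save or restore with the
// matching Windows SEH unwind pseudo (.seh_save_*), selected by the opcode
// and register class of the instruction just emitted; post-index loads
// negate the immediate so the same pseudo describes both directions.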
  case AArch64::LDPDpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDPXpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPXpre: {
    Register Reg0 = MBBI->getOperand(1).getReg();
    Register Reg1 = MBBI->getOperand(2).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRDpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRXpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    Register Reg0 = MBBI->getOperand(0).getReg();
    Register Reg1 = MBBI->getOperand(1).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPQi:
  case AArch64::LDPQi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQP))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 16)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDPQpost:
    Imm = -Imm;
    [[fallthrough]];
  case AArch64::STPQpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveAnyRegQPX))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 16)
              .setMIFlag(Flag);
    break;
  }
                           unsigned LocalStackSize) {
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
  case AArch64::SEH_SaveAnyRegQP:
  case AArch64::SEH_SaveAnyRegQPX:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
  if (ST.isTargetDarwin())

  unsigned Opc = MBBI->getOpcode();
  if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
      Opc == AArch64::UBFMXri)
    return true;

  if (Opc == AArch64::ORRXrr)
    return true;

  if (Opc == AArch64::BL) {
    auto Op1 = MBBI->getOperand(0);
    return Op1.isSymbol() &&
           (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
  }
    bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI,
    MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup,
    int CFAOffset = 0) {
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  auto SEH = std::next(MBBI);
  if (AArch64InstrInfo::isSEHInstruction(*SEH))
    SEH->eraseFromParent();

  int64_t MinOffset, MaxOffset;
  bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
      NewOpc, Scale, Width, MinOffset, MaxOffset);
  (void)Success;

  // If the first store isn't right where we want SP, we can't fold the
  // update in; emit a normal arithmetic instruction instead.
  if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
      CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
      CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
                    false, false, nullptr, EmitCFI,
                    StackOffset::getFixed(CFAOffset));
    return std::prev(MBBI);
  }
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / (int)Scale);
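// fixupCalleeSaveRestoreStackOffset(): when the local-stack bump is folded
// elsewhere, every callee-save access must be rebased by LocalStackSize,
// scaled by 8 for X/D-register forms and by 16 for Q-register forms.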
  unsigned Opc = MI.getOpcode();
  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");

  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
  assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
         "Expecting a SEH instruction");
  switch (I->getOpcode()) {
  default:
    return false;
  case AArch64::PTRUE_C_B:
  case AArch64::LD1B_2Z_IMM:
  case AArch64::ST1B_2Z_IMM:
  case AArch64::STR_ZXI:
  case AArch64::STR_PXI:
  case AArch64::LDR_ZXI:
  case AArch64::LDR_PXI:
    return I->getFlag(MachineInstr::FrameSetup) ||
           I->getFlag(MachineInstr::FrameDestroy);
  }
                                        bool NeedsUnwindInfo) {

  if (NeedsUnwindInfo) {
    // Emit a CFI instruction that causes 8 to be subtracted from the value of
    // x18 when unwinding past this frame.
    static const char CFIInst[] = {
        dwarf::DW_CFA_val_expression,
        18, // register
        2,  // length
        static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
        static_cast<char>(-8) & 0x7f, // addend (sleb128)
    };
    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
        nullptr, StringRef(CFIInst, sizeof(CFIInst))));
  const int OffsetToFirstCalleeSaveFromFP =
      AFI->getCalleeSaveBaseToFrameRecordOffset() -
      AFI->getCalleeSavedStackSize();
  unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
      nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
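// emitPrologue() begins here. The overall shape: save callee-saved registers
// (possibly folding the local stack bump into the first store), establish
// the frame pointer if required, then allocate locals and SVE areas, with
// Windows SEH and DWARF unwind info emitted along the way.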
  bool HasFP = hasFP(MF);
  bool HasWinCFI = false;

  while (NonFrameStart != End &&
         NonFrameStart->getFlag(MachineInstr::FrameSetup))
    ++NonFrameStart;

  if (NonFrameStart != MBB.end()) {

  if (NonFrameStart == MBB.end())
    return;

  // Sanity-check that prologue instructions do not clobber live registers.
  for (auto &Op : MI.operands())
    if (Op.isReg() && Op.isDef())
      assert(!LiveRegs.contains(Op.getReg()) &&
             "live register clobbered by inserted prologue instructions");
  if (MFnI.needsShadowCallStackPrologueEpilogue(MF))
    emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI,
                                MFnI.needsDwarfUnwindInfo(MF));

  if (MFnI.shouldSignReturnAddress(MF)) {

  if (EmitCFI && MFnI.isMTETagged()) {

  assert(!HasFP && "unexpected function without stack frame but with FP");
  assert(!SVEStackSize &&
         "unexpected function without stack frame but with SVE objects");

  ++NumRedZoneFunctions;
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
  } else if (HomPrologEpilog) {
    // Stack has already been adjusted by the homogeneous prolog.
    NumBytes -= PrologueSaveSize;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
        EmitAsyncCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the callee-save spills, fixing up their offsets if the
  // callee-save and local stack bumps were combined.
  fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                    NeedsWinCFI, &HasWinCFI);
  if (!IsFunclet && HasFP) {

    // The Swift async context lives directly before the frame record.
    bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
    if (HaveInitialContext)
      MBB.addLiveIn(AArch64::X22);
    Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;

    if (HomPrologEpilog) {

  if (NeedsWinCFI && HasWinCFI) {
    NeedsWinCFI = false;
  }

  emitCalleeSavedGPRLocations(MBB, MBBI);
  const bool NeedsRealignment =
      NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF);
  const int64_t RealignmentPadding =
      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
          ? MFI.getMaxAlign().value() - 16
          : 0;

  if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
    uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;

    // SEH unwind codes cannot describe a frame this large.
    if (NumBytes >= (1 << 28))
      report_fatal_error("Stack size cannot be greater than 256MB for stack "
                         "unwinding purposes");

    uint32_t LowNumWords = NumWords & 0xFFFF;

    if ((NumWords & 0xFFFF0000) != 0) {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
          .addReg(AArch64::X15)
          .addImm((NumWords & 0xFFFF0000) >> 16)

    if (RealignmentPadding > 0) {
      if (RealignmentPadding >= 4096) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X16)
            .addImm(RealignmentPadding)
      } else {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
            .addReg(AArch64::X15, RegState::Kill)
            .addImm(RealignmentPadding)
      }
  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;

  LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
                    << "\n");
  CalleeSavesBegin = MBBI;
  while (IsSVECalleeSave(MBBI))
    ++MBBI;
  CalleeSavesEnd = MBBI;

  SVELocalsSize = SVEStackSize - SVECalleeSavesSize;

  // Allocate space for the SVE callee saves.
  allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
                     nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
                     MFI.hasVarSizedObjects() || LocalsSize);
  CFAOffset += SVECalleeSavesSize;

  if (EmitAsyncCFI)
    emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);

  assert(!(canUseRedZone(MF) && NeedsRealignment) &&
         "Cannot use redzone with stack realignment");

  // Allocate the SVE locals and the fixed-size local area in one go.
  allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
                     SVELocalsSize + StackOffset::getFixed(NumBytes),
                     NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
                     CFAOffset, MFI.hasVarSizedObjects());
  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {

  if (NeedsWinCFI && HasWinCFI) {

  if (IsFunclet && F.hasPersonalityFn()) {

  if (EmitCFI && !EmitAsyncCFI) {
    // No "async" CFI prologue was emitted; describe the whole frame now.
    unsigned CFIIndex = MF.addFrameInst(createDefCFA(
        *RegInfo, AArch64::SP, AArch64::SP, TotalSize,
        /*LastAdjustmentWasScalable=*/false));
    emitCalleeSavedGPRLocations(MBB, MBBI);
    emitCalleeSavedSVELocations(MBB, MBBI);
  }
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }

  bool HasWinCFI = false;
  bool IsFunclet = false;

  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    IsFunclet = isFuncletReturnInstr(*MBBI);
  }

  BuildMI(MBB, MBB.getFirstTerminator(), DL,
          TII->get(AArch64::PAUTH_EPILOGUE))
      .setMIFlag(MachineInstr::FrameDestroy);

      TII->get(AArch64::SEH_EpilogEnd))
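// emitEpilogue() mirrors the prologue in reverse: undo the local-area
// allocation (or fold it into the last callee-save reload), restore the
// callee-saved registers, and finally pop any argument stack the callee owes
// to its caller (AfterCSRPopSize).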
  int64_t AfterCSRPopSize = ArgumentStackToRestore;

  if (homogeneousPrologEpilog(MF, &MBB)) {
    auto HomogeneousEpilog = std::prev(LastPopI);
    if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
      LastPopI = HomogeneousEpilog;

  assert(AfterCSRPopSize == 0);

  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);

  bool CombineAfterCSRBump = false;
  if (!CombineSPBump && PrologueSaveSize != 0) {
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
           AArch64InstrInfo::isSEHInstruction(*Pop))
      Pop = std::prev(Pop);
    // Converting the last ldp to a post-index ldp is valid only if its
    // offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(
          MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
          MachineInstr::FrameDestroy, PrologueSaveSize);
    } else {
      AfterCSRPopSize += PrologueSaveSize;
      CombineAfterCSRBump = true;
    }
  }

  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy))
      break;
    else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
  }
  EpilogStartI = LastPopI;

  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");

    if (EmitCFI && hasFP(MF)) {
      unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
    }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");
  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;

  RestoreBegin = std::prev(RestoreEnd);
  while (RestoreBegin != MBB.begin() &&
         IsSVECalleeSave(std::prev(RestoreBegin)))
    --RestoreBegin;

  DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
  DeallocateAfter = CalleeSavedSizeAsOffset;

  // Deallocate the SVE locals before restoring the SVE callee saves, then
  // deallocate the SVE callee-save area itself.
  emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
                  DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
                  false, nullptr, EmitCFI && !hasFP(MF),
                  SVEStackSize +
                      StackOffset::getFixed(NumBytes + PrologueSaveSize));
  emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                  DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
                  false, nullptr, EmitCFI && !hasFP(MF),
                  DeallocateAfter +
                      StackOffset::getFixed(NumBytes + PrologueSaveSize));

  if (EmitCFI)
    emitCalleeSavedSVERestores(MBB, RestoreEnd);
  if (RedZone && AfterCSRPopSize == 0)
    return;

  // Pop the local variables off the stack. If there are no callee-saved
  // registers, it is more efficient to combine these pops with the argument
  // stack restore.
  bool NoCalleeSaveRestore = PrologueSaveSize == 0;
  int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
  if (NoCalleeSaveRestore)
    StackRestoreBytes += AfterCSRPopSize;

  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(StackRestoreBytes), TII,
                  MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
  if (NoCalleeSaveRestore || AfterCSRPopSize == 0) {
    return;
  }

  // With variable-sized objects or realignment, SP is restored from FP.
  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                  StackOffset::getFixed(
                      -AFI->getCalleeSaveBaseToFrameRecordOffset()),
                  TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
                  &HasWinCFI);
  } else if (NumBytes)

  if (EmitCFI && hasFP(MF)) {
    unsigned Reg = RegInfo.getDwarfRegNum(AArch64::SP, true);
  }

  if (AfterCSRPopSize) {
    assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
                                  "interrupt may have clobbered");
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(AfterCSRPopSize), TII,
                    MachineInstr::FrameDestroy,
                    false, NeedsWinCFI, &HasWinCFI, EmitCFI,
  if (MFI.isVariableSizedObjectIndex(FI)) {

  bool IsFixed = MFI.isFixedObjectIndex(FI);

  if (!IsFixed && !IsCSR)
    ScalableOffset = -SVEStackSize;

static StackOffset getFPOffset(const MachineFunction &MF,
                               int64_t ObjectOffset) {
  bool IsWin64 =
      Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
  unsigned FixedObject =

static StackOffset getStackOffset(const MachineFunction &MF,
                                  int64_t ObjectOffset) {

  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP

StackOffset AArch64FrameLowering::resolveFrameIndexReference(
    const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
    bool ForSimm) const {
  bool isFixed = MFI.isFixedObjectIndex(FI);
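// resolveFrameOffsetReference(): choose among FP, the base pointer, and SP
// as the base register for a frame access, weighing addressing-mode range
// (ForSimm), stack realignment, variable-sized objects, and the SVE area.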
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
    const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
    Register &FrameReg, bool PreferFP, bool ForSimm) const {
  PreferFP &= !SVEStackSize;

  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
    // For CSR fixed objects the frame pointer must be used: SP has been
    // realigned and no longer points at them.
    assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
    UseFP = true;
  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
    // Prefer FP when its offset fits the signed 9-bit immediate range.
    bool FPOffsetFits = !ForSimm || FPOffset >= -256;
    PreferFP |= Offset > -FPOffset && !SVEStackSize;

    if (MFI.hasVarSizedObjects()) {
      // The SP offset is unknown; use FP or the base pointer.
      bool CanUseBP = RegInfo->hasBasePointer(MF);
      if (FPOffsetFits && CanUseBP) // Both are ok; pick the preferred one.
        UseFP = PreferFP;
      else if (!CanUseBP) // FP is the only option.
        UseFP = true;
    } else if (FPOffset >= 0) {
      UseFP = true;
    } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
      assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                          MF.getFunction().isVarArg()) &&
             "Funclets should only be present on Win64");
      UseFP = true;
    } else {
      if (FPOffsetFits && PreferFP)
        UseFP = true;
    }
  }

  assert(
      ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
      "In the presence of dynamic stack pointer realignment, "
      "non-argument/CSR objects cannot be accessed through the frame pointer");

  if (hasFP(MF) && (SPOffset.getFixed() ||
                    FPOffset.getScalable() < SPOffset.getScalable() ||
                    RegInfo->hasStackRealignment(MF))) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }
  FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
                                         : (unsigned)AArch64::SP;

  StackOffset ScalableOffset = {};
  if (UseFP && !(isFixed || isCSR))
    ScalableOffset = -SVEStackSize;
  if (!UseFP && (isFixed || isCSR))
    ScalableOffset = SVEStackSize;

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);

  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
  }
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
         MF.getFunction().getCallingConv() != CallingConv::SwiftTail;

static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI, bool IsFirst,
                                             const TargetRegisterInfo *TRI) {
  if (Reg2 == AArch64::FP)
    return true;

  if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1)
    return false;

  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
      (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
    return false;

static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
                                      bool UsesWinAAPCS, bool NeedsWinCFI,
                                      bool NeedsFrameRecord, bool IsFirst,
                                      const TargetRegisterInfo *TRI) {
  if (NeedsFrameRecord)
    return Reg2 == AArch64::LR;
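// RegPairInfo describes one callee-save slot: one or two registers of the
// same class plus the frame index and scaled offset they are saved at. The
// pairing logic in computeCalleeSaveRegisterPairs tries to form pairs that
// STP/LDP (or the SVE multi-vector forms) can save and restore together.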
struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;

  enum RegType { GPR, FPR64, FPR128, PPR, ZPR, VG } Type;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }

  unsigned getScale() const {

  bool isScalable() const { return Type == PPR || Type == ZPR; }
  for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
    if (SavedRegs.test(PReg)) {
      unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
      return PNReg;
    }
  }
  return AArch64::NoRegister;
                                      bool NeedsFrameRecord) {

  unsigned Count = CSI.size();

  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");

  // By default the stack is filled top to bottom; for WinCFI it is filled
  // bottom to top, iterating the CSI array in reverse.
  int StackFillDir = -1;
  int RegInc = 1;
  unsigned FirstReg = 0;
  if (NeedsWinCFI) {
    StackFillDir = 1;
    RegInc = -1;
    FirstReg = Count - 1;
  }

  for (unsigned i = FirstReg; i < Count; i += RegInc) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::GPR;
    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR64;
    else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::FPR128;
    else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::ZPR;
    else if (AArch64::PPRRegClass.contains(RPI.Reg1))
      RPI.Type = RegPairInfo::PPR;
    else if (RPI.Reg1 == AArch64::VG)
      RPI.Type = RegPairInfo::VG;
    else
      llvm_unreachable("Unsupported register class.");
    // Add the next reg to the pair if it is in the same register class.
    if (unsigned(i + RegInc) < Count) {
      Register NextReg = CSI[i + RegInc].getReg();
      bool IsFirst = i == FirstReg;
      switch (RPI.Type) {
      case RegPairInfo::GPR:
        if (AArch64::GPR64RegClass.contains(NextReg) &&
            !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
                                       NeedsWinCFI, NeedsFrameRecord, IsFirst,
                                       TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR64:
        if (AArch64::FPR64RegClass.contains(NextReg) &&
            !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
                                              IsFirst, TRI))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::FPR128:
        if (AArch64::FPR128RegClass.contains(NextReg))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::PPR:
        break;
      case RegPairInfo::ZPR:
        if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1))
          RPI.Reg2 = NextReg;
        break;
      case RegPairInfo::VG:
        break;
      }
    }

    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
           "Out of order callee saved regs!");

    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
            RPI.Reg1 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    // Windows AAPCS has FP and LR reversed.
    assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
            RPI.Reg2 == AArch64::LR) &&
           "FrameRecord must be allocated together with LR");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");
    RPI.FrameIdx = CSI[i].getFrameIdx();
    if (NeedsWinCFI && RPI.isPaired())
      RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();

    int Scale = RPI.getScale();

    int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPre % Scale == 0);

    if (RPI.isScalable())
      ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
    else
      ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);

    // Swift's async context is directly before FP, so allocate an extra 8
    // bytes for it.
    if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      ByteOffset += StackFillDir * 8;

    // Insert an 8-byte gap so unpaired 8-byte saves keep 16-byte alignment.
    if (NeedGapToAlignStack && !NeedsWinCFI && !RPI.isScalable() &&
        RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired() &&
        ByteOffset % 16 != 0) {
      ByteOffset += 8 * StackFillDir;
      assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
      MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
      NeedGapToAlignStack = false;
    }

    int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
    assert(OffsetPost % Scale == 0);

    int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;

    // The (FP, LR) pair goes 8 bytes into the expanded 24-byte slot so that
    // the Swift async context can directly precede FP.
    if (NeedsFrameRecord && AFI->hasSwiftAsyncContext() &&
        ((!IsWindows && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg2 == AArch64::LR)))
      Offset += 8;
    RPI.Offset = Offset / Scale;

    assert((!RPI.isPaired() ||
            (!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
            (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
           "Offset out of bounds for LDP/STP immediate");

    // Save the offset of the frame record so that FP can point to the
    // innermost frame record (spilled FP and LR registers).
    if (NeedsFrameRecord &&
        ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
         (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
      AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);

  if (NeedsWinCFI)
    MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
  std::reverse(RegPairs.begin(), RegPairs.end());

  MRI.freezeReservedRegs();

  if (homogeneousPrologEpilog(MF)) {
    for (auto &RPI : RegPairs) {
      if (!MRI.isReserved(RPI.Reg1))
        MBB.addLiveIn(RPI.Reg1);
      if (RPI.isPaired() && !MRI.isReserved(RPI.Reg2))
        MBB.addLiveIn(RPI.Reg2);
    }

  bool PTrueCreated = false;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      StrOpc = AArch64::STR_PXI;
      Size = 2;
      Alignment = Align(2);
      break;
    case RegPairInfo::VG:
      StrOpc = AArch64::STRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    unsigned X0Scratch = AArch64::NoRegister;
    if (Reg1 == AArch64::VG) {
      // Find an available register to store the value of VG to.
      Reg1 = findScratchNonCalleeSaveRegister(&MBB);
      assert(Reg1 != AArch64::NoRegister);

      // Locally-streaming functions spill both the streaming and the
      // non-streaming VG value.
      if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface() &&

      if (llvm::any_of(MBB.liveins(),
                       [&STI](const MachineBasicBlock::RegisterMaskPair
                                  &LiveIn) {
                         return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
                             AArch64::X0, LiveIn.PhysReg);
                       }))
        X0Scratch = Reg1;

      if (X0Scratch != AArch64::NoRegister)

      const uint32_t *RegMask = TRI->getCallPreservedMask(

    LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
           "Windows unwinding requires a consecutive (FP,LR) pair");
    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(Reg1, Reg2);
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }

    if (RPI.isPaired() && RPI.isScalable()) {
      unsigned PnReg = AFI->getPredicateRegForFillSpill();
      assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      auto IsPPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::PPR;
      };
      auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
      auto IsZPR = [](const RegPairInfo &c) {
        return c.Type == RegPairInfo::ZPR;
      };
      auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
        BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
            .setMIFlags(MachineInstr::FrameSetup);
      }
      if (!MRI.isReserved(Reg1))
        MBB.addLiveIn(Reg1);
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0));

    if (!MRI.isReserved(Reg1))
      MBB.addLiveIn(Reg1);
    if (RPI.isPaired()) {
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
    if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) {

    if (X0Scratch != AArch64::NoRegister)
    DL = MBBI->getDebugLoc();

  if (homogeneousPrologEpilog(MF, &MBB)) {
    for (auto &RPI : RegPairs) {

  // Restore SVE predicates before Z registers.
  auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; };
  auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR);
  auto PPREnd = std::find_if_not(PPRBegin, RegPairs.end(), IsPPR);
  std::reverse(PPRBegin, PPREnd);
  auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; };
  auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR);
  auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR);
  std::reverse(ZPRBegin, ZPREnd);

  bool PTrueCreated = false;
  for (const RegPairInfo &RPI : RegPairs) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    case RegPairInfo::GPR:
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR64:
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
      Size = 8;
      Alignment = Align(8);
      break;
    case RegPairInfo::FPR128:
      LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::ZPR:
      LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI;
      Size = 16;
      Alignment = Align(16);
      break;
    case RegPairInfo::PPR:
      LdrOpc = AArch64::LDR_PXI;
      Size = 2;
      Alignment = Align(2);
      break;
    case RegPairInfo::VG:
      continue;
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    unsigned FrameIdxReg1 = RPI.FrameIdx;
    unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
    if (NeedsWinCFI && RPI.isPaired()) {
      std::swap(Reg1, Reg2);
      std::swap(FrameIdxReg1, FrameIdxReg2);
    }

    if (RPI.isPaired() && RPI.isScalable()) {
      unsigned PnReg = AFI->getPredicateRegForFillSpill();
      assert(((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && PnReg != 0) &&
             "Expects SVE2.1 or SME2 target and a predicate register");
#ifdef EXPENSIVE_CHECKS
      assert(!(PPRBegin < ZPRBegin) &&
             "Expected callee save predicate to be handled first");
#endif
      if (!PTrueCreated) {
        PTrueCreated = true;
        BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg)
            .setMIFlags(MachineInstr::FrameDestroy);
      }
      MIB.addReg(AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0),
                 getDefRegState(true));

    if (RPI.isPaired()) {
  if (const auto *PSV = dyn_cast_or_null<FixedStackPseudoSourceValue>(
          MMO->getPseudoValue()))
    return std::optional<int>(PSV->getFrameIndex());

  return std::nullopt;

  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
    return std::nullopt;
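// determineStackHazardSlot(): on targets where mixing GPR and FPR/SVE
// accesses to nearby stack slots is costly (SME streaming-mode hazards),
// reserve a padding slot separating the GPR and FPR stack areas. Each frame
// index is classified by the bit values below: 1 = accessed as GPR,
// 2 = accessed as FPR/SVE.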
void AArch64FrameLowering::determineStackHazardSlot(
    MachineFunction &MF, BitVector &SavedRegs) const {

  bool HasFPRCSRs = any_of(SavedRegs.set_bits(), [](unsigned Reg) {
    return AArch64::FPR64RegClass.contains(Reg) ||
           AArch64::FPR128RegClass.contains(Reg) ||
           AArch64::ZPRRegClass.contains(Reg) ||
           AArch64::PPRRegClass.contains(Reg);
  });

  bool HasFPRStackObjects = false;

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      std::optional<int> FI = getLdStFrameID(MI, MFI);
      if (FI && *FI >= 0 && *FI < (int)FrameObjects.size()) {
        if (MFI.getStackID(*FI) == TargetStackID::ScalableVector ||
            AArch64InstrInfo::isFpOrNEON(MI))
          FrameObjects[*FI] |= 2;
        else
          FrameObjects[*FI] |= 1;
      }
    }
  }
  HasFPRStackObjects =
      any_of(FrameObjects, [](unsigned B) { return (B & 3) == 2; });

  if (HasFPRCSRs || HasFPRStackObjects) {
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  unsigned ExtraCSSpill = 0;
  bool HasUnpairedGPR64 = false;
  bool HasPairZReg = false;

  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = AArch64::NoRegister;
    const bool RegIsGPR64 = AArch64::GPR64RegClass.contains(Reg);
    if (RegIsGPR64 || AArch64::FPR64RegClass.contains(Reg) ||
        AArch64::FPR128RegClass.contains(Reg)) {
      // Compute the potentially paired register.
      if (HasUnpairedGPR64)
        PairedReg = CSRegs[i % 2 == 0 ? i - 1 : i + 1];
      else
        PairedReg = CSRegs[i ^ 1];
    }

    // Once a GPR64 cannot actually pair, pairing is broken from here on.
    if (RegIsGPR64 && !AArch64::GPR64RegClass.contains(PairedReg)) {
      PairedReg = AArch64::NoRegister;
      HasUnpairedGPR64 = true;
    }
    assert(PairedReg == AArch64::NoRegister ||
           AArch64::GPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR64RegClass.contains(Reg, PairedReg) ||
           AArch64::FPR128RegClass.contains(Reg, PairedReg));

    if (AArch64::GPR64RegClass.contains(Reg) && !RegUsed) {
      UnspilledCSGPR = Reg;
      UnspilledCSGPRPaired = PairedReg;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs; spill the paired register too if needed.
    if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
        !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;
    }
    // Check for a pair of ZRegs, so a predicate register can be selected for
    // the multi-vector spill/fill.
    HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
                    SavedRegs.test(CSRegs[i ^ 1]));
  }

  if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
    // Find a suitable predicate register for the multi-vector spill/fill
    // instructions.
    unsigned PnReg = findFreePredicateReg(SavedRegs);
    if (PnReg != AArch64::NoRegister)
      AFI->setPredicateRegForFillSpill(PnReg);
    // If no free callee-save predicate was found, assign P8.
    if (!AFI->getPredicateRegForFillSpill()) {
      SavedRegs.set(AArch64::P8);
      AFI->setPredicateRegForFillSpill(AArch64::PN8);
    }
    assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
           "Predicate cannot be a reserved register");
  }

  // For the Win64 calling convention on a non-Windows OS, X18 is reserved:
  // back it up regardless of whether the function itself uses it.
  SavedRegs.set(AArch64::X18);
  unsigned CSStackSize = 0;
  unsigned SVECSStackSize = 0;

  for (unsigned Reg : SavedRegs.set_bits()) {
    auto *RC = TRI->getMinimalPhysRegClass(Reg);
    assert(RC && "expected register class!");
    auto SpillSize = TRI->getSpillSize(*RC);
    if (AArch64::PPRRegClass.contains(Reg) ||
        AArch64::ZPRRegClass.contains(Reg))
      SVECSStackSize += SpillSize;
    else
      CSStackSize += SpillSize;
  }

  // Locally-streaming functions spill VG twice (streaming + non-streaming).
  if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
    CSStackSize += 8;

  // Determine whether a hazard slot is needed and account for it.
  determineStackHazardSlot(MF, SavedRegs);
  if (AFI->hasStackHazardSlotIndex())
    CSStackSize += getStackHazardSize(MF);

  // Save the number of saved regs so CSStackSize can be updated later.
  unsigned NumSavedRegs = SavedRegs.count();

  SavedRegs.set(AArch64::FP);
  SavedRegs.set(AArch64::LR);
  LLVM_DEBUG({
    dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
    for (unsigned Reg : SavedRegs.set_bits())
      dbgs() << ' ' << printReg(Reg, RegInfo);
    dbgs() << "\n";
  });

  int64_t SVEStackSize =
      alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;

  int64_t CalleeStackUsed = 0;
  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
    int64_t FixedOff = MFI.getObjectOffset(I);
    if (FixedOff > CalleeStackUsed)
      CalleeStackUsed = FixedOff;
  }

  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
                                   CalleeStackUsed) > EstimatedStackSizeLimit;
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);
  if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
    LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                      << " to get a scratch register.\n");
    SavedRegs.set(UnspilledCSGPR);
    ExtraCSSpill = UnspilledCSGPR;

    // MachO's compact unwind format relies on all registers being stored in
    // pairs, so if an extra register is spilled for BigStack, its pair must
    // be stored too.
    if (producePairRegisters(MF)) {
      if (UnspilledCSGPRPaired == AArch64::NoRegister) {
        // Failed to make a pair for compact unwind format; revert the spill.
        SavedRegs.reset(UnspilledCSGPR);
        ExtraCSSpill = AArch64::NoRegister;
      } else {
        SavedRegs.set(UnspilledCSGPRPaired);
      }
    }
  }

  // If no unspilled callee-save register was available, allocate an
  // emergency spill slot for the register scavenger instead.
  if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
    const TargetRegisterClass &RC = AArch64::GPR64RegClass;
    unsigned Size = TRI->getSpillSize(RC);
    Align Alignment = TRI->getSpillAlign(RC);
    int FI = MFI.CreateStackObject(Size, Alignment, false);
    LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                      << " as the emergency spill slot.\n");
  }

  // Adjust for any additional GPR saves added above.
  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);

  // Account for the extra 8 bytes of the Swift async context slot.
  if (hasFP(MF) && AFI->hasSwiftAsyncContext())

  LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
                    << EstimatedStackSize + AlignedCSStackSize << " bytes.\n");

  assert((!MFI.isCalleeSavedInfoValid() ||
          AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
         "Should not invalidate callee saved info");

  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
    std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
    unsigned &MaxCSFrameIndex) const {

  std::reverse(CSI.begin(), CSI.end());

  if ((unsigned)FrameIdx < MinCSFrameIndex)
    MinCSFrameIndex = FrameIdx;
  if ((unsigned)FrameIdx > MaxCSFrameIndex)
    MaxCSFrameIndex = FrameIdx;

  // Insert VG into the list of CSRs, immediately before LR if LR is saved.
  std::vector<CalleeSavedInfo> VGSaves;
  auto VGInfo = CalleeSavedInfo(AArch64::VG);
  VGInfo.setRestored(false);
  VGSaves.push_back(VGInfo);

  // Add VG again for locally-streaming functions, which spill two values.
  if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
    VGSaves.push_back(VGInfo);
  bool InsertBeforeLR = false;

  for (unsigned I = 0; I < CSI.size(); I++)
    if (CSI[I].getReg() == AArch64::LR) {
      InsertBeforeLR = true;
      CSI.insert(CSI.begin() + I, VGSaves.begin(), VGSaves.end());
      break;
    }

  if (!InsertBeforeLR)
    CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
  int HazardSlotIndex = std::numeric_limits<int>::max();
  for (auto &CS : CSI) {

    // Create a hazard slot when switching from GPR to FPR CSRs.
    assert(HazardSlotIndex == std::numeric_limits<int>::max() &&
           "Unexpected register order for hazard slot");
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                      << "\n");
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;

    CS.setFrameIdx(FrameIdx);

    if ((unsigned)FrameIdx < MinCSFrameIndex)
      MinCSFrameIndex = FrameIdx;
    if ((unsigned)FrameIdx > MaxCSFrameIndex)
      MaxCSFrameIndex = FrameIdx;

    // Grab 8 bytes below FP for the extended asynchronous frame info.
    if (hasFP(MF) && AFI->hasSwiftAsyncContext() && !NeedsWinCFI &&
        Reg == AArch64::FP) {
      if ((unsigned)FrameIdx < MinCSFrameIndex)
        MinCSFrameIndex = FrameIdx;
      if ((unsigned)FrameIdx > MaxCSFrameIndex)
        MaxCSFrameIndex = FrameIdx;
    }

  // Add a hazard slot even when no FPR CSRs are present.
  if (AFI->hasStackHazardSlotIndex() &&
      HazardSlotIndex == std::numeric_limits<int>::max()) {
    LLVM_DEBUG(dbgs() << "Created CSR Hazard at slot " << HazardSlotIndex
                      << "\n");
    if ((unsigned)HazardSlotIndex < MinCSFrameIndex)
      MinCSFrameIndex = HazardSlotIndex;
    if ((unsigned)HazardSlotIndex > MaxCSFrameIndex)
      MaxCSFrameIndex = HazardSlotIndex;
  }
                                int &Min, int &Max) {
  Min = std::numeric_limits<int>::max();
  Max = std::numeric_limits<int>::min();

  for (auto &CS : CSI) {
    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
        AArch64::PPRRegClass.contains(CS.getReg())) {
      assert((Max == std::numeric_limits<int>::min() ||
              Max + 1 == CS.getFrameIdx()) &&
             "SVE CalleeSaves are not consecutive");

      Min = std::min(Min, CS.getFrameIdx());
      Max = std::max(Max, CS.getFrameIdx());
    }
  }
  return Min != std::numeric_limits<int>::max();
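// determineSVEStackObjectOffsets(): SVE objects live in a separate, scalable
// region between the callee-save area and the fixed-size locals. Offsets are
// assigned downwards; with AssignOffsets == false the routine only computes
// the region's total size (used by estimateSVEStackObjectOffsets).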
                                          int &MinCSFrameIndex,
                                          int &MaxCSFrameIndex,
                                          bool AssignOffsets) {
#ifndef NDEBUG
  // First process all fixed stack objects.
  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
    assert(MFI.getStackID(I) != TargetStackID::ScalableVector &&
           "SVE vectors should never be passed on the stack by value, only by "
           "reference.");
#endif

  auto Assign = [&MFI](int FI, int64_t Offset) {
    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
    MFI.setObjectOffset(FI, Offset);
  };

  // Then process all callee-saved slots.
  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {

  // Ensure the stack protector comes before locals it guards.
  int StackProtectorFI = -1;
    ObjectsToAllocate.push_back(StackProtectorFI);

    if (I == StackProtectorFI)
      continue;
    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
      continue;

  // Allocate all SVE locals and spills.
  for (unsigned FI : ObjectsToAllocate) {
    Align Alignment = MFI.getObjectAlign(FI);
    if (Alignment > Align(16))
      report_fatal_error(
          "Alignment of scalable vectors > 16 bytes is not yet supported");
int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
    MachineFrameInfo &MFI) const {
  int MinCSFrameIndex, MaxCSFrameIndex;
  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
                                        /*AssignOffsets=*/false);
}

int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(

  assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
         "Upwards growing stack unsupported");

  int MinCSFrameIndex, MaxCSFrameIndex;
  int64_t SVEStackSize =
      assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
  int64_t FixedObject =
      getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);

  assert(DstReg && "There must be a free register after frame setup");
struct TagStoreInstr {
  MachineInstr *MI;
  int64_t Offset, Size;
};

  std::optional<int64_t> FrameRegUpdate;
  // MIFlags for any FrameReg updating instructions.
  unsigned FrameRegUpdateFlags;

  TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
      : MBB(MBB), ZeroData(ZeroData) {}

  // Add an instruction to be replaced. Instructions must be added in order
  // of increasing offset and must cover adjacent memory.
  void addInstruction(TagStoreInstr I) {
    assert((TagStores.empty() ||
            TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
           "Non-adjacent tag store instructions.");
    TagStores.push_back(I);
  }
  const int64_t kMinOffset = -256 * 16;
  const int64_t kMaxOffset = 255 * 16;

  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
  if (BaseRegOffsetBytes < kMinOffset ||
      BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
      // STG/ST2G need a 16-byte aligned offset from the base register, but
      // BaseReg can be FP, which is not necessarily 16-byte aligned.
      BaseRegOffsetBytes % 16 != 0) {
    Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
    BaseReg = ScratchReg;
    BaseRegOffsetBytes = 0;
  }

  while (Size) {
    int64_t InstrSize = (Size > 16) ? 32 : 16;
    unsigned Opcode =
        InstrSize == 16
            ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
            : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
    assert(BaseRegOffsetBytes % 16 == 0);
    MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
                          .addReg(AArch64::SP)
                          .addReg(BaseReg)
                          .addImm(BaseRegOffsetBytes / 16)
                          .setMIFlags(FrameRegUpdateFlags);
    // Remember the instruction at offset 0 for possible SP-update merging.
    if (BaseRegOffsetBytes == 0)
      LastI = I;
    BaseRegOffsetBytes += InstrSize;
    Size -= InstrSize;
  }
  Register BaseReg = FrameRegUpdate
                         ? FrameReg
                         : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);

  int64_t LoopSize = Size;
  // The loop tags 32 bytes per iteration; when merging a base-register
  // update into the loop, round the size down and handle the tail store
  // separately.
  if (FrameRegUpdate && *FrameRegUpdate)
    LoopSize -= LoopSize % 32;
  MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
                                TII->get(ZeroData ? AArch64::STZGloop_wback
                                                  : AArch64::STGloop_wback))
  if (FrameRegUpdate)
    LoopI->setFlags(FrameRegUpdateFlags);

  int64_t ExtraBaseRegUpdate =
      FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
  if (LoopSize < Size) {
    // Tag 16 more bytes and fold the extra base register update into the
    // post-index immediate.
    BuildMI(*MBB, InsertI, DL,
            TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
        .addImm(1 + ExtraBaseRegUpdate / 16)
  } else if (ExtraBaseRegUpdate) {
    // Update the base register separately.
    BuildMI(*MBB, InsertI, DL,
            TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri
                                            : AArch64::SUBXri))
        .addImm(std::abs(ExtraBaseRegUpdate))
static bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
                              int64_t Size, int64_t *TotalOffset) {
  MachineInstr &MI = *II;
  if ((MI.getOpcode() == AArch64::ADDXri ||
       MI.getOpcode() == AArch64::SUBXri) &&
      MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
    int64_t Offset = MI.getOperand(2).getImm() << Shift;
    if (MI.getOpcode() == AArch64::SUBXri)
      Offset = -Offset;
    int64_t AbsPostOffset = std::abs(Offset - Size);
    const int64_t kMaxOffset =
        0xFFF; // Max encoding for unshifted ADDXri/SUBXri.
    if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
      *TotalOffset = Offset;
      return true;
    }
  }
  return false;
}
  for (auto &TS : TSE) {
    MachineInstr *MI = TS.MI;
    if (MI->memoperands_empty()) {
      MemRefs.clear();
      return;
    }
    MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
  }
                           bool TryMergeSPUpdate) {
  if (TagStores.empty())
    return;
  TagStoreInstr &FirstTagStore = TagStores[0];
  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
  DL = TagStores[0].MI->getDebugLoc();

  Register Reg;
  FrameRegOffset = TFI->resolveFrameOffsetReference(
      *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
      /*PreferFP=*/false, /*ForSimm=*/true);
  FrameReg = Reg;
  FrameRegUpdate = std::nullopt;

  mergeMemRefs(TagStores, CombinedMemRefs);

  LLVM_DEBUG({
    dbgs() << "Replacing adjacent STG instructions:\n";
    for (const auto &Instr : TagStores) {
      dbgs() << "  " << *Instr.MI;
    }
  });

  // Size threshold where a loop becomes shorter than a linear sequence of
  // tagging instructions.
  const int kSetTagLoopThreshold = 176;
  if (Size < kSetTagLoopThreshold) {
    if (TagStores.size() < 2)
      return;
    emitUnrolled(InsertI);
  } else {
    MachineInstr *UpdateInstr = nullptr;
    int64_t TotalOffset = 0;
    if (TryMergeSPUpdate) {
      // See if the base register update can be merged into the STGloop.
      if (InsertI != MBB->end() &&
          canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
                            &TotalOffset))
        UpdateInstr = &*InsertI++;
    }

    if (!UpdateInstr && TagStores.size() < 2)
      return;

    if (UpdateInstr) {
      FrameRegUpdate = TotalOffset;
      FrameRegUpdateFlags = UpdateInstr->getFlags();
    }
    emitLoop(InsertI);
    if (UpdateInstr)
      UpdateInstr->eraseFromParent();
  }

  for (auto &TS : TagStores)
    TS.MI->eraseFromParent();
                                                  int64_t &Size,
                                                  bool &ZeroData) {

  unsigned Opcode = MI.getOpcode();
  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
              Opcode == AArch64::STZ2Gi);

  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
    if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
      return false;
    if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
      return false;
    Size = MI.getOperand(2).getImm();
    return true;
  }

  if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
    Size = 16;
  else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
    Size = 32;
  else
    return false;

  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
    return false;

  Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
           16 * MI.getOperand(2).getImm();
  return true;
  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
    return II;

  constexpr int kScanLimit = 10;
  int Count = 0;
  for (auto E = MBB->end(); NextI != E && Count < kScanLimit; ++NextI) {
    MachineInstr &MI = *NextI;

    // Collect tag stores with a frame-index operand and dead outputs.
    if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
      if (ZeroData != FirstZeroData)
        break;
      continue;
    }

    // Only count non-transient instructions toward the scan limit.
    if (!MI.isTransient())
      ++Count;

    // Reject anything that may alias the collected tag stores.
    if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
      break;
  }

  LiveRegs.addLiveOuts(*MBB);
  for (auto I = MBB->rbegin();; ++I) {
    if (&*I == &*InsertI)
      break;
    LiveRegs.stepBackward(*I);
  }

  if (LiveRegs.contains(AArch64::NZCV))
    return InsertI;
  llvm::stable_sort(Instrs,
                    [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
                      return Left.Offset < Right.Offset;
                    });

  // Make sure that we don't have any overlapping stores.
  int64_t CurOffset = Instrs[0].Offset;
  for (auto &Instr : Instrs) {
    if (CurOffset > Instr.Offset)
      return NextI;
    CurOffset = Instr.Offset + Instr.Size;
  }

  // Find contiguous runs of tagged memory and emit shorter instruction
  // sequences for them when possible.
  TagStoreEdit TSE(MBB, FirstZeroData);
  std::optional<int64_t> EndOffset;
  for (auto &Instr : Instrs) {
    if (EndOffset && *EndOffset != Instr.Offset) {
      // Found a gap.
      TSE.emitCode(InsertI, TFI, /*TryMergeSPUpdate=*/false);
      TSE.clear();
    }

    TSE.addInstruction(Instr);
    EndOffset = Instr.Offset + Instr.Size;
  }