AArch64PrologueEpilogue.cpp
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
31bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
32 return AFI->hasStreamingModeChanges() &&
33 !MF.getSubtarget<AArch64Subtarget>().hasSVE();
34}
35
36bool AArch64PrologueEpilogueCommon::isVGInstruction(
37 MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
38 unsigned Opc = MBBI->getOpcode();
39 if (Opc == AArch64::CNTD_XPiI)
40 return true;
41
42 if (!requiresGetVGCall())
43 return false;
44
45 if (Opc == AArch64::BL)
46 return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
47
48 return Opc == TargetOpcode::COPY;
49}
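// Note (illustrative): when a function changes streaming mode and SVE is not
// available, the prologue computes the incoming VG with a short sequence that
// is roughly "bl __arm_get_current_vg" followed by a COPY of the returned
// value, instead of a single CNTD. isVGInstruction() lets the emitters step
// over exactly that sequence when looking for the first callee-save store.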
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
52static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
61 return I->getFlag(MachineInstr::FrameSetup) ||
62 I->getFlag(MachineInstr::FrameDestroy);
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
69static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
75 return I->getFlag(MachineInstr::FrameSetup) ||
76 I->getFlag(MachineInstr::FrameDestroy);
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
83static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
84 return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
85}
86
87AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
88 MachineFunction &MF, MachineBasicBlock &MBB,
89 const AArch64FrameLowering &AFL)
90 : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
91 Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
92 RegInfo(*Subtarget.getRegisterInfo()) {
93 TII = Subtarget.getInstrInfo();
94 AFI = MF.getInfo<AArch64FunctionInfo>();
95
96 HasFP = AFL.hasFP(MF);
97 NeedsWinCFI = AFL.needsWinCFI(MF);
98
99 if (AFL.hasSVECalleeSavesAboveFrameRecord(MF)) {
100 if (AFI->hasStackHazardSlotIndex())
101 reportFatalUsageError("SME hazard padding is not supported on Windows");
102 SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
103 } else if (AFI->hasSplitSVEObjects()) {
104 SVELayout = SVEStackLayout::Split;
105 }
106}
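// Rough summary of the layouts selected above (the default layout applies when
// neither branch fires): CalleeSavesAboveFrameRecord keeps the SVE callee
// saves above the frame record (the Windows-style arrangement), Split keeps
// ZPR and PPR objects in separate regions, and the default places all SVE
// objects in a single region below the GPR/FPR callee saves.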
107
108MachineBasicBlock::iterator
109AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
110 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
111 bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
112 unsigned NewOpc;
113
114 // If the function contains streaming mode changes, we expect instructions
115 // to calculate the value of VG before spilling. Move past these instructions
116 // if necessary.
117 if (AFL.requiresSaveVG(MF)) {
118 auto &TLI = *Subtarget.getTargetLowering();
119 while (isVGInstruction(MBBI, TLI))
120 ++MBBI;
121 }
122
123 switch (MBBI->getOpcode()) {
124 default:
125 llvm_unreachable("Unexpected callee-save save/restore opcode!");
126 case AArch64::STPXi:
127 NewOpc = AArch64::STPXpre;
128 break;
129 case AArch64::STPDi:
130 NewOpc = AArch64::STPDpre;
131 break;
132 case AArch64::STPQi:
133 NewOpc = AArch64::STPQpre;
134 break;
135 case AArch64::STRXui:
136 NewOpc = AArch64::STRXpre;
137 break;
138 case AArch64::STRDui:
139 NewOpc = AArch64::STRDpre;
140 break;
141 case AArch64::STRQui:
142 NewOpc = AArch64::STRQpre;
143 break;
144 case AArch64::LDPXi:
145 NewOpc = AArch64::LDPXpost;
146 break;
147 case AArch64::LDPDi:
148 NewOpc = AArch64::LDPDpost;
149 break;
150 case AArch64::LDPQi:
151 NewOpc = AArch64::LDPQpost;
152 break;
153 case AArch64::LDRXui:
154 NewOpc = AArch64::LDRXpost;
155 break;
156 case AArch64::LDRDui:
157 NewOpc = AArch64::LDRDpost;
158 break;
159 case AArch64::LDRQui:
160 NewOpc = AArch64::LDRQpost;
161 break;
162 }
163 TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
164 int64_t MinOffset, MaxOffset;
165 bool Success = TII->getMemOpInfo(NewOpc, Scale, Width, MinOffset, MaxOffset);
166 (void)Success;
167 assert(Success && "unknown load/store opcode");
168
169 // If the first store isn't right where we want SP then we can't fold the
170 // update in so create a normal arithmetic instruction instead.
171 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
172 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
173 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
174 // If we are destroying the frame, make sure we add the increment after the
175 // last frame operation.
176 if (FrameFlag == MachineInstr::FrameDestroy) {
177 ++MBBI;
178 // Also skip the SEH instruction, if needed
179 if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
180 ++MBBI;
181 }
182 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
183 StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
184 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
185 StackOffset::getFixed(CFAOffset));
186
187 return std::prev(MBBI);
188 }
189
190 // Get rid of the SEH code associated with the old instruction.
191 if (NeedsWinCFI) {
192 auto SEH = std::next(MBBI);
193 if (AArch64InstrInfo::isSEHInstruction(*SEH))
194 SEH->eraseFromParent();
195 }
196
197 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
198 MIB.addReg(AArch64::SP, RegState::Define);
199
200 // Copy all operands other than the immediate offset.
201 unsigned OpndIdx = 0;
202 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
203 ++OpndIdx)
204 MIB.add(MBBI->getOperand(OpndIdx));
205
206 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
207 "Unexpected immediate offset in first/last callee-save save/restore "
208 "instruction!");
209 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
210 "Unexpected base register in callee-save save/restore instruction!");
211 assert(CSStackSizeInc % Scale == 0);
212 MIB.addImm(CSStackSizeInc / (int)Scale);
213
214 MIB.setMIFlags(MBBI->getFlags());
215 MIB.setMemRefs(MBBI->memoperands());
216
217 // Generate a new SEH code that corresponds to the new instruction.
218 if (NeedsWinCFI) {
219 HasWinCFI = true;
220 AFL.insertSEH(*MIB, *TII, FrameFlag);
221 }
222
223 if (EmitCFI)
224 CFIInstBuilder(MBB, MBBI, FrameFlag)
225 .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
226
227 return std::prev(MBB.erase(MBBI));
228}
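// Illustrative example of the conversion performed above: a prologue pair
//   sub sp, sp, #16
//   stp x29, x30, [sp, #0]
// is folded into the single pre-decrement form
//   stp x29, x30, [sp, #-16]!
// and the matching epilogue "ldp x29, x30, [sp, #0]" becomes the post-increment
// "ldp x29, x30, [sp], #16".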
229
230// Fix up the SEH opcode associated with the save/restore instruction.
231static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
232 unsigned LocalStackSize) {
233 MachineOperand *ImmOpnd = nullptr;
234 unsigned ImmIdx = MBBI->getNumOperands() - 1;
235 switch (MBBI->getOpcode()) {
236 default:
237 llvm_unreachable("Fix the offset in the SEH instruction");
238 case AArch64::SEH_SaveFPLR:
239 case AArch64::SEH_SaveRegP:
240 case AArch64::SEH_SaveReg:
241 case AArch64::SEH_SaveFRegP:
242 case AArch64::SEH_SaveFReg:
243 case AArch64::SEH_SaveAnyRegI:
244 case AArch64::SEH_SaveAnyRegIP:
245 case AArch64::SEH_SaveAnyRegQP:
246 case AArch64::SEH_SaveAnyRegQPX:
247 ImmOpnd = &MBBI->getOperand(ImmIdx);
248 break;
249 }
250 if (ImmOpnd)
251 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
252}
253
254void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
255 MachineInstr &MI, uint64_t LocalStackSize) const {
256 if (AArch64InstrInfo::isSEHInstruction(MI))
257 return;
258
259 unsigned Opc = MI.getOpcode();
260 unsigned Scale;
261 switch (Opc) {
262 case AArch64::STPXi:
263 case AArch64::STRXui:
264 case AArch64::STPDi:
265 case AArch64::STRDui:
266 case AArch64::LDPXi:
267 case AArch64::LDRXui:
268 case AArch64::LDPDi:
269 case AArch64::LDRDui:
270 Scale = 8;
271 break;
272 case AArch64::STPQi:
273 case AArch64::STRQui:
274 case AArch64::LDPQi:
275 case AArch64::LDRQui:
276 Scale = 16;
277 break;
278 default:
279 llvm_unreachable("Unexpected callee-save save/restore opcode!");
280 }
281
282 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
283 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
284 "Unexpected base register in callee-save save/restore instruction!");
285 // Last operand is immediate offset that needs fixing.
286 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
287 // All generated opcodes have scaled offsets.
288 assert(LocalStackSize % Scale == 0);
289 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
290
291 if (NeedsWinCFI) {
292 HasWinCFI = true;
293 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
294 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
295 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
296 "Expecting a SEH instruction");
297 fixupSEHOpcode(MBBI, LocalStackSize);
298 }
299}
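// Illustrative example (assumed sizes): with a combined SP bump and a local
// stack size of 32 bytes, a callee save emitted as "stp x19, x20, [sp, #16]"
// is rewritten to "stp x19, x20, [sp, #48]"; the encoded immediate is still
// scaled by 8, so the operand changes from 2 to 6.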
300
301bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
302 uint64_t StackBumpBytes) const {
303 if (AFL.homogeneousPrologEpilog(MF))
304 return false;
305
306 if (AFI->getLocalStackSize() == 0)
307 return false;
308
309 // For WinCFI, if optimizing for size, prefer to not combine the stack bump
310 // (to force a stp with predecrement) to match the packed unwind format,
311 // provided that there actually are any callee saved registers to merge the
312 // decrement with.
313 // This is potentially marginally slower, but allows using the packed
314 // unwind format for functions that both have a local area and callee saved
315 // registers. Using the packed unwind format notably reduces the size of
316 // the unwind info.
317 if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
318 MF.getFunction().hasOptSize())
319 return false;
320
321 // 512 is the maximum immediate for stp/ldp that will be used for
322 // callee-save save/restores
323 if (StackBumpBytes >= 512 ||
324 AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
325 return false;
326
327 if (MFI.hasVarSizedObjects())
328 return false;
329
330 if (RegInfo.hasStackRealignment(MF))
331 return false;
332
333 // This isn't strictly necessary, but it simplifies things a bit since the
334 // current RedZone handling code assumes the SP is adjusted by the
335 // callee-save save/restore code.
336 if (AFL.canUseRedZone(MF))
337 return false;
338
339 // When there is an SVE area on the stack, always allocate the
340 // callee-saves and spills/locals separately.
341 if (AFI->hasSVEStackSize())
342 return false;
343
344 return true;
345}
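// Example of what combining enables (illustrative): instead of emitting
//   stp x29, x30, [sp, #-16]!   // callee saves
//   sub sp, sp, #32             // locals
// the prologue can emit a single "stp x29, x30, [sp, #-48]!" that allocates
// the callee-save area and the locals in one SP adjustment.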
346
347SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
348 StackOffset PPRCalleeSavesSize =
349 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
350 StackOffset ZPRCalleeSavesSize =
351 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
352 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
353 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
354 if (SVELayout == SVEStackLayout::Split)
355 return {{PPRCalleeSavesSize, PPRLocalsSize},
356 {ZPRCalleeSavesSize, ZPRLocalsSize}};
357 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
358 return {{PPRCalleeSavesSize, StackOffset{}},
359 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
360}
361
362SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
363 SVEFrameSizes const &SVE) {
364 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
365 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
366 StackOffset AfterPPRs = {};
367 if (SVELayout == SVEStackLayout::Split) {
368 BeforePPRs = SVE.PPR.CalleeSavesSize;
369 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
370 if (SVE.ZPR.CalleeSavesSize)
371 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
372 else
373 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
374 }
375 return {BeforePPRs, AfterPPRs, AfterZPRs};
376}
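// Rough picture of the resulting allocation order (highest address first),
// assuming the split-SVE layout; when not split, AfterPPRs is empty and all
// SVE locals are attributed to AfterZPRs:
//   BeforePPRs : PPR callee saves (plus ZPR callee saves when not split)
//   AfterPPRs  : PPR locals followed by ZPR callee saves (split layout only)
//   AfterZPRs  : ZPR locals (and, in the prologue, the non-SVE locals)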
377
378struct SVEPartitions {
379 struct {
380 MachineBasicBlock::iterator Begin, End;
381 } PPR, ZPR;
382};
383
384static SVEPartitions partitionSVECS(MachineBasicBlock &MBB,
385 MachineBasicBlock::iterator MBBI,
386 StackOffset PPRCalleeSavesSize,
387 StackOffset ZPRCalleeSavesSize,
388 bool IsEpilogue) {
389 MachineBasicBlock::iterator PPRsI = MBBI;
390 MachineBasicBlock::iterator End =
391 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
392 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
393 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
394 if (PPRCalleeSavesSize) {
395 PPRsI = AdjustI(PPRsI);
396 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
397 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
398 IsEpilogue ? (--PPRsI) : (++PPRsI);
399 }
400 MachineBasicBlock::iterator ZPRsI = PPRsI;
401 if (ZPRCalleeSavesSize) {
402 ZPRsI = AdjustI(ZPRsI);
403 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
404 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
405 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
406 }
407 if (IsEpilogue)
408 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
409 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
410}
411
412AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
413 MachineBasicBlock &MBB,
414 const AArch64FrameLowering &AFL)
415 : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
416 EmitCFI = AFI->needsDwarfUnwindInfo(MF);
417 EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
418 IsFunclet = MBB.isEHFuncletEntry();
419 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
420
421#ifndef NDEBUG
422 collectBlockLiveins();
423#endif
424}
425
426#ifndef NDEBUG
427/// Collect live registers from the end of \p MI's parent up to (including) \p
428/// MI in \p LiveRegs.
429static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
430 LivePhysRegs &LiveRegs) {
431
432 MachineBasicBlock &MBB = *MI.getParent();
433 LiveRegs.addLiveOuts(MBB);
434 for (const MachineInstr &MI :
435 reverse(make_range(MI.getIterator(), MBB.instr_end())))
436 LiveRegs.stepBackward(MI);
437}
438
439void AArch64PrologueEmitter::collectBlockLiveins() {
440 // Collect live register from the end of MBB up to the start of the existing
441 // frame setup instructions.
442 PrologueEndI = MBB.begin();
443 while (PrologueEndI != MBB.end() &&
444 PrologueEndI->getFlag(MachineInstr::FrameSetup))
445 ++PrologueEndI;
446
447 if (PrologueEndI != MBB.end()) {
448 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
449 // Ignore registers used for stack management for now.
450 LiveRegs.removeReg(AArch64::SP);
451 LiveRegs.removeReg(AArch64::X19);
452 LiveRegs.removeReg(AArch64::FP);
453 LiveRegs.removeReg(AArch64::LR);
454
455 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
456 // This is necessary to spill VG if required where SVE is unavailable, but
457 // X0 is preserved around this call.
458 if (requiresGetVGCall())
459 LiveRegs.removeReg(AArch64::X0);
460 }
461}
462
463void AArch64PrologueEmitter::verifyPrologueClobbers() const {
464 if (PrologueEndI == MBB.end())
465 return;
466 // Check if any of the newly inserted instructions clobber any of the live registers.
467 for (MachineInstr &MI :
468 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
469 for (auto &Op : MI.operands())
470 if (Op.isReg() && Op.isDef())
471 assert(!LiveRegs.contains(Op.getReg()) &&
472 "live register clobbered by inserted prologue instructions");
473 }
474}
475#endif
476
477void AArch64PrologueEmitter::determineLocalsStackSize(
478 uint64_t StackSize, uint64_t PrologueSaveSize) {
479 AFI->setLocalStackSize(StackSize - PrologueSaveSize);
480 CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
481}
482
483// Return the maximum possible number of bytes for `Size` due to the
484// architectural limit on the size of a SVE register.
485static int64_t upperBound(StackOffset Size) {
486 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
487 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
488}
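// Example: for a StackOffset of 32 fixed bytes plus 2 scalable bytes this
// returns 2 * 16 + 32 = 64, i.e. the worst case at the architectural maximum
// of vscale = 16 (a 2048-bit SVE register).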
489
490void AArch64PrologueEmitter::allocateStackSpace(
491 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
492 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
493 bool FollowupAllocs) {
494
495 if (!AllocSize)
496 return;
497
498 DebugLoc DL;
499 const int64_t MaxAlign = MFI.getMaxAlign().value();
500 const uint64_t AndMask = ~(MaxAlign - 1);
501
502 if (!Subtarget.getTargetLowering()->hasInlineStackProbe(MF)) {
503 Register TargetReg = RealignmentPadding
504 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
505 : AArch64::SP;
506 // SUB Xd/SP, SP, AllocSize
507 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
508 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
509 EmitCFI, InitialOffset);
510
511 if (RealignmentPadding) {
512 // AND SP, X9, 0b11111...0000
513 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
514 .addReg(TargetReg, RegState::Kill)
515 .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
516 .setMIFlags(MachineInstr::FrameSetup);
517 AFI->setStackRealigned(true);
518
519 // No need for SEH instructions here; if we're realigning the stack,
520 // we've set a frame pointer and already finished the SEH prologue.
521 assert(!NeedsWinCFI);
522 }
523 return;
524 }
525
526 //
527 // Stack probing allocation.
528 //
529
530 // Fixed length allocation. If we don't need to re-align the stack and don't
531 // have SVE objects, we can use a more efficient sequence for stack probing.
532 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
533 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
534 assert(ScratchReg != AArch64::NoRegister);
535 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
536 .addDef(ScratchReg)
537 .addImm(AllocSize.getFixed())
538 .addImm(InitialOffset.getFixed())
539 .addImm(InitialOffset.getScalable());
540 // The fixed allocation may leave unprobed bytes at the top of the
541 // stack. If we have subsequent allocation (e.g. if we have variable-sized
542 // objects), we need to issue an extra probe, so these allocations start in
543 // a known state.
544 if (FollowupAllocs) {
545 // LDR XZR, [SP]
546 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
547 .addDef(AArch64::XZR)
548 .addReg(AArch64::SP)
549 .addImm(0)
550 .addMemOperand(MF.getMachineMemOperand(
553 Align(8)))
555 }
556
557 return;
558 }
559
560 // Variable length allocation.
561
562 // If the (unknown) allocation size cannot exceed the probe size, decrement
563 // the stack pointer right away.
564 int64_t ProbeSize = AFI->getStackProbeSize();
565 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
566 Register ScratchReg = RealignmentPadding
567 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
568 : AArch64::SP;
569 assert(ScratchReg != AArch64::NoRegister);
570 // SUB Xd, SP, AllocSize
571 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
572 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
573 EmitCFI, InitialOffset);
574 if (RealignmentPadding) {
575 // AND SP, Xn, 0b11111...0000
576 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
577 .addReg(ScratchReg, RegState::Kill)
578 .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
579 .setMIFlags(MachineInstr::FrameSetup);
580 AFI->setStackRealigned(true);
581 }
582 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
583 ProbeSize) {
584 // LDR XZR, [SP]
585 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
586 .addDef(AArch64::XZR)
587 .addReg(AArch64::SP)
588 .addImm(0)
589 .addMemOperand(MF.getMachineMemOperand(
592 Align(8)))
594 }
595 return;
596 }
597
598 // Emit a variable-length allocation probing loop.
599 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
600 // each of them guaranteed to adjust the stack by less than the probe size.
601 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
602 assert(TargetReg != AArch64::NoRegister);
603 // SUB Xd, SP, AllocSize
604 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
605 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
606 EmitCFI, InitialOffset);
607 if (RealignmentPadding) {
608 // AND Xn, Xn, 0b11111...0000
609 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
610 .addReg(TargetReg, RegState::Kill)
611 .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64))
612 .setMIFlags(MachineInstr::FrameSetup);
613 }
614
615 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
616 .addReg(TargetReg);
617 if (EmitCFI) {
618 // Set the CFA register back to SP.
619 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
620 .buildDefCFARegister(AArch64::SP);
621 }
622 if (RealignmentPadding)
623 AFI->setStackRealigned(true);
624}
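// Summary of the three strategies above (illustrative):
//  1. No stack probing: a single SUB of SP (via a scratch register plus an
//     aligning AND when realignment is required).
//  2. Probing, fixed-size allocation: PROBED_STACKALLOC expands to a probed
//     decrement, with a trailing "ldr xzr, [sp]" if further allocations follow.
//  3. Probing, variable/scalable allocation: if the worst-case size fits in a
//     single probe interval, decrement and probe once; otherwise
//     PROBED_STACKALLOC_VAR expands to a probing loop.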
625
626void AArch64PrologueEmitter::emitPrologue() {
627 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
628 const MachineBasicBlock::iterator EndI = MBB.end();
629
630 // At this point, we're going to decide whether or not the function uses a
631 // redzone. In most cases, the function doesn't have a redzone so let's
632 // assume that's false and set it to true in the case that there's a redzone.
633 AFI->setHasRedZone(false);
634
635 // Debug location must be unknown since the first debug location is used
636 // to determine the end of the prologue.
637 DebugLoc DL;
638
639 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
640 // have a tail-call where the caller only needs to adjust the stack pointer in
641 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
642 // See `seh-minimal-prologue-epilogue.ll` test cases.
643 if (AFI->getArgumentStackToRestore())
644 HasWinCFI = true;
645
646 if (AFI->shouldSignReturnAddress(MF)) {
647 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
648 // are inserted by emitPacRetPlusLeafHardening().
649 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
650 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
651 .setMIFlag(MachineInstr::FrameSetup);
652 }
653 // AArch64PointerAuth pass will insert SEH_PACSignLR
654 HasWinCFI |= NeedsWinCFI;
655 }
656
657 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
658 emitShadowCallStackPrologue(PrologueBeginI, DL);
659 HasWinCFI |= NeedsWinCFI;
660 }
661
662 if (EmitCFI && AFI->isMTETagged())
663 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
664 .setMIFlag(MachineInstr::FrameSetup);
665
666 // We signal the presence of a Swift extended frame to external tools by
667 // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
668 // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
669 // bits so that is still true.
670 if (HasFP && AFI->hasSwiftAsyncContext())
671 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
672
673 // All calls are tail calls in GHC calling conv, and functions have no
674 // prologue/epilogue.
675 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
676 return;
677
678 // Set tagged base pointer to the requested stack slot. Ideally it should
679 // match SP value after prologue.
680 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
681 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
682 else
683 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
684
685 // getStackSize() includes all the locals in its size calculation. We don't
686 // include these locals when computing the stack size of a funclet, as they
687 // are allocated in the parent's stack frame and accessed via the frame
688 // pointer from the funclet. We only save the callee saved registers in the
689 // funclet, which are really the callee saved registers of the parent
690 // function, including the funclet.
691 int64_t NumBytes =
692 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
693 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
694 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
695
696 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
697 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
698
699 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
700 // All of the remaining stack allocations are for locals.
701 determineLocalsStackSize(NumBytes, PrologueSaveSize);
702
703 auto [PPR, ZPR] = getSVEStackFrameSizes();
704 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
705
706 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
707 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
708 assert(!SVEAllocs.AfterPPRs &&
709 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
710 // If we're doing SVE saves first, we need to immediately allocate space
711 // for fixed objects, then space for the SVE callee saves.
712 //
713 // Windows unwind requires that the scalable size is a multiple of 16;
714 // that's handled when the callee-saved size is computed.
715 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
716 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
717 /*FollowupAllocs=*/true);
718 NumBytes -= FixedObject;
719
720 // Now allocate space for the GPR callee saves.
721 MachineBasicBlock::iterator MBBI = PrologueBeginI;
722 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
723 ++MBBI;
724 FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
725 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
726 NumBytes -= AFI->getCalleeSavedStackSize();
727 } else if (CombineSPBump) {
728 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
729 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
730 StackOffset::getFixed(-NumBytes), TII,
731 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
732 EmitAsyncCFI);
733 NumBytes = 0;
734 } else if (HomPrologEpilog) {
735 // Stack has been already adjusted.
736 NumBytes -= PrologueSaveSize;
737 } else if (PrologueSaveSize != 0) {
738 FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
739 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
740 NumBytes -= PrologueSaveSize;
741 }
742 assert(NumBytes >= 0 && "Negative stack allocation size!?");
743
744 // Move past the saves of the callee-saved registers, fixing up the offsets
745 // and pre-inc if we decided to combine the callee-save and local stack
746 // pointer bump above.
747 auto &TLI = *Subtarget.getTargetLowering();
748
749 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
750 while (AfterGPRSavesI != EndI &&
751 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
752 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
753 if (CombineSPBump &&
754 // Only fix-up frame-setup load/store instructions.
755 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
756 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
757 AFI->getLocalStackSize());
758 ++AfterGPRSavesI;
759 }
760
761 // For funclets the FP belongs to the containing function. Only set up FP if
762 // we actually need to.
763 if (!IsFunclet && HasFP)
764 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
765
766 // Now emit the moves for whatever callee saved regs we have (including FP,
767 // LR if those are saved). Frame instructions for SVE register are emitted
768 // later, after the instruction which actually save SVE regs.
769 if (EmitAsyncCFI)
770 emitCalleeSavedGPRLocations(AfterGPRSavesI);
771
772 // Alignment is required for the parent frame, not the funclet
773 const bool NeedsRealignment =
774 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
775 const int64_t RealignmentPadding =
776 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
777 ? MFI.getMaxAlign().value() - 16
778 : 0;
779
780 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
781 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
782
783 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
784 SVEAllocs.AfterZPRs += NonSVELocalsSize;
785
786 StackOffset CFAOffset =
787 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
788 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
789 // Allocate space for the callee saves and PPR locals (if any).
790 if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
791 auto [PPRRange, ZPRRange] =
792 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
793 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
794 AfterSVESavesI = ZPRRange.End;
795 if (EmitAsyncCFI)
796 emitCalleeSavedSVELocations(AfterSVESavesI);
797
798 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
799 EmitAsyncCFI && !HasFP, CFAOffset,
800 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
801 SVEAllocs.AfterZPRs);
802 CFAOffset += SVEAllocs.BeforePPRs;
803 assert(PPRRange.End == ZPRRange.Begin &&
804 "Expected ZPR callee saves after PPR locals");
805 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
806 EmitAsyncCFI && !HasFP, CFAOffset,
807 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
808 CFAOffset += SVEAllocs.AfterPPRs;
809 } else {
811 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
812 // already been allocated. PPR locals (included in AfterPPRs) are not
813 // supported (note: this is asserted above).
814 CFAOffset += SVEAllocs.BeforePPRs;
815 }
816
817 // Allocate space for the rest of the frame including ZPR locals. Align the
818 // stack as necessary.
819 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
820 "Cannot use redzone with stack realignment");
821 if (!AFL.canUseRedZone(MF)) {
822 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
823 // correct value here, as NumBytes also includes padding bytes, which
824 // shouldn't be counted here.
825 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
826 EmitAsyncCFI && !HasFP, CFAOffset,
827 MFI.hasVarSizedObjects());
828 }
829
830 // If we need a base pointer, set it up here. It's whatever the value of the
831 // stack pointer is at this point. Any variable size objects will be
832 // allocated after this, so we can still use the base pointer to reference
833 // locals.
834 //
835 // FIXME: Clarify FrameSetup flags here.
836 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
837 // needed.
838 // For funclets the BP belongs to the containing function.
839 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
840 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
841 AArch64::SP, false);
842 if (NeedsWinCFI) {
843 HasWinCFI = true;
844 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
846 }
847 }
848
849 // The very last FrameSetup instruction indicates the end of prologue. Emit a
850 // SEH opcode indicating the prologue end.
851 if (NeedsWinCFI && HasWinCFI) {
852 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
854 }
855
856 // SEH funclets are passed the frame pointer in X1. If the parent
857 // function uses the base register, then the base register is used
858 // directly, and is not retrieved from X1.
859 if (IsFunclet && F.hasPersonalityFn()) {
860 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
861 if (isAsynchronousEHPersonality(Per)) {
862 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
863 AArch64::FP)
864 .addReg(AArch64::X1)
865 .setMIFlag(MachineInstr::FrameSetup);
866 MBB.addLiveIn(AArch64::X1);
867 }
868 }
869
870 if (EmitCFI && !EmitAsyncCFI) {
871 if (HasFP) {
872 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
873 } else {
874 StackOffset TotalSize =
875 AFL.getSVEStackSize(MF) +
876 StackOffset::getFixed((int64_t)MFI.getStackSize());
877 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
878 CFIBuilder.insertCFIInst(
879 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
880 TotalSize, /*LastAdjustmentWasScalable=*/false));
881 }
882 emitCalleeSavedGPRLocations(AfterSVESavesI);
883 emitCalleeSavedSVELocations(AfterSVESavesI);
884 }
885}
886
887void AArch64PrologueEmitter::emitShadowCallStackPrologue(
889 // Shadow call stack prolog: str x30, [x18], #8
890 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
891 .addReg(AArch64::X18, RegState::Define)
892 .addReg(AArch64::LR)
893 .addReg(AArch64::X18)
894 .addImm(8)
896
897 // This instruction also makes x18 live-in to the entry block.
898 MBB.addLiveIn(AArch64::X18);
899
900 if (NeedsWinCFI)
901 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
903
904 if (EmitCFI) {
905 // Emit a CFI instruction that causes 8 to be subtracted from the value of
906 // x18 when unwinding past this frame.
907 static const char CFIInst[] = {
908 dwarf::DW_CFA_val_expression,
909 18, // register
910 2, // length
911 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
912 static_cast<char>(-8) & 0x7f, // addend (sleb128)
913 };
914 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
915 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
916 }
917}
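// The escape emitted above encodes, in DWARF terms, roughly
//   DW_CFA_val_expression x18, { DW_OP_breg18, -8 }
// i.e. "the caller's x18 is the current x18 minus 8", undoing the post-indexed
// store that pushed LR onto the shadow call stack.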
918
919void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
921 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
922 case SwiftAsyncFramePointerMode::DeploymentBased:
923 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
924 // The special symbol below is absolute and has a *value* that can be
925 // combined with the frame pointer to signal an extended frame.
926 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
927 .addExternalSymbol("swift_async_extendedFramePointerFlags",
928 AArch64II::MO_GOT);
929 if (NeedsWinCFI) {
930 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
932 HasWinCFI = true;
933 }
934 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
935 .addUse(AArch64::FP)
936 .addUse(AArch64::X16)
937 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
938 if (NeedsWinCFI) {
939 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
941 HasWinCFI = true;
942 }
943 break;
944 }
945 [[fallthrough]];
946
947 case SwiftAsyncFramePointerMode::Always:
948 // ORR x29, x29, #0x1000_0000_0000_0000
949 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
950 .addUse(AArch64::FP)
951 .addImm(0x1100)
953 if (NeedsWinCFI) {
954 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
956 HasWinCFI = true;
957 }
958 break;
959
960 case SwiftAsyncFramePointerMode::Never:
961 break;
962 }
963}
964
965void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
966 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
967 const DebugLoc &DL) const {
968 assert(!HasFP && "unexpected function without stack frame but with FP");
969 assert(!AFL.getSVEStackSize(MF) &&
970 "unexpected function without stack frame but with SVE objects");
971 // All of the stack allocation is for locals.
972 AFI->setLocalStackSize(NumBytes);
973 if (!NumBytes) {
974 if (NeedsWinCFI && HasWinCFI) {
975 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
977 }
978 return;
979 }
980 // REDZONE: If the stack size is less than 128 bytes, we don't need
981 // to actually allocate.
982 if (AFL.canUseRedZone(MF)) {
983 AFI->setHasRedZone(true);
984 ++NumRedZoneFunctions;
985 } else {
986 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
987 StackOffset::getFixed(-NumBytes), TII,
988 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
989 if (EmitCFI) {
990 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
991 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
992 // Encode the stack size of the leaf function.
993 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
994 .buildDefCFAOffset(NumBytes, FrameLabel);
995 }
996 }
997
998 if (NeedsWinCFI) {
999 HasWinCFI = true;
1000 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1002 }
1003}
1004
1005void AArch64PrologueEmitter::emitFramePointerSetup(
1006 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1007 unsigned FixedObject) {
1008 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1009 if (CombineSPBump)
1010 FPOffset += AFI->getLocalStackSize();
1011
1012 if (AFI->hasSwiftAsyncContext()) {
1013 // Before we update the live FP we have to ensure there's a valid (or
1014 // null) asynchronous context in its slot just before FP in the frame
1015 // record, so store it now.
1016 const auto &Attrs = MF.getFunction().getAttributes();
1017 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1018 if (HaveInitialContext)
1019 MBB.addLiveIn(AArch64::X22);
1020 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1021 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1022 .addUse(Reg)
1023 .addUse(AArch64::SP)
1024 .addImm(FPOffset - 8)
1026 if (NeedsWinCFI) {
1027 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1028 // to multiple instructions, should be mutually-exclusive.
1029 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1030 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1032 HasWinCFI = true;
1033 }
1034 }
1035
1036 if (HomPrologEpilog) {
1037 auto Prolog = MBBI;
1038 --Prolog;
1039 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1040 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1041 } else {
1042 // Issue sub fp, sp, FPOffset or
1043 // mov fp,sp when FPOffset is zero.
1044 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1045 // This code marks the instruction(s) that set the FP also.
1046 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1047 StackOffset::getFixed(FPOffset), TII,
1048 MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1049 if (NeedsWinCFI && HasWinCFI) {
1050 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1052 // After setting up the FP, the rest of the prolog doesn't need to be
1053 // included in the SEH unwind info.
1054 NeedsWinCFI = false;
1055 }
1056 }
1057 if (EmitAsyncCFI)
1058 emitDefineCFAWithFP(MBBI, FixedObject);
1059}
1060
1061// Define the current CFA rule to use the provided FP.
1062void AArch64PrologueEmitter::emitDefineCFAWithFP(
1063 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1064 const int OffsetToFirstCalleeSaveFromFP =
1065 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1066 AFI->getCalleeSavedStackSize();
1067 Register FramePtr = RegInfo.getFrameRegister(MF);
1068 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1069 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1070}
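// Worked example (assumed sizes): with FixedObject = 0, a 32-byte callee-save
// area and a CalleeSaveBaseToFrameRecordOffset of 16, the offset computed
// above is 0 - (16 - 32) = 16, so the CFA is defined as FP + 16, which is the
// incoming SP of the function.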
1071
1072void AArch64PrologueEmitter::emitWindowsStackProbe(
1073 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1074 int64_t RealignmentPadding) const {
1075 if (AFI->getSVECalleeSavedStackSize())
1076 report_fatal_error("SVE callee saves not yet supported with stack probing");
1077
1078 // Find an available register to spill the value of X15 to, if X15 is being
1079 // used already for nest.
1080 unsigned X15Scratch = AArch64::NoRegister;
1081 if (llvm::any_of(MBB.liveins(),
1082 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1083 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1084 LiveIn.PhysReg);
1085 })) {
1086 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
1087 assert(X15Scratch != AArch64::NoRegister &&
1088 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1089#ifndef NDEBUG
1090 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1091#endif
1092 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1093 .addReg(AArch64::XZR)
1094 .addReg(AArch64::X15, RegState::Undef)
1095 .addReg(AArch64::X15, RegState::Implicit)
1097 }
1098
1099 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
1100 if (NeedsWinCFI) {
1101 HasWinCFI = true;
1102 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1103 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1104 // This is at most two instructions, MOVZ followed by MOVK.
1105 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1106 // exceeding 256MB in size.
1107 if (NumBytes >= (1 << 28))
1108 report_fatal_error("Stack size cannot exceed 256MB for stack "
1109 "unwinding purposes");
1110
1111 uint32_t LowNumWords = NumWords & 0xFFFF;
1112 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1113 .addImm(LowNumWords)
1116 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1118 if ((NumWords & 0xFFFF0000) != 0) {
1119 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1120 .addReg(AArch64::X15)
1121 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1124 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1126 }
1127 } else {
1128 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1129 .addImm(NumWords)
1131 }
1132
1133 const AArch64TargetLowering *TLI = Subtarget.getTargetLowering();
1134 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1135 if (ChkStkLibcall == RTLIB::Unsupported)
1136 reportFatalUsageError("no available implementation of __chkstk");
1137
1138 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1139 switch (MF.getTarget().getCodeModel()) {
1140 case CodeModel::Tiny:
1141 case CodeModel::Small:
1142 case CodeModel::Medium:
1143 case CodeModel::Kernel:
1144 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1145 .addExternalSymbol(ChkStk)
1146 .addReg(AArch64::X15, RegState::Implicit)
1147 .addReg(AArch64::X16,
1149 .addReg(AArch64::X17,
1151 .addReg(AArch64::NZCV,
1154 if (NeedsWinCFI) {
1155 HasWinCFI = true;
1156 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1158 }
1159 break;
1160 case CodeModel::Large:
1161 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1162 .addReg(AArch64::X16, RegState::Define)
1163 .addExternalSymbol(ChkStk)
1164 .addExternalSymbol(ChkStk)
1166 if (NeedsWinCFI) {
1167 HasWinCFI = true;
1168 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1170 }
1171
1173 .addReg(AArch64::X16, RegState::Kill)
1175 .addReg(AArch64::X16,
1177 .addReg(AArch64::X17,
1179 .addReg(AArch64::NZCV,
1182 if (NeedsWinCFI) {
1183 HasWinCFI = true;
1184 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1186 }
1187 break;
1188 }
1189
1190 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1191 .addReg(AArch64::SP, RegState::Kill)
1192 .addReg(AArch64::X15, RegState::Kill)
1195 if (NeedsWinCFI) {
1196 HasWinCFI = true;
1197 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1198 .addImm(NumBytes)
1200 }
1201 NumBytes = 0;
1202
1203 if (RealignmentPadding > 0) {
1204 if (RealignmentPadding >= 4096) {
1205 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1206 .addReg(AArch64::X16, RegState::Define)
1207 .addImm(RealignmentPadding)
1209 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1210 .addReg(AArch64::SP)
1211 .addReg(AArch64::X16, RegState::Kill)
1214 } else {
1215 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1216 .addReg(AArch64::SP)
1217 .addImm(RealignmentPadding)
1218 .addImm(0)
1220 }
1221
1222 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1223 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1224 .addReg(AArch64::X15, RegState::Kill)
1226 AFI->setStackRealigned(true);
1227
1228 // No need for SEH instructions here; if we're realigning the stack,
1229 // we've set a frame pointer and already finished the SEH prologue.
1230 assert(!NeedsWinCFI);
1231 }
1232 if (X15Scratch != AArch64::NoRegister) {
1233 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1234 .addReg(AArch64::XZR)
1235 .addReg(X15Scratch, RegState::Undef)
1236 .addReg(X15Scratch, RegState::Implicit)
1238 }
1239}
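// Note on the protocol assumed above: the Windows stack-probe helper
// (__chkstk) receives the requested allocation in x15, expressed in 16-byte
// units, probes each page, and may clobber x16, x17 and NZCV; the final SUB of
// SP by x15 (scaled back up by 16 via the extended-register operand) performs
// the actual allocation once probing has completed.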
1240
1241void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1242 MachineBasicBlock::iterator MBBI) const {
1243 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1244 if (CSI.empty())
1245 return;
1246
1247 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1248 for (const auto &Info : CSI) {
1249 unsigned FrameIdx = Info.getFrameIdx();
1250 if (MFI.hasScalableStackID(FrameIdx))
1251 continue;
1252
1253 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1254 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1255 CFIBuilder.buildOffset(Info.getReg(), Offset);
1256 }
1257}
1258
1259void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1260 MachineBasicBlock::iterator MBBI) const {
1261 // Add callee saved registers to move list.
1262 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1263 if (CSI.empty())
1264 return;
1265
1266 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1267
1268 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1269 if (AFL.requiresSaveVG(MF)) {
1270 auto IncomingVG = *find_if(
1271 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1272 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1273 AFL.getOffsetOfLocalArea();
1274 }
1275
1276 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1277 for (const auto &Info : CSI) {
1278 int FI = Info.getFrameIdx();
1279 if (!MFI.hasScalableStackID(FI))
1280 continue;
1281
1282 // Not all unwinders may know about SVE registers, so assume the lowest
1283 // common denominator.
1284 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1285 MCRegister Reg = Info.getReg();
1286 if (!RegInfo.regNeedsCFI(Reg, Reg))
1287 continue;
1288
1289 StackOffset Offset =
1290 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1291 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1292
1293 // The scalable vectors are below (lower address) the scalable predicates
1294 // with split SVE objects, so we must subtract the size of the predicates.
1295 if (SVELayout == SVEStackLayout::Split &&
1296 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1297 Offset -= PPRStackSize;
1298
1299 CFIBuilder.insertCFIInst(
1300 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1301 }
1302}
1303
1304static bool isFuncletReturnInstr(const MachineInstr &MI) {
1305 switch (MI.getOpcode()) {
1306 default:
1307 return false;
1308 case AArch64::CATCHRET:
1309 case AArch64::CLEANUPRET:
1310 return true;
1311 }
1312}
1313
1314AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
1315 MachineBasicBlock &MBB,
1316 const AArch64FrameLowering &AFL)
1317 : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
1318 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1319 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1320 SEHEpilogueStartI = MBB.end();
1321}
1322
1323void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
1324 StackOffset Offset) {
1325 // Other combinations could be supported, but are not currently needed.
1326 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1327 "expected negative offset (with optional fixed portion)");
1328 Register Base = AArch64::FP;
1329 if (int64_t FixedOffset = Offset.getFixed()) {
1330 // If we have a negative fixed offset, we need to first subtract it in a
1331 // temporary register first (to avoid briefly deallocating the scalable
1332 // portion of the offset).
1333 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1334 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1335 StackOffset::getFixed(FixedOffset), TII,
1336 MachineInstr::FrameDestroy);
1337 }
1338 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1339 StackOffset::getScalable(Offset.getScalable()), TII,
1340 MachineInstr::FrameDestroy);
1341}
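// Illustrative use: with Offset = getScalable(-32) + getFixed(-16), the code
// above first forms "FP - 16" in a temporary register and only then applies
// the scalable -32 * vscale bytes to derive SP, so SP never transiently sits
// above its final value (which would momentarily deallocate live stack).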
1342
1343void AArch64EpilogueEmitter::emitEpilogue() {
1344 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1345 if (MBB.end() != EpilogueEndI) {
1346 DL = EpilogueEndI->getDebugLoc();
1347 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1348 }
1349
1350 int64_t NumBytes =
1351 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1352
1353 // All calls are tail calls in GHC calling conv, and functions have no
1354 // prologue/epilogue.
1355 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1356 return;
1357
1358 // How much of the stack used by incoming arguments this function is expected
1359 // to restore in this particular epilogue.
1360 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1361 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1362 MF.getFunction().isVarArg());
1363 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1364
1365 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1366 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1367 // We cannot rely on the local stack size set in emitPrologue if the function
1368 // has funclets, as funclets have different local stack size requirements, and
1369 // the current value set in emitPrologue may be that of the containing
1370 // function.
1371 if (MF.hasEHFunclets())
1372 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1373
1374 if (HomPrologEpilog) {
1376 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1377 if (FirstHomogenousEpilogI != MBB.begin()) {
1378 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1379 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1380 FirstHomogenousEpilogI = HomogeneousEpilog;
1381 }
1382
1383 // Adjust local stack
1384 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1385 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1387
1388 // SP has been already adjusted while restoring callee save regs.
1389 // We've bailed-out the case with adjusting SP for arguments.
1390 assert(AfterCSRPopSize == 0);
1391 return;
1392 }
1393
1394 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1395
1396 unsigned ProloguePopSize = PrologueSaveSize;
1397 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1398 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1399 // that needs to be popped until we reach the start of the SVE save area.
1400 // The "FixedObject" stack occurs after the SVE area and must be popped
1401 // later.
1402 ProloguePopSize -= FixedObject;
1403 AfterCSRPopSize += FixedObject;
1404 }
1405
1406 // Assume we can't combine the last pop with the sp restore.
1407 if (!CombineSPBump && ProloguePopSize != 0) {
1408 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1409 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1410 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1413 Pop = std::prev(Pop);
1414 // Converting the last ldp to a post-index ldp is valid only if the last
1415 // ldp's offset is 0.
1416 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1417 // If the offset is 0 and the AfterCSR pop is not actually trying to
1418 // allocate more stack for arguments (in space that an untimely interrupt
1419 // may clobber), convert it to a post-index ldp.
1420 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1421 convertCalleeSaveRestoreToSPPrePostIncDec(
1422 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1423 ProloguePopSize);
1424 } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1425 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1426 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1427 ++AfterLastPop;
1428 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1429 // callee-save non-SVE registers to move the stack pointer to the start of
1430 // the SVE area.
1431 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1432 StackOffset::getFixed(ProloguePopSize), TII,
1433 MachineInstr::FrameDestroy, false, NeedsWinCFI,
1434 &HasWinCFI);
1435 } else {
1436 // Otherwise, make sure to emit an add after the last ldp.
1437 // We're doing this by transferring the size to be restored from the
1438 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1439 // pops.
1440 AfterCSRPopSize += ProloguePopSize;
1441 }
1442 }
1443
1444 // Move past the restores of the callee-saved registers.
1445 // If we plan on combining the sp bump of the local stack size and the callee
1446 // save stack size, we might need to adjust the CSR save and restore offsets.
1447 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1448 MachineBasicBlock::iterator Begin = MBB.begin();
1449 while (FirstGPRRestoreI != Begin) {
1450 --FirstGPRRestoreI;
1451 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1453 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1454 ++FirstGPRRestoreI;
1455 break;
1456 } else if (CombineSPBump)
1457 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1458 AFI->getLocalStackSize());
1459 }
1460
1461 if (NeedsWinCFI) {
1462 // Note that there are cases where we insert SEH opcodes in the
1463 // epilogue when we had no SEH opcodes in the prologue. For
1464 // example, when there is no stack frame but there are stack
1465 // arguments. Insert the SEH_EpilogStart and remove it later if it
1466 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1467 // functions that don't need it.
1468 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1469 .setMIFlag(MachineInstr::FrameDestroy);
1470 SEHEpilogueStartI = FirstGPRRestoreI;
1471 --SEHEpilogueStartI;
1472 }
1473
1474 // Determine the ranges of SVE callee-saves. This is done before emitting any
1475 // code at the end of the epilogue (for Swift async), which can get in the way
1476 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1477 auto [PPR, ZPR] = getSVEStackFrameSizes();
1478 auto [PPRRange, ZPRRange] = partitionSVECS(
1479 MBB,
1480 SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1481 ? MBB.getFirstTerminator()
1482 : FirstGPRRestoreI,
1483 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1484
1485 if (HasFP && AFI->hasSwiftAsyncContext())
1486 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1487
1488 // If there is a single SP update, insert it before the ret and we're done.
1489 if (CombineSPBump) {
1490 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1491
1492 // When we are about to restore the CSRs, the CFA register is SP again.
1493 if (EmitCFI && HasFP)
1495 .buildDefCFA(AArch64::SP, NumBytes);
1496
1497 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1498 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1499 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI,
1500 EmitCFI, StackOffset::getFixed(NumBytes));
1501 return;
1502 }
1503
1504 NumBytes -= PrologueSaveSize;
1505 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1506
1507 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1508 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1509
1510 // Deallocate the SVE area.
1511 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1512 assert(!SVEAllocs.AfterPPRs &&
1513 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1514 // If the callee-save area is before FP, restoring the FP implicitly
1515 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1516 // explicitly.
1517 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1518 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1520 false, NeedsWinCFI, &HasWinCFI);
1521 }
1522
1523 // Deallocate callee-save SVE registers.
1524 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1525 SVECalleeSavesSize, TII, MachineInstr::FrameDestroy,
1526 false, NeedsWinCFI, &HasWinCFI);
1527 } else if (AFI->hasSVEStackSize()) {
1528 // If we have stack realignment or variable-sized objects we must use the FP
1529 // to restore SVE callee saves (as there is an unknown amount of
1530 // data/padding between the SP and SVE CS area).
1531 Register BaseForSVEDealloc =
1532 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1533 : AArch64::SP;
1534 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1535 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1536 // The offset from the frame-pointer to the start of the ZPR saves.
1537 StackOffset FPOffsetZPR =
1538 -SVECalleeSavesSize - PPR.LocalsSize -
1539 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1540 // Deallocate the stack space space by moving the SP to the start of the
1541 // ZPR/PPR callee-save area.
1542 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1543 }
1544 // With split SVE, the predicates are stored in a separate area above the
1545 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1546 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1547 // The offset from the frame-pointer to the start of the PPR saves.
1548 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1549 // Move to the start of the PPR area.
1550 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1551 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1552 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1553 }
1554 } else if (BaseForSVEDealloc == AArch64::SP) {
1555 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1556 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1557 SVEAllocs.totalSize();
1558
1559 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1560 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1561 // saves, but may also allow combining stack hazard bumps for split SVE.
1562 SVEAllocs.AfterZPRs += NonSVELocals;
1563 NumBytes -= NonSVELocals.getFixed();
1564 }
1565 // To deallocate the SVE stack adjust by the allocations in reverse.
1566 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1567 SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy,
1568 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1569 CFAOffset);
1570 CFAOffset -= SVEAllocs.AfterZPRs;
1571 assert(PPRRange.Begin == ZPRRange.End &&
1572 "Expected PPR restores after ZPR");
1573 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1574 SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy,
1575 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1576 CFAOffset);
1577 CFAOffset -= SVEAllocs.AfterPPRs;
1578 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1579 SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy,
1580 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1581 CFAOffset);
1582 }
1583
1584 if (EmitCFI)
1585 emitCalleeSavedSVERestores(
1586 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1587 }
1588
1589 if (!HasFP) {
1590 bool RedZone = AFL.canUseRedZone(MF);
1591 // If this was a redzone leaf function, we don't need to restore the
1592 // stack pointer (but we may need to pop stack args for fastcc).
1593 if (RedZone && AfterCSRPopSize == 0)
1594 return;
1595
1596 // Pop the local variables off the stack. If there are no callee-saved
1597 // registers, it means we are actually positioned at the terminator and can
1598 // combine stack increment for the locals and the stack increment for
1599 // callee-popped arguments into (possibly) a single instruction and be done.
1600 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1601 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1602 if (NoCalleeSaveRestore)
1603 StackRestoreBytes += AfterCSRPopSize;
1604
1605 emitFrameOffset(
1606 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1607 StackOffset::getFixed(StackRestoreBytes), TII,
1608 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1609 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1610
1611 // If we were able to combine the local stack pop with the argument pop,
1612 // then we're done.
1613 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1614 return;
1615
1616 NumBytes = 0;
1617 }
1618
1619 // Restore the original stack pointer.
1620 // FIXME: Rather than doing the math here, we should instead just use
1621 // non-post-indexed loads for the restores if we aren't actually going to
1622 // be able to save any instructions.
1623 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1624 emitFrameOffset(
1625 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1626 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1627 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1628 } else if (NumBytes)
1629 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1630 StackOffset::getFixed(NumBytes), TII,
1631 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1632
1633 // When we are about to restore the CSRs, the CFA register is SP again.
1634 if (EmitCFI && HasFP)
1635 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1636 .buildDefCFA(AArch64::SP, PrologueSaveSize);
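// Note: this is equivalent to emitting '.cfi_def_cfa sp, <PrologueSaveSize>',
// rebasing the CFA from FP to SP before the GPR callee-save reloads.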
1637
1638 // This must be placed after the callee-save restore code because that code
1639 // assumes the SP is at the same location as it was after the callee-save save
1640 // code in the prologue.
1641 if (AfterCSRPopSize) {
1642 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1643 "interrupt may have clobbered");
1644
1645 emitFrameOffset(
1646 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1647 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1648 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1649 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1650 }
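// Illustrative note: this final bump pops the stack-argument area on calling
// conventions where the callee frees its own incoming arguments, conceptually
// 'add sp, sp, #AfterCSRPopSize'; the InitialOffset of
// 'AfterCSRPopSize - ArgumentStackToRestore' keeps the reported CFA correct.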
1651}
1652
1653bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1654 uint64_t StackBumpBytes) const {
1655 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1656 StackBumpBytes))
1657 return false;
1658 if (MBB.empty())
1659 return true;
1660
1661 // Disable combined SP bump if the last instruction is an MTE tag store. It
1662 // is almost always better to merge SP adjustment into those instructions.
1663 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1664 MachineBasicBlock::iterator Begin = MBB.begin();
1665 while (LastI != Begin) {
1666 --LastI;
1667 if (LastI->isTransient())
1668 continue;
1669 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1670 break;
1671 }
1672 switch (LastI->getOpcode()) {
1673 case AArch64::STGloop:
1674 case AArch64::STZGloop:
1675 case AArch64::STGi:
1676 case AArch64::STZGi:
1677 case AArch64::ST2Gi:
1678 case AArch64::STZ2Gi:
1679 return false;
1680 default:
1681 return true;
1682 }
1683 llvm_unreachable("unreachable");
1684}
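// Illustrative note: when the epilogue ends in an MTE tag store, the SP
// adjustment is better folded into the tag store's writeback, e.g.
//   st2g sp, [sp], #32
// rather than a separate tag store plus 'add sp, sp, #imm', which is why the
// combined CSR/local bump is disabled above for those opcodes.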
1685
1686void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1687 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1688 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1689 case SwiftAsyncFramePointerMode::DeploymentBased:
1690 // Avoid the reload as it is GOT relative, and instead fall back to the
1691 // hardcoded value below. This allows a mismatch between the OS and
1692 // application without immediately terminating on the difference.
1693 [[fallthrough]];
1694 case SwiftAsyncFramePointerMode::Always:
1695 // We need to reset FP to its untagged state on return. Bit 60 is
1696 // currently used to show the presence of an extended frame.
1697
1698 // BIC x29, x29, #0x1000_0000_0000_0000
1699 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1700 AArch64::FP)
1701 .addUse(AArch64::FP)
1702 .addImm(0x10fe)
1703 .setMIFlag(MachineInstr::FrameDestroy);
1704 if (NeedsWinCFI) {
1705 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1706 .setMIFlag(MachineInstr::FrameDestroy);
1707 HasWinCFI = true;
1708 }
1709 break;
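// Note: 0x10fe is the encoded AArch64 logical immediate for
// 0xEFFF'FFFF'FFFF'FFFF (all ones with bit 60 clear), so the ANDXri above is
// exactly the 'BIC x29, x29, #0x1000_0000_0000_0000' shown in the comment.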
1710
1711 case SwiftAsyncFramePointerMode::Never:
1712 break;
1713 }
1714}
1715
1716void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1717 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1718 // Shadow call stack epilog: ldr x30, [x18, #-8]!
1719 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1720 .addReg(AArch64::X18, RegState::Define)
1721 .addReg(AArch64::LR, RegState::Define)
1722 .addReg(AArch64::X18)
1723 .addImm(-8)
1724 .setMIFlag(MachineInstr::FrameDestroy);
1725
1726 if (NeedsWinCFI)
1727 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1728 .setMIFlag(MachineInstr::FrameDestroy);
1729
1730 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1731 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1732 .buildRestore(AArch64::X18);
1733}
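// Illustrative note: this mirrors the shadow call stack prologue, which pushes
// the return address with 'str x30, [x18], #8'; the pre-indexed
// 'ldr x30, [x18, #-8]!' here pops it and leaves x18 pointing at the previous
// top of the shadow stack.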
1734
1735void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1736 MachineBasicBlock::iterator MBBI, bool SVE) const {
1737 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1738 if (CSI.empty())
1739 return;
1740
1741 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1742
1743 for (const auto &Info : CSI) {
1744 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1745 continue;
1746
1747 MCRegister Reg = Info.getReg();
1748 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1749 continue;
1750
1751 CFIBuilder.buildRestore(Info.getReg());
1752 }
1753}
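// Note: for each callee-saved register whose spill slot matches the requested
// stack ID (SVE or not), this emits a restore entry equivalent to
// '.cfi_restore <reg>' (e.g. '.cfi_restore x19'), marking that the register
// again holds its value from the caller.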
1754
1755void AArch64EpilogueEmitter::finalizeEpilogue() const {
1756 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1757 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1758 HasWinCFI |= NeedsWinCFI;
1759 }
1760 if (EmitCFI)
1761 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1762 if (AFI->shouldSignReturnAddress(MF)) {
1763 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1764 // are inserted by emitPacRetPlusLeafHardening().
1765 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1766 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1767 TII->get(AArch64::PAUTH_EPILOGUE))
1768 .setMIFlag(MachineInstr::FrameDestroy);
1769 }
1770 // AArch64PointerAuth pass will insert SEH_PACSignLR
1771 HasWinCFI |= NeedsWinCFI;
1772 }
1773 if (HasWinCFI) {
1774 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1775 .setMIFlag(MachineInstr::FrameDestroy);
1776 if (!MF.hasWinCFI())
1777 MF.setHasWinCFI(true);
1778 }
1779 if (NeedsWinCFI) {
1780 assert(SEHEpilogueStartI != MBB.end());
1781 if (!HasWinCFI)
1782 MBB.erase(SEHEpilogueStartI);
1783 }
1784}
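// Note: under Windows CFI the epilogue is bracketed by an SEH_EpilogStart
// (recorded earlier in SEHEpilogueStartI) and the SEH_EpilogEnd emitted above;
// if nothing needing Windows unwind info was emitted in between, the start
// marker is erased again so no empty epilogue scope is described.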
1785
1786} // namespace llvm