AArch64PrologueEpilogue.cpp
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
12#include "AArch64Subtarget.h"
14#include "llvm/ADT/Statistic.h"
17#include "llvm/MC/MCContext.h"
18
19#define DEBUG_TYPE "frame-info"
20
21STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
22
23namespace llvm {
24
25static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO,
26 RTLIB::Libcall LC) {
27 return MO.isSymbol() &&
28 StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName();
29}
30
31bool AArch64PrologueEpilogueCommon::requiresGetVGCall() const {
32 return AFI->hasStreamingModeChanges() &&
33 !MF.getSubtarget<AArch64Subtarget>().hasSVE();
34}
35
36bool AArch64PrologueEpilogueCommon::isVGInstruction(
37 MachineBasicBlock::iterator MBBI, const TargetLowering &TLI) const {
38 unsigned Opc = MBBI->getOpcode();
39 if (Opc == AArch64::CNTD_XPiI)
40 return true;
41
42 if (!requiresGetVGCall())
43 return false;
44
45 if (Opc == AArch64::BL)
46 return matchLibcall(TLI, MBBI->getOperand(0), RTLIB::SMEABI_GET_CURRENT_VG);
47
48 return Opc == TargetOpcode::COPY;
49}
50
51// Convenience function to determine whether I is part of the ZPR callee saves.
52static bool isPartOfZPRCalleeSaves(MachineBasicBlock::iterator I) {
53 switch (I->getOpcode()) {
54 default:
55 return false;
56 case AArch64::LD1B_2Z_IMM:
57 case AArch64::ST1B_2Z_IMM:
58 case AArch64::STR_ZXI:
59 case AArch64::LDR_ZXI:
60 case AArch64::PTRUE_C_B:
61 return I->getFlag(MachineInstr::FrameSetup) ||
62 I->getFlag(MachineInstr::FrameDestroy);
63 case AArch64::SEH_SaveZReg:
64 return true;
65 }
66}
67
68// Convenience function to determine whether I is part of the PPR callee saves.
69static bool isPartOfPPRCalleeSaves(MachineBasicBlock::iterator I) {
70 switch (I->getOpcode()) {
71 default:
72 return false;
73 case AArch64::STR_PXI:
74 case AArch64::LDR_PXI:
75 return I->getFlag(MachineInstr::FrameSetup) ||
76 I->getFlag(MachineInstr::FrameDestroy);
77 case AArch64::SEH_SavePReg:
78 return true;
79 }
80}
81
82// Convenience function to determine whether I is part of the SVE callee saves.
83static bool isPartOfSVECalleeSaves(MachineBasicBlock::iterator I) {
84 return isPartOfZPRCalleeSaves(I) || isPartOfPPRCalleeSaves(I);
85}
86
87AArch64PrologueEpilogueCommon::AArch64PrologueEpilogueCommon(
88 MachineFunction &MF, MachineBasicBlock &MBB,
89 const AArch64FrameLowering &AFL)
90 : MF(MF), MBB(MBB), MFI(MF.getFrameInfo()),
91 Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
92 RegInfo(*Subtarget.getRegisterInfo()) {
93 TII = Subtarget.getInstrInfo();
94 AFI = MF.getInfo<AArch64FunctionInfo>();
95
96 HasFP = AFL.hasFP(MF);
97 NeedsWinCFI = AFL.needsWinCFI(MF);
98
99 // Windows unwind can't represent the required stack adjustments if we have
100 // both SVE callee-saves and dynamic stack allocations, and the frame pointer
101 // is before the SVE spills. The allocation of the frame pointer must be the
102 // last instruction in the prologue so the unwinder can restore the stack
103 // pointer correctly. (And there isn't any unwind opcode for `addvl sp, x29,
104 // -17`.)
105 //
106 // Because of this, we do spills in the opposite order on Windows: first SVE,
107 // then GPRs. The main side-effect of this is that it makes accessing
108 // parameters passed on the stack more expensive.
109 //
110 // We could consider rearranging the spills for simpler cases.
111 if (Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize()) {
112 if (AFI->hasStackHazardSlotIndex())
113 reportFatalUsageError("SME hazard padding is not supported on Windows");
114 SVELayout = SVEStackLayout::CalleeSavesAboveFrameRecord;
115 } else if (AFI->hasSplitSVEObjects()) {
116 SVELayout = SVEStackLayout::Split;
117 }
118}
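// Illustrative note (layout sketch, not emitted code): with
// CalleeSavesAboveFrameRecord the Windows prologue ends up with roughly
//   [fixed objects] [SVE callee saves] [GPR/FPR callee saves + frame record] [locals]
// from higher to lower addresses, whereas the default order keeps the SVE
// callee saves below the frame record. This is why emitPrologue() below
// spills SVE registers before the GPRs on Windows.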
119
120MachineBasicBlock::iterator
121AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
122 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int CSStackSizeInc,
123 bool EmitCFI, MachineInstr::MIFlag FrameFlag, int CFAOffset) const {
124 unsigned NewOpc;
125
126 // If the function contains streaming mode changes, we expect instructions
127 // to calculate the value of VG before spilling. Move past these instructions
128 // if necessary.
129 if (AFL.requiresSaveVG(MF)) {
130 auto &TLI = *Subtarget.getTargetLowering();
131 while (isVGInstruction(MBBI, TLI))
132 ++MBBI;
133 }
134
135 switch (MBBI->getOpcode()) {
136 default:
137 llvm_unreachable("Unexpected callee-save save/restore opcode!");
138 case AArch64::STPXi:
139 NewOpc = AArch64::STPXpre;
140 break;
141 case AArch64::STPDi:
142 NewOpc = AArch64::STPDpre;
143 break;
144 case AArch64::STPQi:
145 NewOpc = AArch64::STPQpre;
146 break;
147 case AArch64::STRXui:
148 NewOpc = AArch64::STRXpre;
149 break;
150 case AArch64::STRDui:
151 NewOpc = AArch64::STRDpre;
152 break;
153 case AArch64::STRQui:
154 NewOpc = AArch64::STRQpre;
155 break;
156 case AArch64::LDPXi:
157 NewOpc = AArch64::LDPXpost;
158 break;
159 case AArch64::LDPDi:
160 NewOpc = AArch64::LDPDpost;
161 break;
162 case AArch64::LDPQi:
163 NewOpc = AArch64::LDPQpost;
164 break;
165 case AArch64::LDRXui:
166 NewOpc = AArch64::LDRXpost;
167 break;
168 case AArch64::LDRDui:
169 NewOpc = AArch64::LDRDpost;
170 break;
171 case AArch64::LDRQui:
172 NewOpc = AArch64::LDRQpost;
173 break;
174 }
175 TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
176 int64_t MinOffset, MaxOffset;
177 bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
178 NewOpc, Scale, Width, MinOffset, MaxOffset);
179 (void)Success;
180 assert(Success && "unknown load/store opcode");
181
182 // If the first store isn't right where we want SP then we can't fold the
183 // update in so create a normal arithmetic instruction instead.
184 if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
185 CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
186 CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
187 // If we are destroying the frame, make sure we add the increment after the
188 // last frame operation.
189 if (FrameFlag == MachineInstr::FrameDestroy) {
190 ++MBBI;
191 // Also skip the SEH instruction, if needed
192 if (NeedsWinCFI && AArch64InstrInfo::isSEHInstruction(*MBBI))
193 ++MBBI;
194 }
195 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
196 StackOffset::getFixed(CSStackSizeInc), TII, FrameFlag,
197 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
198 StackOffset::getFixed(CFAOffset));
199
200 return std::prev(MBBI);
201 }
202
203 // Get rid of the SEH code associated with the old instruction.
204 if (NeedsWinCFI) {
205 auto SEH = std::next(MBBI);
206 if (AArch64InstrInfo::isSEHInstruction(*SEH))
207 SEH->eraseFromParent();
208 }
209
210 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
211 MIB.addReg(AArch64::SP, RegState::Define);
212
213 // Copy all operands other than the immediate offset.
214 unsigned OpndIdx = 0;
215 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
216 ++OpndIdx)
217 MIB.add(MBBI->getOperand(OpndIdx));
218
219 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
220 "Unexpected immediate offset in first/last callee-save save/restore "
221 "instruction!");
222 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
223 "Unexpected base register in callee-save save/restore instruction!");
224 assert(CSStackSizeInc % Scale == 0);
225 MIB.addImm(CSStackSizeInc / (int)Scale);
226
227 MIB.setMIFlags(MBBI->getFlags());
228 MIB.setMemRefs(MBBI->memoperands());
229
230 // Generate a new SEH code that corresponds to the new instruction.
231 if (NeedsWinCFI) {
232 HasWinCFI = true;
233 AFL.insertSEH(*MIB, *TII, FrameFlag);
234 }
235
236 if (EmitCFI)
237 CFIInstBuilder(MBB, MBBI, FrameFlag)
238 .buildDefCFAOffset(CFAOffset - CSStackSizeInc);
239
240 return std::prev(MBB.erase(MBBI));
241}
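// For example (illustrative only): with CSStackSizeInc == -16, a leading
//   stp x29, x30, [sp]      becomes      stp x29, x30, [sp, #-16]!
// so the SP decrement is folded into the first callee-save store; the
// matching epilogue load is converted to a post-increment ldp instead.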
242
243// Fix up the SEH opcode associated with the save/restore instruction.
244static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
245 unsigned LocalStackSize) {
246 MachineOperand *ImmOpnd = nullptr;
247 unsigned ImmIdx = MBBI->getNumOperands() - 1;
248 switch (MBBI->getOpcode()) {
249 default:
250 llvm_unreachable("Fix the offset in the SEH instruction");
251 case AArch64::SEH_SaveFPLR:
252 case AArch64::SEH_SaveRegP:
253 case AArch64::SEH_SaveReg:
254 case AArch64::SEH_SaveFRegP:
255 case AArch64::SEH_SaveFReg:
256 case AArch64::SEH_SaveAnyRegQP:
257 case AArch64::SEH_SaveAnyRegQPX:
258 ImmOpnd = &MBBI->getOperand(ImmIdx);
259 break;
260 }
261 if (ImmOpnd)
262 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
263}
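// E.g. (illustrative): once the local-stack bump is folded into the
// callee-save stores, a .seh_save_regp describing "stp x19, x20, [sp, #16]"
// must report the offset 16 + LocalStackSize so the unwind info still matches
// the adjusted store.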
264
265void AArch64PrologueEpilogueCommon::fixupCalleeSaveRestoreStackOffset(
266 MachineInstr &MI, uint64_t LocalStackSize) const {
267 if (AArch64InstrInfo::isSEHInstruction(MI))
268 return;
269
270 unsigned Opc = MI.getOpcode();
271 unsigned Scale;
272 switch (Opc) {
273 case AArch64::STPXi:
274 case AArch64::STRXui:
275 case AArch64::STPDi:
276 case AArch64::STRDui:
277 case AArch64::LDPXi:
278 case AArch64::LDRXui:
279 case AArch64::LDPDi:
280 case AArch64::LDRDui:
281 Scale = 8;
282 break;
283 case AArch64::STPQi:
284 case AArch64::STRQui:
285 case AArch64::LDPQi:
286 case AArch64::LDRQui:
287 Scale = 16;
288 break;
289 default:
290 llvm_unreachable("Unexpected callee-save save/restore opcode!");
291 }
292
293 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
294 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
295 "Unexpected base register in callee-save save/restore instruction!");
296 // Last operand is immediate offset that needs fixing.
297 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
298 // All generated opcodes have scaled offsets.
299 assert(LocalStackSize % Scale == 0);
300 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
301
302 if (NeedsWinCFI) {
303 HasWinCFI = true;
304 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
305 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
306 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
307 "Expecting a SEH instruction");
308 fixupSEHOpcode(MBBI, LocalStackSize);
309 }
310}
311
312bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
313 uint64_t StackBumpBytes) const {
314 if (AFL.homogeneousPrologEpilog(MF))
315 return false;
316
317 if (AFI->getLocalStackSize() == 0)
318 return false;
319
320 // For WinCFI, if optimizing for size, prefer to not combine the stack bump
321 // (to force a stp with predecrement) to match the packed unwind format,
322 // provided that there actually are any callee saved registers to merge the
323 // decrement with.
324 // This is potentially marginally slower, but allows using the packed
325 // unwind format for functions that both have a local area and callee saved
326 // registers. Using the packed unwind format notably reduces the size of
327 // the unwind info.
328 if (AFL.needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
329 MF.getFunction().hasOptSize())
330 return false;
331
332 // 512 is the maximum immediate for stp/ldp that will be used for
333 // callee-save save/restores
334 if (StackBumpBytes >= 512 ||
335 AFL.windowsRequiresStackProbe(MF, StackBumpBytes))
336 return false;
337
338 if (MFI.hasVarSizedObjects())
339 return false;
340
341 if (RegInfo.hasStackRealignment(MF))
342 return false;
343
344 // This isn't strictly necessary, but it simplifies things a bit since the
345 // current RedZone handling code assumes the SP is adjusted by the
346 // callee-save save/restore code.
347 if (AFL.canUseRedZone(MF))
348 return false;
349
350 // When there is an SVE area on the stack, always allocate the
351 // callee-saves and spills/locals separately.
352 if (AFI->hasSVEStackSize())
353 return false;
354
355 return true;
356}
357
358SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const {
359 StackOffset PPRCalleeSavesSize =
360 StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
361 StackOffset ZPRCalleeSavesSize =
362 StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
363 StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
364 StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
365 if (SVELayout == SVEStackLayout::Split)
366 return {{PPRCalleeSavesSize, PPRLocalsSize},
367 {ZPRCalleeSavesSize, ZPRLocalsSize}};
368 // For simplicity, attribute all locals to ZPRs when split SVE is disabled.
369 return {{PPRCalleeSavesSize, StackOffset{}},
370 {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}};
371}
372
373SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations(
374 SVEFrameSizes const &SVE) {
375 StackOffset AfterZPRs = SVE.ZPR.LocalsSize;
376 StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize;
377 StackOffset AfterPPRs = {};
378 if (SVELayout == SVEStackLayout::Split) {
379 BeforePPRs = SVE.PPR.CalleeSavesSize;
380 // If there are no ZPR CSRs, place all local allocations after the ZPRs.
381 if (SVE.ZPR.CalleeSavesSize)
382 AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize;
383 else
384 AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals.
385 }
386 return {BeforePPRs, AfterPPRs, AfterZPRs};
387}
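// The three regions returned above are allocated in order by the prologue:
// BeforePPRs (just before the predicate callee saves), AfterPPRs (between the
// predicate and vector callee saves, only non-zero with split SVE), and
// AfterZPRs (after the vector callee saves, together with the non-SVE locals).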
388
389struct SVEPartitions {
390 struct {
391 MachineBasicBlock::iterator Begin, End;
392 } PPR, ZPR;
393};
394
395static SVEPartitions partitionSVECS(MachineBasicBlock &MBB,
396 MachineBasicBlock::iterator MBBI,
397 StackOffset PPRCalleeSavesSize,
398 StackOffset ZPRCalleeSavesSize,
399 bool IsEpilogue) {
400 MachineBasicBlock::iterator PPRsI = MBBI;
401 MachineBasicBlock::iterator End =
402 IsEpilogue ? MBB.begin() : MBB.getFirstTerminator();
403 auto AdjustI = [&](auto MBBI) { return IsEpilogue ? std::prev(MBBI) : MBBI; };
404 // Process the SVE CS to find the starts/ends of the ZPR and PPR areas.
405 if (PPRCalleeSavesSize) {
406 PPRsI = AdjustI(PPRsI);
407 assert(isPartOfPPRCalleeSaves(*PPRsI) && "Unexpected instruction");
408 while (PPRsI != End && isPartOfPPRCalleeSaves(AdjustI(PPRsI)))
409 IsEpilogue ? (--PPRsI) : (++PPRsI);
410 }
411 MachineBasicBlock::iterator ZPRsI = PPRsI;
412 if (ZPRCalleeSavesSize) {
413 ZPRsI = AdjustI(ZPRsI);
414 assert(isPartOfZPRCalleeSaves(*ZPRsI) && "Unexpected instruction");
415 while (ZPRsI != End && isPartOfZPRCalleeSaves(AdjustI(ZPRsI)))
416 IsEpilogue ? (--ZPRsI) : (++ZPRsI);
417 }
418 if (IsEpilogue)
419 return {{PPRsI, MBBI}, {ZPRsI, PPRsI}};
420 return {{MBBI, PPRsI}, {PPRsI, ZPRsI}};
421}
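// Note: for a prologue the returned ranges grow forward from MBBI (PPR saves
// first, then ZPR saves); for an epilogue they are found by walking backwards
// from MBBI, so in block order the ZPR restores precede the PPR restores.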
422
423AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
424 MachineBasicBlock &MBB,
425 const AArch64FrameLowering &AFL)
426 : AArch64PrologueEpilogueCommon(MF, MBB, AFL), F(MF.getFunction()) {
427 EmitCFI = AFI->needsDwarfUnwindInfo(MF);
428 EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
429 IsFunclet = MBB.isEHFuncletEntry();
430 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
431
432#ifndef NDEBUG
433 collectBlockLiveins();
434#endif
435}
436
437#ifndef NDEBUG
438/// Collect live registers from the end of \p MI's parent up to (including) \p
439/// MI in \p LiveRegs.
440static void getLivePhysRegsUpTo(MachineInstr &MI,
441 const TargetRegisterInfo &TRI, LivePhysRegs &LiveRegs) {
442
443 MachineBasicBlock &MBB = *MI.getParent();
444 LiveRegs.addLiveOuts(MBB);
445 for (const MachineInstr &MI :
446 reverse(make_range(MI.getIterator(), MBB.instr_end())))
447 LiveRegs.stepBackward(MI);
448}
449
450void AArch64PrologueEmitter::collectBlockLiveins() {
451 // Collect live register from the end of MBB up to the start of the existing
452 // frame setup instructions.
453 PrologueEndI = MBB.begin();
454 while (PrologueEndI != MBB.end() &&
455 PrologueEndI->getFlag(MachineInstr::FrameSetup))
456 ++PrologueEndI;
457
458 if (PrologueEndI != MBB.end()) {
459 getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
460 // Ignore registers used for stack management for now.
461 LiveRegs.removeReg(AArch64::SP);
462 LiveRegs.removeReg(AArch64::X19);
463 LiveRegs.removeReg(AArch64::FP);
464 LiveRegs.removeReg(AArch64::LR);
465
466 // X0 will be clobbered by a call to __arm_get_current_vg in the prologue.
467 // This is necessary to spill VG if required where SVE is unavailable, but
468 // X0 is preserved around this call.
469 if (requiresGetVGCall())
470 LiveRegs.removeReg(AArch64::X0);
471 }
472}
473
474void AArch64PrologueEmitter::verifyPrologueClobbers() const {
475 if (PrologueEndI == MBB.end())
476 return;
477 // Check if any of the newly inserted instructions clobber any of the live registers.
478 for (MachineInstr &MI :
479 make_range(MBB.instr_begin(), PrologueEndI->getIterator())) {
480 for (auto &Op : MI.operands())
481 if (Op.isReg() && Op.isDef())
482 assert(!LiveRegs.contains(Op.getReg()) &&
483 "live register clobbered by inserted prologue instructions");
484 }
485}
486#endif
487
488void AArch64PrologueEmitter::determineLocalsStackSize(
489 uint64_t StackSize, uint64_t PrologueSaveSize) {
490 AFI->setLocalStackSize(StackSize - PrologueSaveSize);
491 CombineSPBump = shouldCombineCSRLocalStackBump(StackSize);
492}
493
494// Return the maximum possible number of bytes for `Size` due to the
495// architectural limit on the size of a SVE register.
496static int64_t upperBound(StackOffset Size) {
497 static const int64_t MAX_BYTES_PER_SCALABLE_BYTE = 16;
498 return Size.getScalable() * MAX_BYTES_PER_SCALABLE_BYTE + Size.getFixed();
499}
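// E.g. a StackOffset of 32 scalable bytes + 48 fixed bytes has an upper bound
// of 32 * 16 + 48 = 560 bytes, since an SVE register is at most 2048 bits,
// i.e. 16x the 128-bit granule that scalable bytes are measured against.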
500
501void AArch64PrologueEmitter::allocateStackSpace(
502 MachineBasicBlock::iterator MBBI, int64_t RealignmentPadding,
503 StackOffset AllocSize, bool EmitCFI, StackOffset InitialOffset,
504 bool FollowupAllocs) {
505
506 if (!AllocSize)
507 return;
508
509 DebugLoc DL;
510 const int64_t MaxAlign = MFI.getMaxAlign().value();
511 const uint64_t AndMask = ~(MaxAlign - 1);
512
513 if (!AFI->hasStackProbing()) {
514 Register TargetReg = RealignmentPadding
515 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
516 : AArch64::SP;
517 // SUB Xd/SP, SP, AllocSize
518 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
520 EmitCFI, InitialOffset);
521
522 if (RealignmentPadding) {
523 // AND SP, X9, 0b11111...0000
524 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
525 .addReg(TargetReg, RegState::Kill)
528 AFI->setStackRealigned(true);
529
530 // No need for SEH instructions here; if we're realigning the stack,
531 // we've set a frame pointer and already finished the SEH prologue.
532 assert(!NeedsWinCFI);
533 }
534 return;
535 }
536
537 //
538 // Stack probing allocation.
539 //
540
541 // Fixed length allocation. If we don't need to re-align the stack and don't
542 // have SVE objects, we can use a more efficient sequence for stack probing.
543 if (AllocSize.getScalable() == 0 && RealignmentPadding == 0) {
544 Register ScratchReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
545 assert(ScratchReg != AArch64::NoRegister);
546 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC))
547 .addDef(ScratchReg)
548 .addImm(AllocSize.getFixed())
549 .addImm(InitialOffset.getFixed())
550 .addImm(InitialOffset.getScalable());
551 // The fixed allocation may leave unprobed bytes at the top of the
552 // stack. If we have subsequent allocation (e.g. if we have variable-sized
553 // objects), we need to issue an extra probe, so these allocations start in
554 // a known state.
555 if (FollowupAllocs) {
556 // STR XZR, [SP]
557 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
558 .addReg(AArch64::XZR)
559 .addReg(AArch64::SP)
560 .addImm(0)
562 }
563
564 return;
565 }
566
567 // Variable length allocation.
568
569 // If the (unknown) allocation size cannot exceed the probe size, decrement
570 // the stack pointer right away.
571 int64_t ProbeSize = AFI->getStackProbeSize();
572 if (upperBound(AllocSize) + RealignmentPadding <= ProbeSize) {
573 Register ScratchReg = RealignmentPadding
574 ? AFL.findScratchNonCalleeSaveRegister(&MBB)
575 : AArch64::SP;
576 assert(ScratchReg != AArch64::NoRegister);
577 // SUB Xd, SP, AllocSize
578 emitFrameOffset(MBB, MBBI, DL, ScratchReg, AArch64::SP, -AllocSize, TII,
580 EmitCFI, InitialOffset);
581 if (RealignmentPadding) {
582 // AND SP, Xn, 0b11111...0000
583 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
584 .addReg(ScratchReg, RegState::Kill)
587 AFI->setStackRealigned(true);
588 }
589 if (FollowupAllocs || upperBound(AllocSize) + RealignmentPadding >
591 // STR XZR, [SP]
592 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
593 .addReg(AArch64::XZR)
594 .addReg(AArch64::SP)
595 .addImm(0)
597 }
598 return;
599 }
600
601 // Emit a variable-length allocation probing loop.
602 // TODO: As an optimisation, the loop can be "unrolled" into a few parts,
603 // each of them guaranteed to adjust the stack by less than the probe size.
604 Register TargetReg = AFL.findScratchNonCalleeSaveRegister(&MBB);
605 assert(TargetReg != AArch64::NoRegister);
606 // SUB Xd, SP, AllocSize
607 emitFrameOffset(MBB, MBBI, DL, TargetReg, AArch64::SP, -AllocSize, TII,
609 EmitCFI, InitialOffset);
610 if (RealignmentPadding) {
611 // AND Xn, Xn, 0b11111...0000
612 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), TargetReg)
613 .addReg(TargetReg, RegState::Kill)
616 }
617
618 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PROBED_STACKALLOC_VAR))
619 .addReg(TargetReg);
620 if (EmitCFI) {
621 // Set the CFA register back to SP.
622 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
623 .buildDefCFARegister(AArch64::SP);
624 }
625 if (RealignmentPadding)
626 AFI->setStackRealigned(true);
627}
628
629void AArch64PrologueEmitter::emitPrologue() {
630 const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
631 const MachineBasicBlock::iterator EndI = MBB.end();
632
633 // At this point, we're going to decide whether or not the function uses a
634 // redzone. In most cases, the function doesn't have a redzone so let's
635 // assume that's false and set it to true in the case that there's a redzone.
636 AFI->setHasRedZone(false);
637
638 // Debug location must be unknown since the first debug location is used
639 // to determine the end of the prologue.
640 DebugLoc DL;
641
642 // In some cases, particularly with CallingConv::SwiftTail, it is possible to
643 // have a tail-call where the caller only needs to adjust the stack pointer in
644 // the epilogue. In this case, we still need to emit a SEH prologue sequence.
645 // See `seh-minimal-prologue-epilogue.ll` test cases.
646 if (AFI->getArgumentStackToRestore())
647 HasWinCFI = true;
648
649 if (AFI->shouldSignReturnAddress(MF)) {
650 // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
651 // are inserted by emitPacRetPlusLeafHardening().
652 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
653 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
654 .setMIFlag(MachineInstr::FrameSetup);
655 }
656 // AArch64PointerAuth pass will insert SEH_PACSignLR
658 }
659
660 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
661 emitShadowCallStackPrologue(PrologueBeginI, DL);
663 }
664
665 if (EmitCFI && AFI->isMTETagged())
666 BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
668
669 // We signal the presence of a Swift extended frame to external tools by
670 // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
671 // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
672 // bits so that is still true.
673 if (HasFP && AFI->hasSwiftAsyncContext())
674 emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
675
676 // All calls are tail calls in GHC calling conv, and functions have no
677 // prologue/epilogue.
678 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
679 return;
680
681 // Set tagged base pointer to the requested stack slot. Ideally it should
682 // match SP value after prologue.
683 if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
684 AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
685 else
686 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
687
688 // getStackSize() includes all the locals in its size calculation. We don't
689 // include these locals when computing the stack size of a funclet, as they
690 // are allocated in the parent's stack frame and accessed via the frame
691 // pointer from the funclet. We only save the callee saved registers in the
692 // funclet, which are really the callee saved registers of the parent
693 // function, including the funclet.
694 int64_t NumBytes =
695 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
696 if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
697 return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
698
699 bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
700 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
701
702 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
703 // All of the remaining stack allocations are for locals.
704 determineLocalsStackSize(NumBytes, PrologueSaveSize);
705
706 auto [PPR, ZPR] = getSVEStackFrameSizes();
707 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
708
709 MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
710 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
711 assert(!SVEAllocs.AfterPPRs &&
712 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
713 // If we're doing SVE saves first, we need to immediately allocate space
714 // for fixed objects, then space for the SVE callee saves.
715 //
716 // Windows unwind requires that the scalable size is a multiple of 16;
717 // that's handled when the callee-saved size is computed.
718 auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject);
719 allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{},
720 /*FollowupAllocs=*/true);
721 NumBytes -= FixedObject;
722
723 // Now allocate space for the GPR callee saves.
724 MachineBasicBlock::iterator MBBI = PrologueBeginI;
725 while (MBBI != EndI && isPartOfSVECalleeSaves(MBBI))
726 ++MBBI;
727 FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
728 MBBI, DL, -AFI->getCalleeSavedStackSize(), EmitAsyncCFI);
729 NumBytes -= AFI->getCalleeSavedStackSize();
730 } else if (CombineSPBump) {
731 assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
732 emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
733 StackOffset::getFixed(-NumBytes), TII,
735 EmitAsyncCFI);
736 NumBytes = 0;
737 } else if (HomPrologEpilog) {
738 // Stack has been already adjusted.
739 NumBytes -= PrologueSaveSize;
740 } else if (PrologueSaveSize != 0) {
741 FirstGPRSaveI = convertCalleeSaveRestoreToSPPrePostIncDec(
742 PrologueBeginI, DL, -PrologueSaveSize, EmitAsyncCFI);
743 NumBytes -= PrologueSaveSize;
744 }
745 assert(NumBytes >= 0 && "Negative stack allocation size!?");
746
747 // Move past the saves of the callee-saved registers, fixing up the offsets
748 // and pre-inc if we decided to combine the callee-save and local stack
749 // pointer bump above.
750 auto &TLI = *Subtarget.getTargetLowering();
751
752 MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
753 while (AfterGPRSavesI != EndI &&
754 AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
755 !isPartOfSVECalleeSaves(AfterGPRSavesI)) {
756 if (CombineSPBump &&
757 // Only fix-up frame-setup load/store instructions.
758 (!AFL.requiresSaveVG(MF) || !isVGInstruction(AfterGPRSavesI, TLI)))
759 fixupCalleeSaveRestoreStackOffset(*AfterGPRSavesI,
760 AFI->getLocalStackSize());
761 ++AfterGPRSavesI;
762 }
763
764 // For funclets the FP belongs to the containing function. Only set up FP if
765 // we actually need to.
766 if (!IsFunclet && HasFP)
767 emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
768
769 // Now emit the moves for whatever callee saved regs we have (including FP,
770 // LR if those are saved). Frame instructions for SVE register are emitted
771 // later, after the instruction which actually save SVE regs.
772 if (EmitAsyncCFI)
773 emitCalleeSavedGPRLocations(AfterGPRSavesI);
774
775 // Alignment is required for the parent frame, not the funclet
776 const bool NeedsRealignment =
777 NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
778 const int64_t RealignmentPadding =
779 (NeedsRealignment && MFI.getMaxAlign() > Align(16))
780 ? MFI.getMaxAlign().value() - 16
781 : 0;
782
783 if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
784 emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
785
786 StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes);
787 SVEAllocs.AfterZPRs += NonSVELocalsSize;
788
789 StackOffset CFAOffset =
790 StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize;
791 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
792 // Allocate space for the callee saves and PPR locals (if any).
793 if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) {
794 auto [PPRRange, ZPRRange] =
795 partitionSVECS(MBB, AfterGPRSavesI, PPR.CalleeSavesSize,
796 ZPR.CalleeSavesSize, /*IsEpilogue=*/false);
797 AfterSVESavesI = ZPRRange.End;
798 if (EmitAsyncCFI)
799 emitCalleeSavedSVELocations(AfterSVESavesI);
800
801 allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs,
802 EmitAsyncCFI && !HasFP, CFAOffset,
803 MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs ||
804 SVEAllocs.AfterZPRs);
805 CFAOffset += SVEAllocs.BeforePPRs;
806 assert(PPRRange.End == ZPRRange.Begin &&
807 "Expected ZPR callee saves after PPR locals");
808 allocateStackSpace(PPRRange.End, 0, SVEAllocs.AfterPPRs,
809 EmitAsyncCFI && !HasFP, CFAOffset,
810 MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs);
811 CFAOffset += SVEAllocs.AfterPPRs;
812 } else {
814 // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have
815 // already been allocated. PPR locals (included in AfterPPRs) are not
816 // supported (note: this is asserted above).
817 CFAOffset += SVEAllocs.BeforePPRs;
818 }
819
820 // Allocate space for the rest of the frame including ZPR locals. Align the
821 // stack as necessary.
822 assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
823 "Cannot use redzone with stack realignment");
824 if (!AFL.canUseRedZone(MF)) {
825 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the
826 // correct value here, as NumBytes also includes padding bytes, which
827 // shouldn't be counted here.
828 allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs,
829 EmitAsyncCFI && !HasFP, CFAOffset,
830 MFI.hasVarSizedObjects());
831 }
832
833 // If we need a base pointer, set it up here. It's whatever the value of the
834 // stack pointer is at this point. Any variable size objects will be
835 // allocated after this, so we can still use the base pointer to reference
836 // locals.
837 //
838 // FIXME: Clarify FrameSetup flags here.
839 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
840 // needed.
841 // For funclets the BP belongs to the containing function.
842 if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
843 TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
844 AArch64::SP, false);
845 if (NeedsWinCFI) {
846 HasWinCFI = true;
847 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
849 }
850 }
851
852 // The very last FrameSetup instruction indicates the end of prologue. Emit a
853 // SEH opcode indicating the prologue end.
854 if (NeedsWinCFI && HasWinCFI) {
855 BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
857 }
858
859 // SEH funclets are passed the frame pointer in X1. If the parent
860 // function uses the base register, then the base register is used
861 // directly, and is not retrieved from X1.
862 if (IsFunclet && F.hasPersonalityFn()) {
863 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
865 BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
866 AArch64::FP)
867 .addReg(AArch64::X1)
869 MBB.addLiveIn(AArch64::X1);
870 }
871 }
872
873 if (EmitCFI && !EmitAsyncCFI) {
874 if (HasFP) {
875 emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
876 } else {
877 StackOffset TotalSize =
878 AFL.getSVEStackSize(MF) +
879 StackOffset::getFixed((int64_t)MFI.getStackSize());
880 CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
881 CFIBuilder.insertCFIInst(
882 createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
883 TotalSize, /*LastAdjustmentWasScalable=*/false));
884 }
885 emitCalleeSavedGPRLocations(AfterSVESavesI);
886 emitCalleeSavedSVELocations(AfterSVESavesI);
887 }
888}
889
890void AArch64PrologueEmitter::emitShadowCallStackPrologue(
891 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
892 // Shadow call stack prolog: str x30, [x18], #8
893 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
894 .addReg(AArch64::X18, RegState::Define)
895 .addReg(AArch64::LR)
896 .addReg(AArch64::X18)
897 .addImm(8)
899
900 // This instruction also makes x18 live-in to the entry block.
901 MBB.addLiveIn(AArch64::X18);
902
903 if (NeedsWinCFI)
904 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
906
907 if (EmitCFI) {
908 // Emit a CFI instruction that causes 8 to be subtracted from the value of
909 // x18 when unwinding past this frame.
910 static const char CFIInst[] = {
911 dwarf::DW_CFA_val_expression,
912 18, // register
913 2, // length
914 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
915 static_cast<char>(-8) & 0x7f, // addend (sleb128)
916 };
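// The escape bytes encode: DW_CFA_val_expression, register 18, then a
// two-byte DWARF expression "DW_OP_breg18 -8", i.e. on unwind x18 is restored
// to its current value minus 8 (0x78 is -8 encoded as a sleb128).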
917 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
918 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
919 }
920}
921
922void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
923 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
924 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
925 case SwiftAsyncFramePointerMode::DeploymentBased:
926 if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
927 // The special symbol below is absolute and has a *value* that can be
928 // combined with the frame pointer to signal an extended frame.
929 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
930 .addExternalSymbol("swift_async_extendedFramePointerFlags",
932 if (NeedsWinCFI) {
933 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
935 HasWinCFI = true;
936 }
937 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
938 .addUse(AArch64::FP)
939 .addUse(AArch64::X16)
940 .addImm(Subtarget.isTargetILP32() ? 32 : 0);
941 if (NeedsWinCFI) {
942 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
944 HasWinCFI = true;
945 }
946 break;
947 }
948 [[fallthrough]];
949
950 case SwiftAsyncFramePointerMode::Always:
951 // ORR x29, x29, #0x1000_0000_0000_0000
952 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
953 .addUse(AArch64::FP)
954 .addImm(0x1100)
956 if (NeedsWinCFI) {
957 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
959 HasWinCFI = true;
960 }
961 break;
962
963 case SwiftAsyncFramePointerMode::Never:
964 break;
965 }
966}
967
968void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
969 int64_t NumBytes, MachineBasicBlock::iterator MBBI,
970 const DebugLoc &DL) const {
971 assert(!HasFP && "unexpected function without stack frame but with FP");
972 assert(!AFL.getSVEStackSize(MF) &&
973 "unexpected function without stack frame but with SVE objects");
974 // All of the stack allocation is for locals.
975 AFI->setLocalStackSize(NumBytes);
976 if (!NumBytes) {
977 if (NeedsWinCFI && HasWinCFI) {
978 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
980 }
981 return;
982 }
983 // REDZONE: If the stack size is less than 128 bytes, we don't need
984 // to actually allocate.
985 if (AFL.canUseRedZone(MF)) {
986 AFI->setHasRedZone(true);
987 ++NumRedZoneFunctions;
988 } else {
989 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
990 StackOffset::getFixed(-NumBytes), TII,
992 if (EmitCFI) {
993 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
994 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
995 // Encode the stack size of the leaf function.
996 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
997 .buildDefCFAOffset(NumBytes, FrameLabel);
998 }
999 }
1000
1001 if (NeedsWinCFI) {
1002 HasWinCFI = true;
1003 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1005 }
1006}
1007
1008void AArch64PrologueEmitter::emitFramePointerSetup(
1010 unsigned FixedObject) {
1011 int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1012 if (CombineSPBump)
1013 FPOffset += AFI->getLocalStackSize();
1014
1015 if (AFI->hasSwiftAsyncContext()) {
1016 // Before we update the live FP we have to ensure there's a valid (or
1017 // null) asynchronous context in its slot just before FP in the frame
1018 // record, so store it now.
1019 const auto &Attrs = MF.getFunction().getAttributes();
1020 bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
1021 if (HaveInitialContext)
1022 MBB.addLiveIn(AArch64::X22);
1023 Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
1024 BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
1025 .addUse(Reg)
1026 .addUse(AArch64::SP)
1027 .addImm(FPOffset - 8)
1029 if (NeedsWinCFI) {
1030 // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
1031 // to multiple instructions, should be mutually-exclusive.
1032 assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
1033 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1035 HasWinCFI = true;
1036 }
1037 }
1038
1039 if (HomPrologEpilog) {
1040 auto Prolog = MBBI;
1041 --Prolog;
1042 assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1043 Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1044 } else {
1045 // Issue sub fp, sp, FPOffset or
1046 // mov fp,sp when FPOffset is zero.
1047 // Note: All stores of callee-saved registers are marked as "FrameSetup".
1048 // This code marks the instruction(s) that set the FP also.
1049 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1050 StackOffset::getFixed(FPOffset), TII,
1052 if (NeedsWinCFI && HasWinCFI) {
1053 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1055 // After setting up the FP, the rest of the prolog doesn't need to be
1056 // included in the SEH unwind info.
1057 NeedsWinCFI = false;
1058 }
1059 }
1060 if (EmitAsyncCFI)
1061 emitDefineCFAWithFP(MBBI, FixedObject);
1062}
1063
1064// Define the current CFA rule to use the provided FP.
1065void AArch64PrologueEmitter::emitDefineCFAWithFP(
1066 MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
1067 const int OffsetToFirstCalleeSaveFromFP =
1068 AFI->getCalleeSaveBaseToFrameRecordOffset() -
1069 AFI->getCalleeSavedStackSize();
1070 Register FramePtr = RegInfo.getFrameRegister(MF);
1071 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
1072 .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP);
1073}
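// I.e. the CFA is reconstructed as FP plus the distance from the frame record
// back to the incoming SP: the callee-save area size minus the frame-record
// offset within it, plus any fixed-object area.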
1074
1075void AArch64PrologueEmitter::emitWindowsStackProbe(
1076 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
1077 int64_t RealignmentPadding) const {
1078 if (AFI->getSVECalleeSavedStackSize())
1079 report_fatal_error("SVE callee saves not yet supported with stack probing");
1080
1081 // Find an available register to spill the value of X15 to, if X15 is being
1082 // used already for nest.
1083 unsigned X15Scratch = AArch64::NoRegister;
1084 if (llvm::any_of(MBB.liveins(),
1085 [this](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
1086 return RegInfo.isSuperOrSubRegisterEq(AArch64::X15,
1087 LiveIn.PhysReg);
1088 })) {
1089 X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
1090 assert(X15Scratch != AArch64::NoRegister &&
1091 (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
1092#ifndef NDEBUG
1093 LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
1094#endif
1095 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
1096 .addReg(AArch64::XZR)
1097 .addReg(AArch64::X15, RegState::Undef)
1098 .addReg(AArch64::X15, RegState::Implicit)
1100 }
1101
1102 uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
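// The Windows stack-probe helper (__chkstk on MSVC-style targets) takes the
// requested allocation in x15 as a count of 16-byte units, hence the >> 4
// above; RealignmentPadding is included so the probe covers the worst-case
// aligned allocation.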
1103 if (NeedsWinCFI) {
1104 HasWinCFI = true;
1105 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1106 // exceed this amount. We need to move at most 2^24 - 1 into x15.
1107 // This is at most two instructions, MOVZ followed by MOVK.
1108 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1109 // exceeding 256MB in size.
1110 if (NumBytes >= (1 << 28))
1111 report_fatal_error("Stack size cannot exceed 256MB for stack "
1112 "unwinding purposes");
1113
1114 uint32_t LowNumWords = NumWords & 0xFFFF;
1115 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1116 .addImm(LowNumWords)
1119 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1121 if ((NumWords & 0xFFFF0000) != 0) {
1122 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1123 .addReg(AArch64::X15)
1124 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1127 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1129 }
1130 } else {
1131 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1132 .addImm(NumWords)
1134 }
1135
1136 const char *ChkStk = Subtarget.getChkStkName();
1137 switch (MF.getTarget().getCodeModel()) {
1138 case CodeModel::Tiny:
1139 case CodeModel::Small:
1140 case CodeModel::Medium:
1141 case CodeModel::Kernel:
1142 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1143 .addExternalSymbol(ChkStk)
1144 .addReg(AArch64::X15, RegState::Implicit)
1145 .addReg(AArch64::X16,
1147 .addReg(AArch64::X17,
1149 .addReg(AArch64::NZCV,
1152 if (NeedsWinCFI) {
1153 HasWinCFI = true;
1154 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1156 }
1157 break;
1158 case CodeModel::Large:
1159 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1160 .addReg(AArch64::X16, RegState::Define)
1161 .addExternalSymbol(ChkStk)
1162 .addExternalSymbol(ChkStk)
1164 if (NeedsWinCFI) {
1165 HasWinCFI = true;
1166 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1168 }
1169
1171 .addReg(AArch64::X16, RegState::Kill)
1173 .addReg(AArch64::X16,
1175 .addReg(AArch64::X17,
1177 .addReg(AArch64::NZCV,
1180 if (NeedsWinCFI) {
1181 HasWinCFI = true;
1182 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1184 }
1185 break;
1186 }
1187
1188 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1189 .addReg(AArch64::SP, RegState::Kill)
1190 .addReg(AArch64::X15, RegState::Kill)
1193 if (NeedsWinCFI) {
1194 HasWinCFI = true;
1195 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1196 .addImm(NumBytes)
1198 }
1199 NumBytes = 0;
1200
1201 if (RealignmentPadding > 0) {
1202 if (RealignmentPadding >= 4096) {
1203 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
1204 .addReg(AArch64::X16, RegState::Define)
1205 .addImm(RealignmentPadding)
1207 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
1208 .addReg(AArch64::SP)
1209 .addReg(AArch64::X16, RegState::Kill)
1212 } else {
1213 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
1214 .addReg(AArch64::SP)
1215 .addImm(RealignmentPadding)
1216 .addImm(0)
1218 }
1219
1220 uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
1221 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1222 .addReg(AArch64::X15, RegState::Kill)
1224 AFI->setStackRealigned(true);
1225
1226 // No need for SEH instructions here; if we're realigning the stack,
1227 // we've set a frame pointer and already finished the SEH prologue.
1228 assert(!NeedsWinCFI);
1229 }
1230 if (X15Scratch != AArch64::NoRegister) {
1231 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
1232 .addReg(AArch64::XZR)
1233 .addReg(X15Scratch, RegState::Undef)
1234 .addReg(X15Scratch, RegState::Implicit)
1236 }
1237}
1238
1239void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
1240 MachineBasicBlock::iterator MBBI) const {
1241 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1242 if (CSI.empty())
1243 return;
1244
1245 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1246 for (const auto &Info : CSI) {
1247 unsigned FrameIdx = Info.getFrameIdx();
1248 if (MFI.hasScalableStackID(FrameIdx))
1249 continue;
1250
1251 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1252 int64_t Offset = MFI.getObjectOffset(FrameIdx) - AFL.getOffsetOfLocalArea();
1253 CFIBuilder.buildOffset(Info.getReg(), Offset);
1254 }
1255}
1256
1257void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
1258 MachineBasicBlock::iterator MBBI) const {
1259 // Add callee saved registers to move list.
1260 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1261 if (CSI.empty())
1262 return;
1263
1264 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
1265
1266 std::optional<int64_t> IncomingVGOffsetFromDefCFA;
1267 if (AFL.requiresSaveVG(MF)) {
1268 auto IncomingVG = *find_if(
1269 reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; });
1270 IncomingVGOffsetFromDefCFA = MFI.getObjectOffset(IncomingVG.getFrameIdx()) -
1271 AFL.getOffsetOfLocalArea();
1272 }
1273
1274 StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1275 for (const auto &Info : CSI) {
1276 int FI = Info.getFrameIdx();
1277 if (!MFI.hasScalableStackID(FI))
1278 continue;
1279
1280 // Not all unwinders may know about SVE registers, so assume the lowest
1281 // common denominator.
1282 assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
1283 MCRegister Reg = Info.getReg();
1284 if (!RegInfo.regNeedsCFI(Reg, Reg))
1285 continue;
1286
1287 StackOffset Offset =
1288 StackOffset::getScalable(MFI.getObjectOffset(FI)) -
1289 StackOffset::getFixed(AFI->getCalleeSavedStackSize(MFI));
1290
1291 // The scalable vectors are below (lower address) the scalable predicates
1292 // with split SVE objects, so we must subtract the size of the predicates.
1294 MFI.getStackID(FI) == TargetStackID::ScalableVector)
1295 Offset -= PPRStackSize;
1296
1297 CFIBuilder.insertCFIInst(
1298 createCFAOffset(RegInfo, Reg, Offset, IncomingVGOffsetFromDefCFA));
1299 }
1300}
1301
1302static bool isFuncletReturnInstr(const MachineInstr &MI) {
1303 switch (MI.getOpcode()) {
1304 default:
1305 return false;
1306 case AArch64::CATCHRET:
1307 case AArch64::CLEANUPRET:
1308 return true;
1309 }
1310}
1311
1312AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF,
1313 MachineBasicBlock &MBB,
1314 const AArch64FrameLowering &AFL)
1315 : AArch64PrologueEpilogueCommon(MF, MBB, AFL) {
1316 EmitCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
1317 HomPrologEpilog = AFL.homogeneousPrologEpilog(MF, &MBB);
1318 SEHEpilogueStartI = MBB.end();
1319}
1320
1321void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI,
1323 // Other combinations could be supported, but are not currently needed.
1324 assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 &&
1325 "expected negative offset (with optional fixed portion)");
1326 Register Base = AArch64::FP;
1327 if (int64_t FixedOffset = Offset.getFixed()) {
1328 // If we have a negative fixed offset, we first subtract it into a
1329 // temporary register (to avoid briefly deallocating the scalable
1330 // portion of the offset).
1331 Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1332 emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP,
1333 StackOffset::getFixed(FixedOffset), TII,
1335 }
1336 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base,
1337 StackOffset::getScalable(Offset.getScalable()), TII,
1339}
1340
1341void AArch64EpilogueEmitter::emitEpilogue() {
1342 MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr();
1343 if (MBB.end() != EpilogueEndI) {
1344 DL = EpilogueEndI->getDebugLoc();
1345 IsFunclet = isFuncletReturnInstr(*EpilogueEndI);
1346 }
1347
1348 int64_t NumBytes =
1349 IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
1350
1351 // All calls are tail calls in GHC calling conv, and functions have no
1352 // prologue/epilogue.
1353 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1354 return;
1355
1356 // How much of the stack used by incoming arguments this function is expected
1357 // to restore in this particular epilogue.
1358 int64_t ArgumentStackToRestore = AFL.getArgumentStackToRestore(MF, MBB);
1359 bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
1360 MF.getFunction().isVarArg());
1361 unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1362
1363 int64_t AfterCSRPopSize = ArgumentStackToRestore;
1364 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1365 // We cannot rely on the local stack size set in emitPrologue if the function
1366 // has funclets, as funclets have different local stack size requirements, and
1367 // the current value set in emitPrologue may be that of the containing
1368 // function.
1369 if (MF.hasEHFunclets())
1370 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1371
1372 if (HomPrologEpilog) {
1374 auto FirstHomogenousEpilogI = MBB.getFirstTerminator();
1375 if (FirstHomogenousEpilogI != MBB.begin()) {
1376 auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI);
1377 if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1378 FirstHomogenousEpilogI = HomogeneousEpilog;
1379 }
1380
1381 // Adjust local stack
1382 emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP,
1383 StackOffset::getFixed(AFI->getLocalStackSize()), TII,
1385
1386 // SP has been already adjusted while restoring callee save regs.
1387 // We've bailed-out the case with adjusting SP for arguments.
1388 assert(AfterCSRPopSize == 0);
1389 return;
1390 }
1391
1392 bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1393
1394 unsigned ProloguePopSize = PrologueSaveSize;
1395 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1396 // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1397 // that needs to be popped until we reach the start of the SVE save area.
1398 // The "FixedObject" stack occurs after the SVE area and must be popped
1399 // later.
1400 ProloguePopSize -= FixedObject;
1401 AfterCSRPopSize += FixedObject;
1402 }
1403
1404 // Assume we can't combine the last pop with the sp restore.
1405 if (!CombineSPBump && ProloguePopSize != 0) {
1406 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1407 while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1408 AArch64InstrInfo::isSEHInstruction(*Pop) ||
1411 Pop = std::prev(Pop);
1412 // Converting the last ldp to a post-index ldp is valid only if the last
1413 // ldp's offset is 0.
1414 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1415 // If the offset is 0 and the AfterCSR pop is not actually trying to
1416 // allocate more stack for arguments (in space that an untimely interrupt
1417 // may clobber), convert it to a post-index ldp.
1418 if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
1420 Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1421 ProloguePopSize);
1423 MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1424 if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1425 ++AfterLastPop;
1426 // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1427 // callee-save non-SVE registers to move the stack pointer to the start of
1428 // the SVE area.
1429 emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1430 StackOffset::getFixed(ProloguePopSize), TII,
1432 &HasWinCFI);
1433 } else {
1434 // Otherwise, make sure to emit an add after the last ldp.
1435 // We're doing this by transferring the size to be restored from the
1436 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1437 // pops.
1438 AfterCSRPopSize += ProloguePopSize;
1439 }
1440 }
1441
1442 // Move past the restores of the callee-saved registers.
1443 // If we plan on combining the sp bump of the local stack size and the callee
1444 // save stack size, we might need to adjust the CSR save and restore offsets.
1445 MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator();
1446 MachineBasicBlock::iterator Begin = MBB.begin();
1447 while (FirstGPRRestoreI != Begin) {
1448 --FirstGPRRestoreI;
1449 if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) ||
1451 isPartOfSVECalleeSaves(FirstGPRRestoreI))) {
1452 ++FirstGPRRestoreI;
1453 break;
1454 } else if (CombineSPBump)
1455 fixupCalleeSaveRestoreStackOffset(*FirstGPRRestoreI,
1456 AFI->getLocalStackSize());
1457 }
1458
1459 if (NeedsWinCFI) {
1460 // Note that there are cases where we insert SEH opcodes in the
1461 // epilogue when we had no SEH opcodes in the prologue. For
1462 // example, when there is no stack frame but there are stack
1463 // arguments. Insert the SEH_EpilogStart and remove it later if it
1464 // we didn't emit any SEH opcodes to avoid generating WinCFI for
1465 // functions that don't need it.
1466 BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart))
1468 SEHEpilogueStartI = FirstGPRRestoreI;
1469 --SEHEpilogueStartI;
1470 }
1471
1472 // Determine the ranges of SVE callee-saves. This is done before emitting any
1473 // code at the end of the epilogue (for Swift async), which can get in the way
1474 // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1475 auto [PPR, ZPR] = getSVEStackFrameSizes();
1476 auto [PPRRange, ZPRRange] = partitionSVECS(
1477 MBB,
1478 SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1479 ? MBB.getFirstTerminator()
1480 : FirstGPRRestoreI,
1481 PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1482
1483 if (HasFP && AFI->hasSwiftAsyncContext())
1484 emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
1485
1486 // If there is a single SP update, insert it before the ret and we're done.
1487 if (CombineSPBump) {
1488 assert(!AFI->hasSVEStackSize() && "Cannot combine SP bump with SVE");
1489
1490 // When we are about to restore the CSRs, the CFA register is SP again.
1491 if (EmitCFI && HasFP)
1493 .buildDefCFA(AArch64::SP, NumBytes);
1494
1495 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1496 StackOffset::getFixed(NumBytes + AfterCSRPopSize), TII,
1498 EmitCFI, StackOffset::getFixed(NumBytes));
1499 return;
1500 }
1501
1502 NumBytes -= PrologueSaveSize;
1503 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1504
1505 StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
1506 SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR});
1507
1508 // Deallocate the SVE area.
1509 if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1510 assert(!SVEAllocs.AfterPPRs &&
1511 "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord");
1512 // If the callee-save area is before FP, restoring the FP implicitly
1513 // deallocates non-callee-save SVE allocations. Otherwise, deallocate them
1514 // explicitly.
1515 if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1516 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1518 false, NeedsWinCFI, &HasWinCFI);
1519 }
1520
1521 // Deallocate callee-save SVE registers.
1522 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1524 false, NeedsWinCFI, &HasWinCFI);
1525 } else if (AFI->hasSVEStackSize()) {
1526 // If we have stack realignment or variable-sized objects we must use the FP
1527 // to restore SVE callee saves (as there is an unknown amount of
1528 // data/padding between the SP and SVE CS area).
1529 Register BaseForSVEDealloc =
1530 (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1531 : AArch64::SP;
1532 if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
1533 if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1534 // The offset from the frame-pointer to the start of the ZPR saves.
1535 StackOffset FPOffsetZPR =
1536 -SVECalleeSavesSize - PPR.LocalsSize -
1537 StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1538 // Deallocate the stack space by moving the SP to the start of the
1539 // ZPR/PPR callee-save area.
1540 moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1541 }
1542 // With split SVE, the predicates are stored in a separate area above the
1543 // ZPR saves, so we must adjust the stack to the start of the PPRs.
1544 if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1545 // The offset from the frame-pointer to the start of the PPR saves.
1546 StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1547 // Move to the start of the PPR area.
1548 assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1549 emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1550 FPOffsetPPR, TII, MachineInstr::FrameDestroy);
1551 }
1552 } else if (BaseForSVEDealloc == AArch64::SP) {
1553 auto NonSVELocals = StackOffset::getFixed(NumBytes);
1554 auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) +
1555 SVEAllocs.totalSize();
1556
1557 if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) {
1558 // Deallocate non-SVE locals now. This is needed to reach the SVE callee
1559 // saves, but may also allow combining stack hazard bumps for split SVE.
1560 SVEAllocs.AfterZPRs += NonSVELocals;
1561 NumBytes -= NonSVELocals.getFixed();
1562 }
1563 // To deallocate the SVE stack adjust by the allocations in reverse.
1564 emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1566 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1567 CFAOffset);
1568 CFAOffset -= SVEAllocs.AfterZPRs;
1569 assert(PPRRange.Begin == ZPRRange.End &&
1570 "Expected PPR restores after ZPR");
1571 emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP,
1573 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1574 CFAOffset);
1575 CFAOffset -= SVEAllocs.AfterPPRs;
1576 emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP,
1578 false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1579 CFAOffset);
1580 }
1581
1582 if (EmitCFI)
1583 emitCalleeSavedSVERestores(
1584 SVELayout == SVEStackLayout::Split ? ZPRRange.End : PPRRange.End);
1585 }
1586
1587 if (!HasFP) {
1588 bool RedZone = AFL.canUseRedZone(MF);
1589 // If this was a redzone leaf function, we don't need to restore the
1590 // stack pointer (but we may need to pop stack args for fastcc).
1591 if (RedZone && AfterCSRPopSize == 0)
1592 return;
1593
1594 // Pop the local variables off the stack. If there are no callee-saved
1595 // registers, it means we are actually positioned at the terminator and can
1596 // combine stack increment for the locals and the stack increment for
1597 // callee-popped arguments into (possibly) a single instruction and be done.
1598 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1599 int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1600 if (NoCalleeSaveRestore)
1601 StackRestoreBytes += AfterCSRPopSize;
1602
1604 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1605 StackOffset::getFixed(StackRestoreBytes), TII,
1607 StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize));
1608
1609 // If we were able to combine the local stack pop with the argument pop,
1610 // then we're done.
1611 if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
1612 return;
1613
1614 NumBytes = 0;
1615 }
1616
1617 // Restore the original stack pointer.
1618 // FIXME: Rather than doing the math here, we should instead just use
1619 // non-post-indexed loads for the restores if we aren't actually going to
1620 // be able to save any instructions.
1621 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1623 MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP,
1624 StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()),
1626 } else if (NumBytes)
1627 emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1628 StackOffset::getFixed(NumBytes), TII,
1629 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1630
1631 // When we are about to restore the CSRs, the CFA register is SP again.
1632 if (EmitCFI && HasFP)
1633 CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy)
1634 .buildDefCFA(AArch64::SP, PrologueSaveSize);
1635
1636 // This must be placed after the callee-save restore code because that code
1637 // assumes the SP is at the same location as it was after the callee-save save
1638 // code in the prologue.
1639 if (AfterCSRPopSize) {
1640 assert(AfterCSRPopSize > 0 && "attempting to reallocate arg stack that an "
1641 "interrupt may have clobbered");
1642
1643 emitFrameOffset(
1644 MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1645 StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
1646 false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1647 StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
1648 }
1649}
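For a simple non-SVE frame with a frame pointer and no extra argument pop, the epilogue built above typically reduces to a short sequence along these lines; this is an illustrative sketch, not compiler output:

// add sp, sp, #LocalSize     // deallocate locals (sometimes folded elsewhere)
// ldp x29, x30, [sp], #16    // restore FP/LR and pop the GPR callee-save area
// ret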
1650
1651bool AArch64EpilogueEmitter::shouldCombineCSRLocalStackBump(
1652 uint64_t StackBumpBytes) const {
1653 if (!AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
1654 StackBumpBytes))
1655 return false;
1656 if (MBB.empty())
1657 return true;
1658
1659 // Disable combined SP bump if the last instruction is an MTE tag store. It
1660 // is almost always better to merge SP adjustment into those instructions.
1661 MachineBasicBlock::iterator LastI = MBB.getFirstTerminator();
1662 MachineBasicBlock::iterator Begin = MBB.begin();
1663 while (LastI != Begin) {
1664 --LastI;
1665 if (LastI->isTransient())
1666 continue;
1667 if (!LastI->getFlag(MachineInstr::FrameDestroy))
1668 break;
1669 }
1670 switch (LastI->getOpcode()) {
1671 case AArch64::STGloop:
1672 case AArch64::STZGloop:
1673 case AArch64::STGi:
1674 case AArch64::STZGi:
1675 case AArch64::ST2Gi:
1676 case AArch64::STZ2Gi:
1677 return false;
1678 default:
1679 return true;
1680 }
1681 llvm_unreachable("unreachable");
1682}
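The MTE special case exists because a trailing tag store can usually absorb the SP adjustment through its post-indexed writeback form, which a combined CSR-plus-locals bump would prevent. Illustrative only, with an assumed 16-byte granule:

// stg x9, [sp]         // tag store followed by a separate bump, vs.
// add sp, sp, #16
// stg x9, [sp], #16    // post-indexed form that folds the bump into the store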
1683
1684void AArch64EpilogueEmitter::emitSwiftAsyncContextFramePointer(
1685 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1686 switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
1687 case SwiftAsyncFramePointerMode::DeploymentBased:
1688 // Avoid the reload as it is GOT relative, and instead fall back to the
1689 // hardcoded value below. This allows a mismatch between the OS and
1690 // application without immediately terminating on the difference.
1691 [[fallthrough]];
1692 case SwiftAsyncFramePointerMode::Always:
1693 // We need to reset FP to its untagged state on return. Bit 60 is
1694 // currently used to show the presence of an extended frame.
1695
1696 // BIC x29, x29, #0x1000_0000_0000_0000
1697 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::ANDXri),
1698 AArch64::FP)
1699 .addUse(AArch64::FP)
1700 .addImm(0x10fe)
1701 .setMIFlag(MachineInstr::FrameDestroy);
1702 if (NeedsWinCFI) {
1703 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1704 .setMIFlag(MachineInstr::FrameDestroy);
1705 HasWinCFI = true;
1706 }
1707 break;
1708
1709 case SwiftAsyncFramePointerMode::Never:
1710 break;
1711 }
1712}
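The ANDXri immediate 0x10fe used above is the encoded form of the mask ~(1 << 60), i.e. the BIC of the Swift extended-frame bit described in the comment. A standalone sketch that checks this decoding (not part of the pass, shown only to make the encoding concrete):

#include <cassert>
#include <cstdint>

int main() {
  // 0x10fe encodes N:immr:imms = 1:000011:111110, i.e. a run of 63 ones in a
  // 64-bit element rotated right by immr = 3.
  uint64_t Run = ~0ULL >> 1; // 63 consecutive ones
  unsigned Rot = 3;
  uint64_t Mask = (Run >> Rot) | (Run << (64 - Rot));
  assert(Mask == ~(1ULL << 60)); // clears exactly bit 60 of x29
  return 0;
}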
1713
1714void AArch64EpilogueEmitter::emitShadowCallStackEpilogue(
1715 MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
1716 // Shadow call stack epilog: ldr x30, [x18, #-8]!
1717 BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXpre))
1718 .addReg(AArch64::X18, RegState::Define)
1719 .addReg(AArch64::LR, RegState::Define)
1720 .addReg(AArch64::X18)
1721 .addImm(-8)
1722 .setMIFlag(MachineInstr::FrameDestroy);
1723
1724 if (NeedsWinCFI)
1725 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1726 .setMIFlag(MachineInstr::FrameDestroy);
1727
1728 if (AFI->needsAsyncDwarfUnwindInfo(MF))
1729 CFIInstBuilder(MBB, MBBI, MachineInstr::FrameDestroy)
1730 .buildRestore(AArch64::X18);
1731}
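The LDRXpre built above mirrors the shadow-call-stack store made in the prologue; by convention x18 holds the shadow stack pointer. The expected pairing, shown for illustration:

// prologue: str x30, [x18], #8    // push the return address onto the shadow stack
// epilogue: ldr x30, [x18, #-8]!  // pop it back, pre-decrementing x18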
1732
1733void AArch64EpilogueEmitter::emitCalleeSavedRestores(
1734 MachineBasicBlock::iterator MBBI, bool SVE) const {
1735 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1736 if (CSI.empty())
1737 return;
1738
1739 CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameDestroy);
1740
1741 for (const auto &Info : CSI) {
1742 if (SVE != MFI.hasScalableStackID(Info.getFrameIdx()))
1743 continue;
1744
1745 MCRegister Reg = Info.getReg();
1746 if (SVE && !RegInfo.regNeedsCFI(Reg, Reg))
1747 continue;
1748
1749 CFIBuilder.buildRestore(Info.getReg());
1750 }
1751}
1752
1753void AArch64EpilogueEmitter::finalizeEpilogue() const {
1754 if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
1755 emitShadowCallStackEpilogue(MBB.getFirstTerminator(), DL);
1756 HasWinCFI |= NeedsWinCFI;
1757 }
1758 if (EmitCFI)
1759 emitCalleeSavedGPRRestores(MBB.getFirstTerminator());
1760 if (AFI->shouldSignReturnAddress(MF)) {
1761 // If pac-ret+leaf is in effect, PAUTH_EPILOGUE pseudo instructions
1762 // are inserted by emitPacRetPlusLeafHardening().
1763 if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
1764 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1765 TII->get(AArch64::PAUTH_EPILOGUE))
1766 .setMIFlag(MachineInstr::FrameDestroy);
1767 }
1768 // AArch64PointerAuth pass will insert SEH_PACSignLR
1769 HasWinCFI |= NeedsWinCFI;
1770 }
1771 if (HasWinCFI) {
1772 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1773 .setMIFlag(MachineInstr::FrameDestroy);
1774 if (!MF.hasWinCFI())
1775 MF.setHasWinCFI(true);
1776 }
1777 if (NeedsWinCFI) {
1778 assert(SEHEpilogueStartI != MBB.end());
1779 if (!HasWinCFI)
1780 MBB.erase(SEHEpilogueStartI);
1781 }
1782}
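On Windows targets, the SEH_EpilogEnd pseudo emitted here pairs with the epilogue-start marker recorded in SEHEpilogueStartI, so the final assembly brackets the epilogue with unwind directives roughly as follows (illustration only):

// .seh_startepilogue
// ldp x29, x30, [sp], #16
// .seh_endepilogue
// ret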
1783
1784} // namespace llvm