LLVM  13.0.0git
AArch64FrameLowering.cpp
Go to the documentation of this file.
1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
10 //
11 // On AArch64, stack frames are structured as follows:
12 //
13 // The stack grows downward.
14 //
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
17 // in the frame.
18 //
19 // At function entry, the "frame" looks as follows:
20 //
21 // | | Higher address
22 // |-----------------------------------|
23 // | |
24 // | arguments passed on the stack |
25 // | |
26 // |-----------------------------------| <- sp
27 // | | Lower address
28 //
29 //
30 // After the prologue has run, the frame has the following general structure.
31 // Note that this doesn't depict the case where a red-zone is used. Also,
32 // technically the last frame area (VLAs) doesn't get created until in the
33 // main function body, after the prologue is run. However, it's depicted here
34 // for completeness.
35 //
36 // | | Higher address
37 // |-----------------------------------|
38 // | |
39 // | arguments passed on the stack |
40 // | |
41 // |-----------------------------------|
42 // | |
43 // | (Win64 only) varargs from reg |
44 // | |
45 // |-----------------------------------|
46 // | |
47 // | callee-saved gpr registers | <--.
48 // | | | On Darwin platforms these
49 // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
50 // | | | (frame record first)
51 // | prev_fp, prev_lr | <--'
52 // | (a.k.a. "frame record") |
53 // |-----------------------------------| <- fp(=x29)
54 // | |
55 // | callee-saved fp/simd/SVE regs |
56 // | |
57 // |-----------------------------------|
58 // | |
59 // | SVE stack objects |
60 // | |
61 // |-----------------------------------|
62 // |.empty.space.to.make.part.below....|
63 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
64 // |.the.standard.16-byte.alignment....| compile time; if present)
65 // |-----------------------------------|
66 // | |
67 // | local variables of fixed size |
68 // | including spill slots |
69 // |-----------------------------------| <- bp(not defined by ABI,
70 // |.variable-sized.local.variables....| LLVM chooses X19)
71 // |.(VLAs)............................| (size of this area is unknown at
72 // |...................................| compile time)
73 // |-----------------------------------| <- sp
74 // | | Lower address
75 //
76 //
77 // To access the data in a frame, a constant offset from one of the pointers
78 // (fp, bp, sp) must be computable at compile time. The size
79 // of the areas with a dotted background cannot be computed at compile-time
80 // if they are present, making it required to have all three of fp, bp and
81 // sp to be set up to be able to access all contents in the frame areas,
82 // assuming all of the frame areas are non-empty.
83 //
84 // For most functions, some of the frame areas are empty. For those functions,
85 // it may not be necessary to set up fp or bp:
86 // * A base pointer is definitely needed when there are both VLAs and local
87 // variables with more-than-default alignment requirements.
88 // * A frame pointer is definitely needed when there are local variables with
89 // more-than-default alignment requirements.
90 //
91 // For Darwin platforms the frame-record (fp, lr) is stored at the top of the
92 // callee-saved area, since the unwind encoding does not allow for encoding
93 // this dynamically and existing tools depend on this layout. For other
94 // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
95 // area to allow SVE stack objects (allocated directly below the callee-saves,
96 // if available) to be accessed directly from the framepointer.
97 // The SVE spill/fill instructions have VL-scaled addressing modes such
98 // as:
99 // ldr z8, [fp, #-7 mul vl]
100 // For SVE the size of the vector length (VL) is not known at compile-time, so
101 // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
102 // layout, we don't need to add an unscaled offset to the framepointer before
103 // accessing the SVE object in the frame.
104 //
105 // In some cases when a base pointer is not strictly needed, it is generated
106 // anyway when offsets from the frame pointer to access local variables become
107 // so large that the offset can't be encoded in the immediate fields of loads
108 // or stores.
109 //
110 // Outgoing function arguments must be at the bottom of the stack frame when
111 // calling another function. If we do not have variable-sized stack objects, we
112 // can allocate a "reserved call frame" area at the bottom of the local
113 // variable area, large enough for all outgoing calls. If we do have VLAs, then
114 // the stack pointer must be decremented and incremented around each call to
115 // make space for the arguments below the VLAs.
116 //
117 // FIXME: also explain the redzone concept.
118 //
119 //===----------------------------------------------------------------------===//
120 
121 #include "AArch64FrameLowering.h"
122 #include "AArch64InstrInfo.h"
124 #include "AArch64RegisterInfo.h"
125 #include "AArch64Subtarget.h"
126 #include "AArch64TargetMachine.h"
128 #include "llvm/ADT/ScopeExit.h"
129 #include "llvm/ADT/SmallVector.h"
130 #include "llvm/ADT/Statistic.h"
146 #include "llvm/IR/Attributes.h"
147 #include "llvm/IR/CallingConv.h"
148 #include "llvm/IR/DataLayout.h"
149 #include "llvm/IR/DebugLoc.h"
150 #include "llvm/IR/Function.h"
151 #include "llvm/MC/MCAsmInfo.h"
152 #include "llvm/MC/MCDwarf.h"
154 #include "llvm/Support/Debug.h"
156 #include "llvm/Support/LEB128.h"
157 #include "llvm/Support/MathExtras.h"
161 #include <cassert>
162 #include <cstdint>
163 #include <iterator>
164 #include <vector>
165 
166 using namespace llvm;
167 
168 #define DEBUG_TYPE "frame-info"
169 
// Hidden boolean command-line option "aarch64-redzone", initialised to false.
// homogeneousPrologEpilog() returns false whenever this option is set.
170 static cl::opt<bool> EnableRedZone("aarch64-redzone",
171  cl::desc("enable use of redzone on AArch64"),
172  cl::init(false), cl::Hidden);
173 
// Hidden boolean command-line option "reverse-csr-restore-seq", initialised to
// false. Per its description it reverses the CSR restore sequence.
174 static cl::opt<bool>
175  ReverseCSRRestoreSeq("reverse-csr-restore-seq",
176  cl::desc("reverse the CSR restore sequence"),
177  cl::init(false), cl::Hidden);
178 
180  "stack-tagging-merge-settag",
181  cl::desc("merge settag instruction in function epilog"), cl::init(true),
182  cl::Hidden);
183 
184 static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
185  cl::desc("sort stack allocations"),
186  cl::init(true), cl::Hidden);
187 
189  "homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
190  cl::desc("Emit homogeneous prologue and epilogue for the size "
191  "optimization (default = off)"));
192 
193 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
194 
195 /// Returns the argument pop size.
199  bool IsTailCallReturn = false;
200  if (MBB.end() != MBBI) {
201  unsigned RetOpcode = MBBI->getOpcode();
202  IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
203  RetOpcode == AArch64::TCRETURNri ||
204  RetOpcode == AArch64::TCRETURNriBTI;
205  }
207 
208  uint64_t ArgumentPopSize = 0;
209  if (IsTailCallReturn) {
210  MachineOperand &StackAdjust = MBBI->getOperand(1);
211 
212  // For a tail-call in a callee-pops-arguments environment, some or all of
213  // the stack may actually be in use for the call's arguments, this is
214  // calculated during LowerCall and consumed here...
215  ArgumentPopSize = StackAdjust.getImm();
216  } else {
217  // ... otherwise the amount to pop is *all* of the argument space,
218  // conveniently stored in the MachineFunctionInfo by
219  // LowerFormalArguments. This will, of course, be zero for the C calling
220  // convention.
221  ArgumentPopSize = AFI->getArgumentStackToRestore();
222  }
223 
224  return ArgumentPopSize;
225 }
226 
228 static bool needsWinCFI(const MachineFunction &MF);
230 
231 /// Returns true if a homogeneous prolog or epilog code can be emitted
232 /// for the size optimization. If possible, a frame helper call is injected.
233 /// When Exit block is given, this check is for epilog.
///
/// The result is false unless the function reports hasMinSize(). It is also
/// false when EnableRedZone is set, when needsWinCFI(MF) holds, when
/// getSVEStackSize(MF) is non-zero, when the frame has variable-sized objects
/// or needs stack realignment, or when \p Exit is non-null and
/// getArgumentPopSize(MF, *Exit) is non-zero.
///
/// \param MF   the function being examined.
/// \param Exit exit block to test for an epilog; may be null, in which case
///             the argument-pop check is skipped.
/// \returns true only when none of the bail-out conditions above applies.
234 bool AArch64FrameLowering::homogeneousPrologEpilog(
235  MachineFunction &MF, MachineBasicBlock *Exit) const {
236  if (!MF.getFunction().hasMinSize())
237  return false;
239  return false; // NOTE(review): the condition guarding this return (listing line 238) is missing from this listing.
241  return false; // NOTE(review): the condition guarding this return (listing line 240) is missing from this listing.
242  if (EnableRedZone)
243  return false;
244 
245  // TODO: Windows is not supported yet.
246  if (needsWinCFI(MF))
247  return false;
248  // TODO: SVE is not supported yet.
249  if (getSVEStackSize(MF))
250  return false;
251 
252  // Bail on stack adjustment needed on return for simplicity.
253  const MachineFrameInfo &MFI = MF.getFrameInfo();
254  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
255  if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF))
256  return false;
257  if (Exit && getArgumentPopSize(MF, *Exit)) // only checked when an exit block was supplied
258  return false;
259 
260  return true;
261 }
262 
263 /// Returns true if CSRs should be paired.
/// That is the case when produceCompactUnwindFrame(MF) returns true or,
/// failing that, when homogeneousPrologEpilog(MF) (called without an exit
/// block) returns true.
264 bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
265  return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
266 }
267 
268 /// This is the biggest offset to the stack pointer we can encode in aarch64
269 /// instructions (without using a separate calculation and a temp register).
270 /// Note that the exception here are vector stores/loads which cannot encode any
271 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
272 static const unsigned DefaultSafeSPDisplacement = 255;
273 
274 /// Look at each instruction that references stack frames and return the stack
275 /// size limit beyond which some of these instructions will require a scratch
276 /// register during their expansion later.
278  // FIXME: For now, just conservatively guesstimate based on unscaled indexing
279  // range. We'll end up allocating an unnecessary spill slot a lot, but
280  // realistically that's not a big deal at this stage of the game.
281  for (MachineBasicBlock &MBB : MF) {
282  for (MachineInstr &MI : MBB) {
283  if (MI.isDebugInstr() || MI.isPseudo() ||
284  MI.getOpcode() == AArch64::ADDXri ||
285  MI.getOpcode() == AArch64::ADDSXri)
286  continue;
287 
288  for (const MachineOperand &MO : MI.operands()) {
289  if (!MO.isFI())
290  continue;
291 
293  if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
295  return 0;
296  }
297  }
298  }
300 }
301 
305 }
306 
307 /// Returns the size of the fixed object area (allocated next to sp on entry)
308 /// On Win64 this may include a var args area and an UnwindHelp object for EH.
///
/// \param MF        queried for whether the function has EH funclets.
/// \param AFI       supplies the var-args GPR area size (getVarArgsGPRSize()).
/// \param IsWin64   when false the result is always 0.
/// \param IsFunclet when true the result is always 0.
/// \returns 0 unless IsWin64 is set and IsFunclet is clear; otherwise the
///          var-args GPR size plus 8 bytes if MF has EH funclets, passed
///          through alignTo(..., 16).
309 static unsigned getFixedObjectSize(const MachineFunction &MF,
310  const AArch64FunctionInfo *AFI, bool IsWin64,
311  bool IsFunclet) {
312  if (!IsWin64 || IsFunclet) {
313  // Only Win64 uses fixed objects, and then only for the function (not
314  // funclets)
315  return 0;
316  } else {
317  // Var args are stored here in the primary function.
318  const unsigned VarArgsArea = AFI->getVarArgsGPRSize();
319  // To support EH funclets we allocate an UnwindHelp object
320  const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0);
321  return alignTo(VarArgsArea + UnwindHelpObject, 16); // combined size, aligned to 16
322  }
323 }
324 
325 /// Returns the size of the entire SVE stackframe (calleesaves + spills).
328  return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE());
329 }
330 
332  if (!EnableRedZone)
333  return false;
334 
335  // Don't use the red zone if the function explicitly asks us not to.
336  // This is typically used for kernel code.
337  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
338  const unsigned RedZoneSize =
339  Subtarget.getTargetLowering()->getRedZoneSize(MF.getFunction());
340  if (!RedZoneSize)
341  return false;
342 
343  const MachineFrameInfo &MFI = MF.getFrameInfo();
345  uint64_t NumBytes = AFI->getLocalStackSize();
346 
347  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize ||
348  getSVEStackSize(MF));
349 }
350 
351 /// hasFP - Return true if the specified function should have a dedicated frame
352 /// pointer register.
354  const MachineFrameInfo &MFI = MF.getFrameInfo();
355  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
356  // Win64 EH requires a frame pointer if funclets are present, as the locals
357  // are accessed off the frame pointer in both the parent function and the
358  // funclets.
359  if (MF.hasEHFunclets())
360  return true;
361  // Retain behavior of always omitting the FP for leaf functions when possible.
363  return true;
364  if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
365  MFI.hasStackMap() || MFI.hasPatchPoint() ||
366  RegInfo->hasStackRealignment(MF))
367  return true;
368  // With large callframes around we may need to use FP to access the scavenging
369  // emergency spillslot.
370  //
371  // Unfortunately some calls to hasFP() like machine verifier ->
372  // getReservedReg() -> hasFP in the middle of global isel are too early
373  // to know the max call frame size. Hopefully conservatively returning "true"
374  // in those cases is fine.
375  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
376  if (!MFI.isMaxCallFrameSizeComputed() ||
378  return true;
379 
380  return false;
381 }
382 
383 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
384 /// not required, we reserve argument space for call sites in the function
385 /// immediately on entry to the current function. This eliminates the need for
386 /// add/sub sp brackets around call sites. Returns true if the call frame is
387 /// included as part of the stack frame.
388 bool
390  return !MF.getFrameInfo().hasVarSizedObjects();
391 }
392 
396  const AArch64InstrInfo *TII =
397  static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
398  DebugLoc DL = I->getDebugLoc();
399  unsigned Opc = I->getOpcode();
400  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
401  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
402 
403  if (!hasReservedCallFrame(MF)) {
404  int64_t Amount = I->getOperand(0).getImm();
405  Amount = alignTo(Amount, getStackAlign());
406  if (!IsDestroy)
407  Amount = -Amount;
408 
409  // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
410  // doesn't have to pop anything), then the first operand will be zero too so
411  // this adjustment is a no-op.
412  if (CalleePopAmount == 0) {
413  // FIXME: in-function stack adjustment for calls is limited to 24-bits
414  // because there's no guaranteed temporary register available.
415  //
416  // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
417  // 1) For offset <= 12-bit, we use LSL #0
418  // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
419  // LSL #0, and the other uses LSL #12.
420  //
421  // Most call frames will be allocated at the start of a function so
422  // this is OK, but it is a limitation that needs dealing with.
423  assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
424  emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
425  StackOffset::getFixed(Amount), TII);
426  }
427  } else if (CalleePopAmount != 0) {
428  // If the calling convention demands that the callee pops arguments from the
429  // stack, we want to add it back if we have a reserved call frame.
430  assert(CalleePopAmount < 0xffffff && "call frame too large");
431  emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
432  StackOffset::getFixed(-(int64_t)CalleePopAmount), TII);
433  }
434  return MBB.erase(I);
435 }
436 
437 // Convenience function to create a DWARF expression for
438 // Expr + NumBytes + NumVGScaledBytes * AArch64::VG
440  int NumBytes, int NumVGScaledBytes, unsigned VG,
441  llvm::raw_string_ostream &Comment) {
442  uint8_t buffer[16];
443 
444  if (NumBytes) {
445  Expr.push_back(dwarf::DW_OP_consts);
446  Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
447  Expr.push_back((uint8_t)dwarf::DW_OP_plus);
448  Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
449  }
450 
451  if (NumVGScaledBytes) {
452  Expr.push_back((uint8_t)dwarf::DW_OP_consts);
453  Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
454 
455  Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
456  Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
457  Expr.push_back(0);
458 
459  Expr.push_back((uint8_t)dwarf::DW_OP_mul);
460  Expr.push_back((uint8_t)dwarf::DW_OP_plus);
461 
462  Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
463  << std::abs(NumVGScaledBytes) << " * VG";
464  }
465 }
466 
467 // Creates an MCCFIInstruction:
468 // { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
// where expr is SP + NumBytes + NumVGScaledBytes * VG. The instruction is
// returned as an escape (MCCFIInstruction::createEscape) together with a
// comment string that starts with "sp" and is extended by
// appendVGScaledOffsetExpr.
469 MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
470  const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
471  int64_t NumBytes, NumVGScaledBytes; // NOTE(review): the start of the call that ends on the next line (listing line 472) is missing; presumably it splits OffsetFromSP into these two values - confirm.
473  NumVGScaledBytes);
474 
475  std::string CommentBuffer = "sp";
476  llvm::raw_string_ostream Comment(CommentBuffer);
477 
478  // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
479  SmallString<64> Expr;
480  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
481  Expr.push_back(0); // zero byte following the breg opcode
482  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
483  TRI.getDwarfRegNum(AArch64::VG, true), Comment);
484 
485  // Wrap this into DW_CFA_def_cfa_expression: opcode, ULEB128-encoded size of
//  Expr, then the bytes of Expr itself.
486  SmallString<64> DefCfaExpr;
487  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
488  uint8_t buffer[16];
489  DefCfaExpr.append(buffer,
490  buffer + encodeULEB128(Expr.size(), buffer)); // the return value is used as the number of bytes written to buffer
491  DefCfaExpr.append(Expr.str());
492  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
493  Comment.str());
494 }
495 
// Creates an MCCFIInstruction for register Reg at OffsetFromDefCFA.
// When the offset has no VG-scaled part the result is a plain
// MCCFIInstruction::createOffset(DwarfReg, NumBytes). Otherwise the result is
// an escape of the form
// { DW_CFA_expression, ULEB128 (DwarfReg), ULEB128 (sizeof expr), expr }
// with expr = NumBytes + NumVGScaledBytes * VG, plus a comment string of the
// form "<reg> @ cfa ...".
496 MCCFIInstruction AArch64FrameLowering::createCfaOffset(
497  const TargetRegisterInfo &TRI, unsigned Reg,
498  const StackOffset &OffsetFromDefCFA) const {
499  int64_t NumBytes, NumVGScaledBytes; // NOTE(review): the start of the call that ends on the next line (listing line 500) is missing; presumably it splits OffsetFromDefCFA into these two values - confirm.
501  OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
502 
503  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
504 
505  // Non-scalable offsets can use DW_CFA_offset directly.
506  if (!NumVGScaledBytes)
507  return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
508 
509  std::string CommentBuffer;
510  llvm::raw_string_ostream Comment(CommentBuffer);
511  Comment << printReg(Reg, &TRI) << " @ cfa";
512 
513  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
514  SmallString<64> OffsetExpr;
515  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
516  TRI.getDwarfRegNum(AArch64::VG, true), Comment);
517 
518  // Wrap this into DW_CFA_expression
519  SmallString<64> CfaExpr;
520  CfaExpr.push_back(dwarf::DW_CFA_expression);
521  uint8_t buffer[16]; // scratch space reused for both ULEB128 encodings below
522  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
523  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
524  CfaExpr.append(OffsetExpr.str());
525 
526  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
527 }
528 
531  MachineFunction &MF = *MBB.getParent();
532  MachineFrameInfo &MFI = MF.getFrameInfo();
533  const TargetSubtargetInfo &STI = MF.getSubtarget();
534  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
535  const TargetInstrInfo *TII = STI.getInstrInfo();
537 
538  // Add callee saved registers to move list.
539  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
540  if (CSI.empty())
541  return;
542 
543  for (const auto &Info : CSI) {
544  unsigned Reg = Info.getReg();
545 
546  // Not all unwinders may know about SVE registers, so assume the lowest
547  // common denominator.
548  unsigned NewReg;
549  if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
550  Reg = NewReg;
551  else
552  continue;
553 
555  if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::ScalableVector) {
557  Offset =
558  StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
560  } else {
561  Offset = StackOffset::getFixed(MFI.getObjectOffset(Info.getFrameIdx()) -
563  }
564  unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
565  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
566  .addCFIIndex(CFIIndex)
568  }
569 }
570 
571 // Find a scratch register that we can use at the start of the prologue to
572 // re-align the stack pointer. We avoid using callee-save registers since they
573 // may appear to be free when this is called from canUseAsPrologue (during
574 // shrink wrapping), but then no longer be free when this is called from
575 // emitPrologue.
576 //
577 // FIXME: This is a bit conservative, since in the above case we could use one
578 // of the callee-save registers as a scratch temp to re-align the stack pointer,
579 // but we would then have to make sure that we were in fact saving at least one
580 // callee-save register in the prologue, which is additional complexity that
581 // doesn't seem worth the benefit.
583  MachineFunction *MF = MBB->getParent();
584 
585  // If MBB is an entry block, use X9 as the scratch register
586  if (&MF->front() == MBB)
587  return AArch64::X9;
588 
589  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
590  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
591  LivePhysRegs LiveRegs(TRI);
592  LiveRegs.addLiveIns(*MBB);
593 
594  // Mark callee saved registers as used so we will not choose them.
595  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
596  for (unsigned i = 0; CSRegs[i]; ++i)
597  LiveRegs.addReg(CSRegs[i]);
598 
599  // Prefer X9 since it was historically used for the prologue scratch reg.
600  const MachineRegisterInfo &MRI = MF->getRegInfo();
601  if (LiveRegs.available(MRI, AArch64::X9))
602  return AArch64::X9;
603 
604  for (unsigned Reg : AArch64::GPR64RegClass) {
605  if (LiveRegs.available(MRI, Reg))
606  return Reg;
607  }
608  return AArch64::NoRegister;
609 }
610 
612  const MachineBasicBlock &MBB) const {
613  const MachineFunction *MF = MBB.getParent();
614  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
615  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
616  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
617 
618  // Don't need a scratch register if we're not going to re-align the stack.
619  if (!RegInfo->hasStackRealignment(*MF))
620  return true;
621  // Otherwise, we can use any block as long as it has a scratch register
622  // available.
623  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
624 }
625 
627  uint64_t StackSizeInBytes) {
628  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
629  if (!Subtarget.isTargetWindows())
630  return false;
631  const Function &F = MF.getFunction();
632  // TODO: When implementing stack protectors, take that into account
633  // for the probe threshold.
634  unsigned StackProbeSize = 4096;
635  if (F.hasFnAttribute("stack-probe-size"))
636  F.getFnAttribute("stack-probe-size")
637  .getValueAsString()
638  .getAsInteger(0, StackProbeSize);
639  return (StackSizeInBytes >= StackProbeSize) &&
640  !F.hasFnAttribute("no-stack-arg-probe");
641 }
642 
// Returns true when the target's MCAsmInfo reports usesWindowsCFI() and the
// function reports needsUnwindTableEntry().
643 static bool needsWinCFI(const MachineFunction &MF) {
644  const Function &F = MF.getFunction();
645  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
646  F.needsUnwindTableEntry();
647 }
648 
// Decides whether a stack bump of StackBumpBytes should be combined.
// Returns false when any of the following holds, and true otherwise:
//  * homogeneousPrologEpilog(MF) is true;
//  * the local stack size is 0;
//  * WinCFI is needed, the callee-saved stack size is non-zero and the
//    function has optsize;
//  * StackBumpBytes is 512 or more, or windowsRequiresStackProbe() says a
//    probe is required for that size;
//  * the frame has variable-sized objects or needs stack realignment;
//  * canUseRedZone(MF) is true;
//  * getSVEStackSize(MF) is non-zero.
649 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
650  MachineFunction &MF, uint64_t StackBumpBytes) const { // NOTE(review): the declaration of AFI (listing line 651) is missing from this listing.
652  const MachineFrameInfo &MFI = MF.getFrameInfo();
653  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
654  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
655  if (homogeneousPrologEpilog(MF))
656  return false;
657 
658  if (AFI->getLocalStackSize() == 0)
659  return false;
660 
661  // For WinCFI, if optimizing for size, prefer to not combine the stack bump
662  // (to force a stp with predecrement) to match the packed unwind format,
663  // provided that there actually are any callee saved registers to merge the
664  // decrement with.
665  // This is potentially marginally slower, but allows using the packed
666  // unwind format for functions that both have a local area and callee saved
667  // registers. Using the packed unwind format notably reduces the size of
668  // the unwind info.
669  if (needsWinCFI(MF) && AFI->getCalleeSavedStackSize() > 0 &&
670  MF.getFunction().hasOptSize())
671  return false;
672 
673  // 512 is the maximum immediate for stp/ldp that will be used for
674  // callee-save save/restores
675  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
676  return false;
677 
678  if (MFI.hasVarSizedObjects())
679  return false;
680 
681  if (RegInfo->hasStackRealignment(MF))
682  return false;
683 
684  // This isn't strictly necessary, but it simplifies things a bit since the
685  // current RedZone handling code assumes the SP is adjusted by the
686  // callee-save save/restore code.
687  if (canUseRedZone(MF))
688  return false;
689 
690  // When there is an SVE area on the stack, always allocate the
691  // callee-saves and spills/locals separately.
692  if (getSVEStackSize(MF))
693  return false;
694 
695  return true;
696 }
697 
// Epilogue variant of shouldCombineCSRLocalStackBump().
// Returns false if shouldCombineCSRLocalStackBump() rejects the bump for the
// parent function. Otherwise it returns true for an empty block. For a
// non-empty block it inspects one instruction near the end of MBB and returns
// false if that instruction is one of the tag-store opcodes listed in the
// switch, true otherwise.
698 bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
699  MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
700  if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
701  return false;
702 
703  if (MBB.empty())
704  return true;
705 
706  // Disable combined SP bump if the last instruction is an MTE tag store. It
707  // is almost always better to merge SP adjustment into those instructions.
// NOTE(review): the declarations of LastI and Begin (listing lines 708-709)
// are missing from this listing.
// Step backwards from LastI: transient instructions and instructions carrying
// the FrameDestroy flag are passed over; the walk stops at the first other
// instruction, or when Begin is reached.
710  while (LastI != Begin) {
711  --LastI;
712  if (LastI->isTransient())
713  continue;
714  if (!LastI->getFlag(MachineInstr::FrameDestroy))
715  break;
716  }
717  switch (LastI->getOpcode()) {
718  case AArch64::STGloop:
719  case AArch64::STZGloop:
720  case AArch64::STGOffset:
721  case AArch64::STZGOffset:
722  case AArch64::ST2GOffset:
723  case AArch64::STZ2GOffset:
724  return false;
725  default:
726  return true;
727  }
728  llvm_unreachable("unreachable"); // every path through the switch above returns
729 }
730 
731 // Given a load or a store instruction, generate an appropriate unwinding SEH
732 // code on Windows.
734  const TargetInstrInfo &TII,
736  unsigned Opc = MBBI->getOpcode();
738  MachineFunction &MF = *MBB->getParent();
739  DebugLoc DL = MBBI->getDebugLoc();
740  unsigned ImmIdx = MBBI->getNumOperands() - 1;
741  int Imm = MBBI->getOperand(ImmIdx).getImm();
743  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
744  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
745 
746  switch (Opc) {
747  default:
748  llvm_unreachable("No SEH Opcode for this instruction");
749  case AArch64::LDPDpost:
750  Imm = -Imm;
752  case AArch64::STPDpre: {
753  unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
754  unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
755  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
756  .addImm(Reg0)
757  .addImm(Reg1)
758  .addImm(Imm * 8)
759  .setMIFlag(Flag);
760  break;
761  }
762  case AArch64::LDPXpost:
763  Imm = -Imm;
765  case AArch64::STPXpre: {
766  Register Reg0 = MBBI->getOperand(1).getReg();
767  Register Reg1 = MBBI->getOperand(2).getReg();
768  if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
769  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
770  .addImm(Imm * 8)
771  .setMIFlag(Flag);
772  else
773  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
774  .addImm(RegInfo->getSEHRegNum(Reg0))
775  .addImm(RegInfo->getSEHRegNum(Reg1))
776  .addImm(Imm * 8)
777  .setMIFlag(Flag);
778  break;
779  }
780  case AArch64::LDRDpost:
781  Imm = -Imm;
783  case AArch64::STRDpre: {
784  unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
785  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
786  .addImm(Reg)
787  .addImm(Imm)
788  .setMIFlag(Flag);
789  break;
790  }
791  case AArch64::LDRXpost:
792  Imm = -Imm;
794  case AArch64::STRXpre: {
795  unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
796  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
797  .addImm(Reg)
798  .addImm(Imm)
799  .setMIFlag(Flag);
800  break;
801  }
802  case AArch64::STPDi:
803  case AArch64::LDPDi: {
804  unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
805  unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
806  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
807  .addImm(Reg0)
808  .addImm(Reg1)
809  .addImm(Imm * 8)
810  .setMIFlag(Flag);
811  break;
812  }
813  case AArch64::STPXi:
814  case AArch64::LDPXi: {
815  Register Reg0 = MBBI->getOperand(0).getReg();
816  Register Reg1 = MBBI->getOperand(1).getReg();
817  if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
818  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
819  .addImm(Imm * 8)
820  .setMIFlag(Flag);
821  else
822  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
823  .addImm(RegInfo->getSEHRegNum(Reg0))
824  .addImm(RegInfo->getSEHRegNum(Reg1))
825  .addImm(Imm * 8)
826  .setMIFlag(Flag);
827  break;
828  }
829  case AArch64::STRXui:
830  case AArch64::LDRXui: {
831  int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
832  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
833  .addImm(Reg)
834  .addImm(Imm * 8)
835  .setMIFlag(Flag);
836  break;
837  }
838  case AArch64::STRDui:
839  case AArch64::LDRDui: {
840  unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
841  MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
842  .addImm(Reg)
843  .addImm(Imm * 8)
844  .setMIFlag(Flag);
845  break;
846  }
847  }
848  auto I = MBB->insertAfter(MBBI, MIB);
849  return I;
850 }
851 
852 // Fix up the SEH opcode associated with the save/restore instruction.
854  unsigned LocalStackSize) {
855  MachineOperand *ImmOpnd = nullptr;
856  unsigned ImmIdx = MBBI->getNumOperands() - 1;
857  switch (MBBI->getOpcode()) {
858  default:
859  llvm_unreachable("Fix the offset in the SEH instruction");
860  case AArch64::SEH_SaveFPLR:
861  case AArch64::SEH_SaveRegP:
862  case AArch64::SEH_SaveReg:
863  case AArch64::SEH_SaveFRegP:
864  case AArch64::SEH_SaveFReg:
865  ImmOpnd = &MBBI->getOperand(ImmIdx);
866  break;
867  }
868  if (ImmOpnd)
869  ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
870 }
871 
872 // Convert callee-save register save/restore instruction to do stack pointer
873 // decrement/increment to allocate/deallocate the callee-save stack area by
874 // converting store/load to use pre/post increment version.
// The instruction at MBBI is replaced by a newly built one that also defines
// SP and carries CSStackSizeInc (divided by the opcode's Scale) as its
// immediate. Flags and memory operands are copied from the old instruction.
// When NeedsWinCFI is set, the SEH pseudo following the old instruction is
// erased, a new one is inserted, and *HasWinCFI is set to true.
// NOTE(review): the listing lines holding the return type, function name and
// leading parameters (875-876), plus lines 939 and 967, are missing from this
// extract.
877  const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
878  bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
879  // Ignore instructions that do not operate on SP, i.e. shadow call stack
880  // instructions and associated CFI instruction.
881  while (MBBI->getOpcode() == AArch64::STRXpost ||
882  MBBI->getOpcode() == AArch64::LDRXpre ||
883  MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
884  if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
885  assert(MBBI->getOperand(0).getReg() != AArch64::SP);
886  ++MBBI;
887  }
// Map the SP-relative opcode to its pre-indexed (stores) or post-indexed
// (loads) counterpart. Scale is the divisor applied to CSStackSizeInc below;
// the single-register cases leave it at 1.
888  unsigned NewOpc;
889  int Scale = 1;
890  switch (MBBI->getOpcode()) {
891  default:
892  llvm_unreachable("Unexpected callee-save save/restore opcode!");
893  case AArch64::STPXi:
894  NewOpc = AArch64::STPXpre;
895  Scale = 8;
896  break;
897  case AArch64::STPDi:
898  NewOpc = AArch64::STPDpre;
899  Scale = 8;
900  break;
901  case AArch64::STPQi:
902  NewOpc = AArch64::STPQpre;
903  Scale = 16;
904  break;
905  case AArch64::STRXui:
906  NewOpc = AArch64::STRXpre;
907  break;
908  case AArch64::STRDui:
909  NewOpc = AArch64::STRDpre;
910  break;
911  case AArch64::STRQui:
912  NewOpc = AArch64::STRQpre;
913  break;
914  case AArch64::LDPXi:
915  NewOpc = AArch64::LDPXpost;
916  Scale = 8;
917  break;
918  case AArch64::LDPDi:
919  NewOpc = AArch64::LDPDpost;
920  Scale = 8;
921  break;
922  case AArch64::LDPQi:
923  NewOpc = AArch64::LDPQpost;
924  Scale = 16;
925  break;
926  case AArch64::LDRXui:
927  NewOpc = AArch64::LDRXpost;
928  break;
929  case AArch64::LDRDui:
930  NewOpc = AArch64::LDRDpost;
931  break;
932  case AArch64::LDRQui:
933  NewOpc = AArch64::LDRQpost;
934  break;
935  }
936  // Get rid of the SEH code associated with the old instruction.
937  if (NeedsWinCFI) {
938  auto SEH = std::next(MBBI);
940  SEH->eraseFromParent();
941  }
942 
// Build the replacement in front of the old instruction; the extra leading
// operand is SP as a definition (the write-back result).
943  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
944  MIB.addReg(AArch64::SP, RegState::Define);
945 
946  // Copy all operands other than the immediate offset.
947  unsigned OpndIdx = 0;
948  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
949  ++OpndIdx)
950  MIB.add(MBBI->getOperand(OpndIdx));
951 
// After the loop OpndIdx indexes the old immediate; the operand before it is
// the base register.
952  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
953  "Unexpected immediate offset in first/last callee-save save/restore "
954  "instruction!");
955  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
956  "Unexpected base register in callee-save save/restore instruction!");
957  assert(CSStackSizeInc % Scale == 0);
958  MIB.addImm(CSStackSizeInc / Scale);
959 
960  MIB.setMIFlags(MBBI->getFlags());
961  MIB.setMemRefs(MBBI->memoperands());
962 
963  // Generate a new SEH code that corresponds to the new instruction.
964  if (NeedsWinCFI) {
965  *HasWinCFI = true;
966  InsertSEH(*MIB, *TII,
968  }
969 
// Erase the old instruction and return the iterator to whatever now sits
// immediately before its former position.
970  return std::prev(MBB.erase(MBBI));
971 }
972 
973 // Fixup callee-save register save/restore instructions to take into account
974 // combined SP bump by adding the local stack size to the stack offsets.
// The offset operand of MI is increased by LocalStackSize / Scale, where Scale
// (8 or 16) is chosen from the opcode. When NeedsWinCFI is set, *HasWinCFI
// becomes true and the SEH pseudo following MI is adjusted via fixupSEHOpcode.
// NOTE(review): the listing lines with the function name and first parameter
// (975), the condition guarding the early return (979) and the first half of
// the SEH assert (1028) are missing from this extract.
976  uint64_t LocalStackSize,
977  bool NeedsWinCFI,
978  bool *HasWinCFI) {
980  return;
981 
982  unsigned Opc = MI.getOpcode();
983 
984  // Ignore instructions that do not operate on SP, i.e. shadow call stack
985  // instructions and associated CFI instruction.
986  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
987  Opc == AArch64::CFI_INSTRUCTION) {
988  if (Opc != AArch64::CFI_INSTRUCTION)
989  assert(MI.getOperand(0).getReg() != AArch64::SP);
990  return;
991  }
992 
// Pick the divisor used to convert the byte count into the instruction's
// immediate units: 8 for the X/D forms, 16 for the Q forms.
993  unsigned Scale;
994  switch (Opc) {
995  case AArch64::STPXi:
996  case AArch64::STRXui:
997  case AArch64::STPDi:
998  case AArch64::STRDui:
999  case AArch64::LDPXi:
1000  case AArch64::LDRXui:
1001  case AArch64::LDPDi:
1002  case AArch64::LDRDui:
1003  Scale = 8;
1004  break;
1005  case AArch64::STPQi:
1006  case AArch64::STRQui:
1007  case AArch64::LDPQi:
1008  case AArch64::LDRQui:
1009  Scale = 16;
1010  break;
1011  default:
1012  llvm_unreachable("Unexpected callee-save save/restore opcode!");
1013  }
1014 
1015  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
1016  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
1017  "Unexpected base register in callee-save save/restore instruction!");
1018  // Last operand is immediate offset that needs fixing.
1019  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
1020  // All generated opcodes have scaled offsets.
1021  assert(LocalStackSize % Scale == 0);
1022  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
1023 
// Keep the SEH pseudo that follows MI in step with the new offset. Note that
// it receives the unscaled LocalStackSize.
1024  if (NeedsWinCFI) {
1025  *HasWinCFI = true;
1026  auto MBBI = std::next(MachineBasicBlock::iterator(MI));
1027  assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
1029  "Expecting a SEH instruction");
1030  fixupSEHOpcode(MBBI, LocalStackSize);
1031  }
1032 }
1033 
// Reorders callee-save pops so that the pop at LastPopI ends up directly
// before FirstSPPopI, as described in the comments below.
// NOTE(review): the listing line with the function name and first parameter
// (1034) and the line just before the splice (1050, presumably the condition
// guarding it) are missing from this extract.
1035  MachineBasicBlock::iterator FirstSPPopI,
1036  MachineBasicBlock::iterator LastPopI) {
1037  // Sometimes (when we restore in the same order as we save), we can end up
1038  // with code like this:
1039  //
1040  // ldp x26, x25, [sp]
1041  // ldp x24, x23, [sp, #16]
1042  // ldp x22, x21, [sp, #32]
1043  // ldp x20, x19, [sp, #48]
1044  // add sp, sp, #64
1045  //
1046  // In this case, it is always better to put the first ldp at the end, so
1047  // that the load-store optimizer can run and merge the ldp and the add into
1048  // a post-index ldp.
1049  // If we managed to grab the first pop instruction, move it to the end.
1051  MBB.splice(FirstSPPopI, &MBB, LastPopI);
1052  // We should end up with something like this now:
1053  //
1054  // ldp x24, x23, [sp, #16]
1055  // ldp x22, x21, [sp, #32]
1056  // ldp x20, x19, [sp, #48]
1057  // ldp x26, x25, [sp]
1058  // add sp, sp, #64
1059  //
1060  // and the load-store optimizer can merge the last two instructions into:
1061  //
1062  // ldp x26, x25, [sp], #64
1063  //
1064 }
1065 
// NOTE(review): presumably reports whether MF is being compiled for a Windows
// target; the statement that produces the result (listing line 1067) is
// missing from this extract, so confirm against the full file.
1066 static bool isTargetWindows(const MachineFunction &MF) {
1068 }
1069 
1070 // Convenience function to determine whether I is an SVE callee save.
// Returns true only when I is one of STR_ZXI, STR_PXI, LDR_ZXI or LDR_PXI and
// carries the FrameSetup or FrameDestroy flag; every other opcode yields false.
// NOTE(review): the signature line (listing line 1071) is missing from this
// extract.
1072  switch (I->getOpcode()) {
1073  default:
1074  return false;
1075  case AArch64::STR_ZXI:
1076  case AArch64::STR_PXI:
1077  case AArch64::LDR_ZXI:
1078  case AArch64::LDR_PXI:
1079  return I->getFlag(MachineInstr::FrameSetup) ||
1080  I->getFlag(MachineInstr::FrameDestroy);
1081  }
1082 }
1083 
// Emits the function (or funclet) prologue into MBB. In order, the visible
// code: optionally signs the return address, handles the frameless case
// (red zone or a single SP decrement), allocates the callee-save area either
// combined with the locals or via a pre-indexed first save, sets up FP, emits
// the Windows stack probe when required, allocates the SVE area, allocates the
// remaining locals (with optional realignment), sets up the base pointer, and
// finally emits SEH/DWARF CFI describing the frame.
// NOTE(review): several listing lines are missing from this extract (see the
// gaps in the left-hand numbering), including the line with the function name
// and first parameter; builder chains that look unterminated presumably
// continue on those missing lines.
1085  MachineBasicBlock &MBB) const {
1087  const MachineFrameInfo &MFI = MF.getFrameInfo();
1088  const Function &F = MF.getFunction();
1089  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1090  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1091  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1092  MachineModuleInfo &MMI = MF.getMMI();
1094  bool needsFrameMoves =
1096  bool HasFP = hasFP(MF);
1097  bool NeedsWinCFI = needsWinCFI(MF);
1098  bool HasWinCFI = false;
// Publish the final HasWinCFI value to MF on every exit path.
1099  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
1100 
1101  bool IsFunclet = MBB.isEHFuncletEntry();
1102 
1103  // At this point, we're going to decide whether or not the function uses a
1104  // redzone. In most cases, the function doesn't have a redzone so let's
1105  // assume that's false and set it to true in the case that there's a redzone.
1106  AFI->setHasRedZone(false);
1107 
1108  // Debug location must be unknown since the first debug location is used
1109  // to determine the end of the prologue.
1110  DebugLoc DL;
1111 
1112  const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
1113  if (MFnI.shouldSignReturnAddress()) {
1114  if (MFnI.shouldSignWithBKey()) {
1115  BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
1117  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
1119  } else {
1120  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
1122  }
1123 
1124  unsigned CFIIndex =
1126  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1127  .addCFIIndex(CFIIndex)
1129  }
1130 
1131  // All calls are tail calls in GHC calling conv, and functions have no
1132  // prologue/epilogue.
1134  return;
1135 
1136  // Set tagged base pointer to the requested stack slot.
1137  // Ideally it should match SP value after prologue.
1139  if (TBPI)
1140  AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
1141  else
1143 
1144  const StackOffset &SVEStackSize = getSVEStackSize(MF);
1145 
1146  // getStackSize() includes all the locals in its size calculation. We don't
1147  // include these locals when computing the stack size of a funclet, as they
1148  // are allocated in the parent's stack frame and accessed via the frame
1149  // pointer from the funclet. We only save the callee saved registers in the
1150  // funclet, which are really the callee saved registers of the parent
1151  // function, including the funclet.
1152  int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
1153  : MFI.getStackSize();
// Frameless path: no stack frame and no Windows stack probe required.
1154  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
1155  assert(!HasFP && "unexpected function without stack frame but with FP");
1156  assert(!SVEStackSize &&
1157  "unexpected function without stack frame but with SVE objects");
1158  // All of the stack allocation is for locals.
1159  AFI->setLocalStackSize(NumBytes);
1160  if (!NumBytes)
1161  return;
1162  // REDZONE: If the stack size is less than 128 bytes, we don't need
1163  // to actually allocate.
1164  if (canUseRedZone(MF)) {
1165  AFI->setHasRedZone(true);
1166  ++NumRedZoneFunctions;
1167  } else {
1168  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1169  StackOffset::getFixed(-NumBytes), TII,
1170  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1171  if (!NeedsWinCFI && needsFrameMoves) {
1172  // Label used to tie together the PROLOG_LABEL and the MachineMoves.
1173  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
1174  // Encode the stack size of the leaf function.
1175  unsigned CFIIndex = MF.addFrameInst(
1176  MCCFIInstruction::cfiDefCfaOffset(FrameLabel, NumBytes));
1177  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1178  .addCFIIndex(CFIIndex)
1180  }
1181  }
1182 
1183  if (NeedsWinCFI) {
1184  HasWinCFI = true;
1185  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1187  }
1188 
1189  return;
1190  }
1191 
1192  bool IsWin64 =
1193  Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1194  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1195 
1196  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1197  // All of the remaining stack allocations are for locals.
1198  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1199  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1200  bool HomPrologEpilog = homogeneousPrologEpilog(MF);
// Three ways to allocate the callee-save area: one combined SP bump for
// everything, already done by the homogeneous prolog, or folded into the first
// save. NumBytes afterwards holds what is still left to allocate.
1201  if (CombineSPBump) {
1202  assert(!SVEStackSize && "Cannot combine SP bump with SVE");
1203  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
1204  StackOffset::getFixed(-NumBytes), TII,
1205  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1206  NumBytes = 0;
1207  } else if (HomPrologEpilog) {
1208  // Stack has been already adjusted.
1209  NumBytes -= PrologueSaveSize;
1210  } else if (PrologueSaveSize != 0) {
1212  MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
1213  NumBytes -= PrologueSaveSize;
1214  }
1215  assert(NumBytes >= 0 && "Negative stack allocation size!?");
1216 
1217  // Move past the saves of the callee-saved registers, fixing up the offsets
1218  // and pre-inc if we decided to combine the callee-save and local stack
1219  // pointer bump above.
1221  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
1222  !IsSVECalleeSave(MBBI)) {
1223  if (CombineSPBump)
1225  NeedsWinCFI, &HasWinCFI);
1226  ++MBBI;
1227  }
1228 
1229  // For funclets the FP belongs to the containing function.
1230  if (!IsFunclet && HasFP) {
1231  // Only set up FP if we actually need to.
1232  int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
1233 
1234  if (CombineSPBump)
1235  FPOffset += AFI->getLocalStackSize();
1236 
1237  if (HomPrologEpilog) {
1238  auto Prolog = MBBI;
1239  --Prolog;
1240  assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1241  Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1242  } else {
1243  // Issue sub fp, sp, FPOffset or
1244  // mov fp,sp when FPOffset is zero.
1245  // Note: All stores of callee-saved registers are marked as "FrameSetup".
1246  // This code marks the instruction(s) that set the FP also.
1247  emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1248  StackOffset::getFixed(FPOffset), TII,
1249  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1250  }
1251  }
1252 
// Windows stack probe: load NumBytes / 16 into X15, call __chkstk, then
// subtract the probed amount from SP.
1253  if (windowsRequiresStackProbe(MF, NumBytes)) {
1254  uint64_t NumWords = NumBytes >> 4;
1255  if (NeedsWinCFI) {
1256  HasWinCFI = true;
1257  // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
1258  // exceed this amount. We need to move at most 2^24 - 1 into x15.
1259  // This is at most two instructions, MOVZ followed by MOVK.
1260  // TODO: Fix to use multiple stack alloc unwind codes for stacks
1261  // exceeding 256MB in size.
1262  if (NumBytes >= (1 << 28))
1263  report_fatal_error("Stack size cannot exceed 256MB for stack "
1264  "unwinding purposes");
1265 
1266  uint32_t LowNumWords = NumWords & 0xFFFF;
1267  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1268  .addImm(LowNumWords)
1271  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1273  if ((NumWords & 0xFFFF0000) != 0) {
1274  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1275  .addReg(AArch64::X15)
1276  .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1279  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1281  }
1282  } else {
1283  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1284  .addImm(NumWords)
1286  }
1287 
// The call sequence depends on the code model: a direct BL for the smaller
// models, an address materialised in X16 plus an indirect call for Large.
1288  switch (MF.getTarget().getCodeModel()) {
1289  case CodeModel::Tiny:
1290  case CodeModel::Small:
1291  case CodeModel::Medium:
1292  case CodeModel::Kernel:
1293  BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1294  .addExternalSymbol("__chkstk")
1295  .addReg(AArch64::X15, RegState::Implicit)
1300  if (NeedsWinCFI) {
1301  HasWinCFI = true;
1302  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1304  }
1305  break;
1306  case CodeModel::Large:
1307  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1308  .addReg(AArch64::X16, RegState::Define)
1309  .addExternalSymbol("__chkstk")
1310  .addExternalSymbol("__chkstk")
1312  if (NeedsWinCFI) {
1313  HasWinCFI = true;
1314  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1316  }
1317 
1318  BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
1319  .addReg(AArch64::X16, RegState::Kill)
1320  .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
1325  if (NeedsWinCFI) {
1326  HasWinCFI = true;
1327  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1329  }
1330  break;
1331  }
1332 
1333  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1334  .addReg(AArch64::SP, RegState::Kill)
1335  .addReg(AArch64::X15, RegState::Kill)
1338  if (NeedsWinCFI) {
1339  HasWinCFI = true;
1340  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1341  .addImm(NumBytes)
1343  }
1344  NumBytes = 0;
1345  }
1346 
// By default the whole SVE area is allocated in one step before the
// (non-existent) SVE callee saves; the block below splits it when there are
// SVE callee saves.
1347  StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
1348  MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
1349 
1350  // Process the SVE callee-saves to determine what space needs to be
1351  // allocated.
1352  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
1353  // Find callee save instructions in frame.
1354  CalleeSavesBegin = MBBI;
1355  assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
1357  ++MBBI;
1358  CalleeSavesEnd = MBBI;
1359 
1360  AllocateBefore = StackOffset::getScalable(CalleeSavedSize);
1361  AllocateAfter = SVEStackSize - AllocateBefore;
1362  }
1363 
1364  // Allocate space for the callee saves (if any).
1365  emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
1366  -AllocateBefore, TII,
1368 
1369  // Finally allocate remaining SVE stack space.
1370  emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
1371  -AllocateAfter, TII,
1373 
1374  // Allocate space for the rest of the frame.
1375  if (NumBytes) {
1376  // Alignment is required for the parent frame, not the funclet
1377  const bool NeedsRealignment =
1378  !IsFunclet && RegInfo->hasStackRealignment(MF);
1379  unsigned scratchSPReg = AArch64::SP;
1380 
1381  if (NeedsRealignment) {
1382  scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
1383  assert(scratchSPReg != AArch64::NoRegister);
1384  }
1385 
1386  // If we're a leaf function, try using the red zone.
1387  if (!canUseRedZone(MF))
1388  // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1389  // the correct value here, as NumBytes also includes padding bytes,
1390  // which shouldn't be counted here.
1391  emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
1392  StackOffset::getFixed(-NumBytes), TII,
1393  MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1394 
1395  if (NeedsRealignment) {
1396  const unsigned NrBitsToZero = Log2(MFI.getMaxAlign());
1397  assert(NrBitsToZero > 1);
1398  assert(scratchSPReg != AArch64::SP);
1399 
1400  // SUB X9, SP, NumBytes
1401  // -- X9 is temporary register, so shouldn't contain any live data here,
1402  // -- free to use. This is already produced by emitFrameOffset above.
1403  // AND SP, X9, 0b11111...0000
1404  // The logical immediates have a non-trivial encoding. The following
1405  // formula computes the encoded immediate with all ones but
1406  // NrBitsToZero zero bits as least significant bits.
1407  uint32_t andMaskEncoded = (1 << 12) // = N
1408  | ((64 - NrBitsToZero) << 6) // immr
1409  | ((64 - NrBitsToZero - 1) << 0); // imms
1410 
1411  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1412  .addReg(scratchSPReg, RegState::Kill)
1413  .addImm(andMaskEncoded);
1414  AFI->setStackRealigned(true);
1415  if (NeedsWinCFI) {
1416  HasWinCFI = true;
1417  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1418  .addImm(NumBytes & andMaskEncoded)
1420  }
1421  }
1422  }
1423 
1424  // If we need a base pointer, set it up here. It's whatever the value of the
1425  // stack pointer is at this point. Any variable size objects will be allocated
1426  // after this, so we can still use the base pointer to reference locals.
1427  //
1428  // FIXME: Clarify FrameSetup flags here.
1429  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
1430  // needed.
1431  // For funclets the BP belongs to the containing function.
1432  if (!IsFunclet && RegInfo->hasBasePointer(MF)) {
1433  TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
1434  false);
1435  if (NeedsWinCFI) {
1436  HasWinCFI = true;
1437  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1439  }
1440  }
1441 
1442  // The very last FrameSetup instruction indicates the end of prologue. Emit a
1443  // SEH opcode indicating the prologue end.
1444  if (NeedsWinCFI && HasWinCFI) {
1445  BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1447  }
1448 
1449  // SEH funclets are passed the frame pointer in X1. If the parent
1450  // function uses the base register, then the base register is used
1451  // directly, and is not retrieved from X1.
1452  if (IsFunclet && F.hasPersonalityFn()) {
1453  EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
1454  if (isAsynchronousEHPersonality(Per)) {
1455  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
1456  .addReg(AArch64::X1)
1458  MBB.addLiveIn(AArch64::X1);
1459  }
1460  }
1461 
1462  if (needsFrameMoves) {
1463  // An example of the prologue:
1464  //
1465  // .globl __foo
1466  // .align 2
1467  // __foo:
1468  // Ltmp0:
1469  // .cfi_startproc
1470  // .cfi_personality 155, ___gxx_personality_v0
1471  // Leh_func_begin:
1472  // .cfi_lsda 16, Lexception33
1473  //
1474  // stp xa,bx, [sp, -#offset]!
1475  // ...
1476  // stp x28, x27, [sp, #offset-32]
1477  // stp fp, lr, [sp, #offset-16]
1478  // add fp, sp, #offset - 16
1479  // sub sp, sp, #1360
1480  //
1481  // The Stack:
1482  // +-------------------------------------------+
1483  // 10000 | ........ | ........ | ........ | ........ |
1484  // 10004 | ........ | ........ | ........ | ........ |
1485  // +-------------------------------------------+
1486  // 10008 | ........ | ........ | ........ | ........ |
1487  // 1000c | ........ | ........ | ........ | ........ |
1488  // +===========================================+
1489  // 10010 | X28 Register |
1490  // 10014 | X28 Register |
1491  // +-------------------------------------------+
1492  // 10018 | X27 Register |
1493  // 1001c | X27 Register |
1494  // +===========================================+
1495  // 10020 | Frame Pointer |
1496  // 10024 | Frame Pointer |
1497  // +-------------------------------------------+
1498  // 10028 | Link Register |
1499  // 1002c | Link Register |
1500  // +===========================================+
1501  // 10030 | ........ | ........ | ........ | ........ |
1502  // 10034 | ........ | ........ | ........ | ........ |
1503  // +-------------------------------------------+
1504  // 10038 | ........ | ........ | ........ | ........ |
1505  // 1003c | ........ | ........ | ........ | ........ |
1506  // +-------------------------------------------+
1507  //
1508  // [sp] = 10030 :: >>initial value<<
1509  // sp = 10020 :: stp fp, lr, [sp, #-16]!
1510  // fp = sp == 10020 :: mov fp, sp
1511  // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
1512  // sp == 10010 :: >>final value<<
1513  //
1514  // The frame pointer (w29) points to address 10020. If we use an offset of
1515  // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
1516  // for w27, and -32 for w28:
1517  //
1518  // Ltmp1:
1519  // .cfi_def_cfa w29, 16
1520  // Ltmp2:
1521  // .cfi_offset w30, -8
1522  // Ltmp3:
1523  // .cfi_offset w29, -16
1524  // Ltmp4:
1525  // .cfi_offset w27, -24
1526  // Ltmp5:
1527  // .cfi_offset w28, -32
1528 
1529  if (HasFP) {
1530  const int OffsetToFirstCalleeSaveFromFP =
1532  AFI->getCalleeSavedStackSize();
1533  Register FramePtr = RegInfo->getFrameRegister(MF);
1534 
1535  // Define the current CFA rule to use the provided FP.
1536  unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
1537  unsigned CFIIndex = MF.addFrameInst(
1538  MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
1539  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1540  .addCFIIndex(CFIIndex)
1542  } else {
1543  unsigned CFIIndex;
1544  if (SVEStackSize) {
1545  const TargetSubtargetInfo &STI = MF.getSubtarget();
1546  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1547  StackOffset TotalSize =
1548  SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
1549  CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
1550  } else {
1551  // Encode the stack size of the leaf function.
1552  CFIIndex = MF.addFrameInst(
1554  }
1555  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1556  .addCFIIndex(CFIIndex)
1558  }
1559 
1560  // Now emit the moves for whatever callee saved regs we have (including FP,
1561  // LR if those are saved).
1563  }
1564 }
1565 
// Inserts return-address authentication, doing nothing unless
// shouldSignReturnAddress() holds for the function. When the subtarget has
// PAuth and MBBI points at a RET_ReallyLR, that return is replaced by RETAB or
// RETAA (chosen by shouldSignWithBKey()) with the implicit operands copied
// over; otherwise an AUTIBSP or AUTIASP is inserted before MBBI.
// NOTE(review): the signature lines (listing lines 1566-1567), the definition
// of MBBI (1574) and the tail of the final builder chain (1593) are missing
// from this extract.
1568  const auto &MFI = *MF.getInfo<AArch64FunctionInfo>();
1569  if (!MFI.shouldSignReturnAddress())
1570  return;
1571  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1572  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1573 
1575  DebugLoc DL;
1576  if (MBBI != MBB.end())
1577  DL = MBBI->getDebugLoc();
1578 
1579  // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1580  // this instruction can safely be used for any v8a architecture.
1581  // From v8.3a onwards there are optimised authenticate LR and return
1582  // instructions, namely RETA{A,B}, that can be used instead.
1583  if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
1584  MBBI->getOpcode() == AArch64::RET_ReallyLR) {
1585  BuildMI(MBB, MBBI, DL,
1586  TII->get(MFI.shouldSignWithBKey() ? AArch64::RETAB : AArch64::RETAA))
1587  .copyImplicitOps(*MBBI);
1588  MBB.erase(MBBI);
1589  } else {
1590  BuildMI(
1591  MBB, MBBI, DL,
1592  TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
1594  }
1595 }
1596 
1597 static bool isFuncletReturnInstr(const MachineInstr &MI) {
1598  // Only the CATCHRET and CLEANUPRET opcodes count as funclet returns.
1599  const unsigned Opcode = MI.getOpcode();
1600  if (Opcode == AArch64::CATCHRET)
1601  return true;
1602  if (Opcode == AArch64::CLEANUPRET)
1603  return true;
1604  return false;
1605 }
1606 
1608  MachineBasicBlock &MBB) const {
1610  MachineFrameInfo &MFI = MF.getFrameInfo();
1611  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1612  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1613  DebugLoc DL;
1614  bool NeedsWinCFI = needsWinCFI(MF);
1615  bool HasWinCFI = false;
1616  bool IsFunclet = false;
1617  auto WinCFI = make_scope_exit([&]() { assert(HasWinCFI == MF.hasWinCFI()); });
1618 
1619  if (MBB.end() != MBBI) {
1620  DL = MBBI->getDebugLoc();
1621  IsFunclet = isFuncletReturnInstr(*MBBI);
1622  }
1623 
1624  int64_t NumBytes = IsFunclet ? getWinEHFuncletFrameSize(MF)
1625  : MFI.getStackSize();
1627 
1628  // All calls are tail calls in GHC calling conv, and functions have no
1629  // prologue/epilogue.
1631  return;
1632 
1633  // Initial and residual are named for consistency with the prologue. Note that
1634  // in the epilogue, the residual adjustment is executed first.
1635  uint64_t ArgumentPopSize = getArgumentPopSize(MF, MBB);
1636 
1637  // The stack frame should be like below,
1638  //
1639  // ---------------------- ---
1640  // | | |
1641  // | BytesInStackArgArea| CalleeArgStackSize
1642  // | (NumReusableBytes) | (of tail call)
1643  // | | ---
1644  // | | |
1645  // ---------------------| --- |
1646  // | | | |
1647  // | CalleeSavedReg | | |
1648  // | (CalleeSavedStackSize)| | |
1649  // | | | |
1650  // ---------------------| | NumBytes
1651  // | | StackSize (StackAdjustUp)
1652  // | LocalStackSize | | |
1653  // | (covering callee | | |
1654  // | args) | | |
1655  // | | | |
1656  // ---------------------- --- ---
1657  //
1658  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1659  // = StackSize + ArgumentPopSize
1660  //
1661  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1662  // it as the 2nd argument of AArch64ISD::TC_RETURN.
1663 
1664  auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1665 
1666  bool IsWin64 =
1667  Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1668  unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
1669 
1670  uint64_t AfterCSRPopSize = ArgumentPopSize;
1671  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1672  // We cannot rely on the local stack size set in emitPrologue if the function
1673  // has funclets, as funclets have different local stack size requirements, and
1674  // the current value set in emitPrologue may be that of the containing
1675  // function.
1676  if (MF.hasEHFunclets())
1677  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1678  if (homogeneousPrologEpilog(MF, &MBB)) {
1679  assert(!NeedsWinCFI);
1680  auto LastPopI = MBB.getFirstTerminator();
1681  if (LastPopI != MBB.begin()) {
1682  auto HomogeneousEpilog = std::prev(LastPopI);
1683  if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1684  LastPopI = HomogeneousEpilog;
1685  }
1686 
1687  // Adjust local stack
1688  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1690  MachineInstr::FrameDestroy, false, NeedsWinCFI);
1691 
1692  // SP has been already adjusted while restoring callee save regs.
1693  // We've bailed-out the case with adjusting SP for arguments.
1694  assert(AfterCSRPopSize == 0);
1695  return;
1696  }
1697  bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
1698  // Assume we can't combine the last pop with the sp restore.
1699 
1700  if (!CombineSPBump && PrologueSaveSize != 0) {
1703  Pop = std::prev(Pop);
1704  // Converting the last ldp to a post-index ldp is valid only if the last
1705  // ldp's offset is 0.
1706  const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1707  // If the offset is 0, convert it to a post-index ldp.
1708  if (OffsetOp.getImm() == 0)
1710  MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1711  else {
1712  // If not, make sure to emit an add after the last ldp.
1713  // We're doing this by transferring the size to be restored from the
1714  // adjustment *before* the CSR pops to the adjustment *after* the CSR
1715  // pops.
1716  AfterCSRPopSize += PrologueSaveSize;
1717  }
1718  }
1719 
1720  // Move past the restores of the callee-saved registers.
1721  // If we plan on combining the sp bump of the local stack size and the callee
1722  // save stack size, we might need to adjust the CSR save and restore offsets.
1725  while (LastPopI != Begin) {
1726  --LastPopI;
1727  if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
1728  IsSVECalleeSave(LastPopI)) {
1729  ++LastPopI;
1730  break;
1731  } else if (CombineSPBump)
1733  NeedsWinCFI, &HasWinCFI);
1734  }
1735 
1736  if (MF.hasWinCFI()) {
1737  // If the prologue didn't contain any SEH opcodes and didn't set the
1738  // MF.hasWinCFI() flag, assume the epilogue won't either, and skip the
1739  // EpilogStart - to avoid generating CFI for functions that don't need it.
1740  // (And as we didn't generate any prologue at all, it would be asymmetrical
1741  // to the epilogue.) By the end of the function, we assert that
1742  // HasWinCFI is equal to MF.hasWinCFI(), to verify this assumption.
1743  HasWinCFI = true;
1744  BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1746  }
1747 
1748  const StackOffset &SVEStackSize = getSVEStackSize(MF);
1749 
1750  // If there is a single SP update, insert it before the ret and we're done.
1751  if (CombineSPBump) {
1752  assert(!SVEStackSize && "Cannot combine SP bump with SVE");
1753  emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1754  StackOffset::getFixed(NumBytes + (int64_t)AfterCSRPopSize),
1755  TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
1756  &HasWinCFI);
1757  if (HasWinCFI)
1759  TII->get(AArch64::SEH_EpilogEnd))
1761  return;
1762  }
1763 
1764  NumBytes -= PrologueSaveSize;
1765  assert(NumBytes >= 0 && "Negative stack allocation size!?");
1766 
1767  // Process the SVE callee-saves to determine what space needs to be
1768  // deallocated.
1769  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1770  MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
1771  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
1772  RestoreBegin = std::prev(RestoreEnd);
1773  while (RestoreBegin != MBB.begin() &&
1774  IsSVECalleeSave(std::prev(RestoreBegin)))
1775  --RestoreBegin;
1776 
1777  assert(IsSVECalleeSave(RestoreBegin) &&
1778  IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
1779 
1780  StackOffset CalleeSavedSizeAsOffset =
1781  StackOffset::getScalable(CalleeSavedSize);
1782  DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1783  DeallocateAfter = CalleeSavedSizeAsOffset;
1784  }
1785 
1786  // Deallocate the SVE area.
1787  if (SVEStackSize) {
1788  if (AFI->isStackRealigned()) {
1789  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
1790  // Set SP to start of SVE callee-save area from which they can
1791  // be reloaded. The code below will deallocate the stack space
1792  // by moving FP -> SP.
1793  emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
1794  StackOffset::getScalable(-CalleeSavedSize), TII,
1796  } else {
1797  if (AFI->getSVECalleeSavedStackSize()) {
1798  // Deallocate the non-SVE locals first before we can deallocate (and
1799  // restore callee saves) from the SVE area.
1800  emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1801  StackOffset::getFixed(NumBytes), TII,
1803  NumBytes = 0;
1804  }
1805 
1806  emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1807  DeallocateBefore, TII, MachineInstr::FrameDestroy);
1808 
1809  emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1810  DeallocateAfter, TII, MachineInstr::FrameDestroy);
1811  }
1812  }
1813 
1814  if (!hasFP(MF)) {
1815  bool RedZone = canUseRedZone(MF);
1816  // If this was a redzone leaf function, we don't need to restore the
1817  // stack pointer (but we may need to pop stack args for fastcc).
1818  if (RedZone && AfterCSRPopSize == 0)
1819  return;
1820 
1821  bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1822  int64_t StackRestoreBytes = RedZone ? 0 : NumBytes;
1823  if (NoCalleeSaveRestore)
1824  StackRestoreBytes += AfterCSRPopSize;
1825 
1826  // If we were able to combine the local stack pop with the argument pop,
1827  // then we're done.
1828  bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
1829 
1830  // If we're done after this, make sure to help the load store optimizer.
1831  if (Done)
1832  adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1833 
1834  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1835  StackOffset::getFixed(StackRestoreBytes), TII,
1836  MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1837  if (Done) {
1838  if (HasWinCFI) {
1840  TII->get(AArch64::SEH_EpilogEnd))
1842  }
1843  return;
1844  }
1845 
1846  NumBytes = 0;
1847  }
1848 
1849  // Restore the original stack pointer.
1850  // FIXME: Rather than doing the math here, we should instead just use
1851  // non-post-indexed loads for the restores if we aren't actually going to
1852  // be able to save any instructions.
1853  if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1855  MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1857  TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
1858  } else if (NumBytes)
1859  emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1860  StackOffset::getFixed(NumBytes), TII,
1861  MachineInstr::FrameDestroy, false, NeedsWinCFI);
1862 
1863  // This must be placed after the callee-save restore code because that code
1864  // assumes the SP is at the same location as it was after the callee-save save
1865  // code in the prologue.
1866  if (AfterCSRPopSize) {
1867  // Find an insertion point for the first ldp so that it goes before the
1868  // shadow call stack epilog instruction. This ensures that the restore of
1869  // lr from x18 is placed after the restore from sp.
1870  auto FirstSPPopI = MBB.getFirstTerminator();
1871  while (FirstSPPopI != Begin) {
1872  auto Prev = std::prev(FirstSPPopI);
1873  if (Prev->getOpcode() != AArch64::LDRXpre ||
1874  Prev->getOperand(0).getReg() == AArch64::SP)
1875  break;
1876  FirstSPPopI = Prev;
1877  }
1878 
1879  adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1880 
1881  emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1882  StackOffset::getFixed((int64_t)AfterCSRPopSize), TII,
1883  MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1884  }
1885  if (HasWinCFI)
1886  BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1888 }
1889 
1890 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1891 /// debug info. It's the same as what we use for resolving the code-gen
1892 /// references for now. FIXME: This can go wrong when references are
1893 /// SP-relative and simple call frames aren't used.
1896  Register &FrameReg) const {
1898  MF, FI, FrameReg,
1899  /*PreferFP=*/
1900  MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
1901  /*ForSimm=*/false);
1902 }
1903 
1906  int FI) const {
1908 }
1909 
1911  int64_t ObjectOffset) {
1912  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1913  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1914  bool IsWin64 =
1915  Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1916  unsigned FixedObject =
1917  getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
1918  int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
1919  int64_t FPAdjust =
1920  CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
1921  return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust);
1922 }
1923 
1925  int64_t ObjectOffset) {
1926  const auto &MFI = MF.getFrameInfo();
1927  return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize());
1928 }
1929 
1930  // TODO: This function currently does not work for scalable vectors.
1932  int FI) const {
1933  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1934  MF.getSubtarget().getRegisterInfo());
1935  int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1936  return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1937  ? getFPOffset(MF, ObjectOffset).getFixed()
1938  : getStackOffset(MF, ObjectOffset).getFixed();
1939 }
1940 
1942  const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP,
1943  bool ForSimm) const {
1944  const auto &MFI = MF.getFrameInfo();
1945  int64_t ObjectOffset = MFI.getObjectOffset(FI);
1946  bool isFixed = MFI.isFixedObjectIndex(FI);
1947  bool isSVE = MFI.getStackID(FI) == TargetStackID::ScalableVector;
1948  return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
1949  PreferFP, ForSimm);
1950 }
1951 
1953  const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE,
1954  Register &FrameReg, bool PreferFP, bool ForSimm) const {
1955  const auto &MFI = MF.getFrameInfo();
1956  const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1957  MF.getSubtarget().getRegisterInfo());
1958  const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1959  const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1960 
1961  int64_t FPOffset = getFPOffset(MF, ObjectOffset).getFixed();
1962  int64_t Offset = getStackOffset(MF, ObjectOffset).getFixed();
1963  bool isCSR =
1964  !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
1965 
1966  const StackOffset &SVEStackSize = getSVEStackSize(MF);
1967 
1968  // Use frame pointer to reference fixed objects. Use it for locals if
1969  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1970  // reliable as a base). Make sure useFPForScavengingIndex() does the
1971  // right thing for the emergency spill slot.
1972  bool UseFP = false;
1973  if (AFI->hasStackFrame() && !isSVE) {
1974  // We shouldn't prefer using the FP when there is an SVE area
1975  // in between the FP and the non-SVE locals/spills.
1976  PreferFP &= !SVEStackSize;
1977 
1978  // Note: Keeping the following as multiple 'if' statements rather than
1979  // merging to a single expression for readability.
1980  //
1981  // Argument access should always use the FP.
1982  if (isFixed) {
1983  UseFP = hasFP(MF);
1984  } else if (isCSR && RegInfo->hasStackRealignment(MF)) {
1985  // References to the CSR area must use FP if we're re-aligning the stack
1986  // since the dynamically-sized alignment padding is between the SP/BP and
1987  // the CSR area.
1988  assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1989  UseFP = true;
1990  } else if (hasFP(MF) && !RegInfo->hasStackRealignment(MF)) {
1991  // If the FPOffset is negative and we're producing a signed immediate, we
1992  // have to keep in mind that the available offset range for negative
1993  // offsets is smaller than for positive ones. If an offset is available
1994  // via the FP and the SP, use whichever is closest.
1995  bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1996  PreferFP |= Offset > -FPOffset;
1997 
1998  if (MFI.hasVarSizedObjects()) {
1999  // If we have variable sized objects, we can use either FP or BP, as the
2000  // SP offset is unknown. We can use the base pointer if we have one and
2001  // FP is not preferred. If not, we're stuck with using FP.
2002  bool CanUseBP = RegInfo->hasBasePointer(MF);
2003  if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
2004  UseFP = PreferFP;
2005  else if (!CanUseBP) // Can't use BP. Forced to use FP.
2006  UseFP = true;
2007  // else we can use BP and FP, but the offset from FP won't fit.
2008  // That will make us scavenge registers which we can probably avoid by
2009  // using BP. If it won't fit for BP either, we'll scavenge anyway.
2010  } else if (FPOffset >= 0) {
2011  // Use SP or FP, whichever gives us the best chance of the offset
2012  // being in range for direct access. If the FPOffset is positive,
2013  // that'll always be best, as the SP will be even further away.
2014  UseFP = true;
2015  } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
2016  // Funclets access the locals contained in the parent's stack frame
2017  // via the frame pointer, so we have to use the FP in the parent
2018  // function.
2019  (void) Subtarget;
2020  assert(
2021  Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
2022  "Funclets should only be present on Win64");
2023  UseFP = true;
2024  } else {
2025  // We have the choice between FP and (SP or BP).
2026  if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
2027  UseFP = true;
2028  }
2029  }
2030  }
2031 
2032  assert(
2033  ((isFixed || isCSR) || !RegInfo->hasStackRealignment(MF) || !UseFP) &&
2034  "In the presence of dynamic stack pointer realignment, "
2035  "non-argument/CSR objects cannot be accessed through the frame pointer");
2036 
2037  if (isSVE) {
2038  StackOffset FPOffset =
2040  StackOffset SPOffset =
2041  SVEStackSize +
2042  StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
2043  ObjectOffset);
2044  // Always use the FP for SVE spills if available and beneficial.
2045  if (hasFP(MF) && (SPOffset.getFixed() ||
2046  FPOffset.getScalable() < SPOffset.getScalable() ||
2047  RegInfo->hasStackRealignment(MF))) {
2048  FrameReg = RegInfo->getFrameRegister(MF);
2049  return FPOffset;
2050  }
2051 
2052  FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
2053  : (unsigned)AArch64::SP;
2054  return SPOffset;
2055  }
2056 
2057  StackOffset ScalableOffset = {};
2058  if (UseFP && !(isFixed || isCSR))
2059  ScalableOffset = -SVEStackSize;
2060  if (!UseFP && (isFixed || isCSR))
2061  ScalableOffset = SVEStackSize;
2062 
2063  if (UseFP) {
2064  FrameReg = RegInfo->getFrameRegister(MF);
2065  return StackOffset::getFixed(FPOffset) + ScalableOffset;
2066  }
2067 
2068  // Use the base pointer if we have one.
2069  if (RegInfo->hasBasePointer(MF))
2070  FrameReg = RegInfo->getBaseRegister();
2071  else {
2072  assert(!MFI.hasVarSizedObjects() &&
2073  "Can't use SP when we have var sized objects.");
2074  FrameReg = AArch64::SP;
2075  // If we're using the red zone for this function, the SP won't actually
2076  // be adjusted, so the offsets will be negative. They're also all
2077  // within range of the signed 9-bit immediate instructions.
2078  if (canUseRedZone(MF))
2079  Offset -= AFI->getLocalStackSize();
2080  }
2081 
2082  return StackOffset::getFixed(Offset) + ScalableOffset;
2083 }
2084 
2085 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
2086  // Do not set a kill flag on values that are also marked as live-in. This
2087  // happens with the @llvm-returnaddress intrinsic and with arguments passed in
2088  // callee saved registers.
2089  // Omitting the kill flags is conservatively correct even if the live-in
2090  // is not used after all.
2091  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
2092  return getKillRegState(!IsLiveIn);
2093 }
2094 
2096  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
2098  return Subtarget.isTargetMachO() &&
2099  !(Subtarget.getTargetLowering()->supportSwiftError() &&
2100  Attrs.hasAttrSomewhere(Attribute::SwiftError));
2101 }
2102 
2103 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
2104  bool NeedsWinCFI, bool IsFirst) {
2105  // If we are generating register pairs for a Windows function that requires
2106  // EH support, then pair consecutive registers only. There are no unwind
2107  // opcodes for saves/restores of non-consectuve register pairs.
2108  // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x,
2109  // save_lrpair.
2110  // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
2111 
2112  if (Reg2 == AArch64::FP)
2113  return true;
2114  if (!NeedsWinCFI)
2115  return false;
2116  if (Reg2 == Reg1 + 1)
2117  return false;
2118  // If pairing a GPR with LR, the pair can be described by the save_lrpair
2119  // opcode. If this is the first register pair, it would end up with a
2120  // predecrement, but there's no save_lrpair_x opcode, so we can only do this
2121  // if LR is paired with something else than the first register.
2122  // The save_lrpair opcode requires the first register to be an odd one.
2123  if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
2124  (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
2125  return false;
2126  return true;
2127 }
2128 
2129 /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
2130 /// WindowsCFI requires that only consecutive registers can be paired.
2131 /// LR and FP need to be allocated together when the frame needs to save
2132 /// the frame-record. This means any other register pairing with LR is invalid.
2133 static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
2134  bool UsesWinAAPCS, bool NeedsWinCFI,
2135  bool NeedsFrameRecord, bool IsFirst) {
2136  if (UsesWinAAPCS)
2137  return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst);
2138 
2139  // If we need to store the frame record, don't pair any register
2140  // with LR other than FP.
2141  if (NeedsFrameRecord)
2142  return Reg2 == AArch64::LR;
2143 
2144  return false;
2145 }
2146 
2147 namespace {
2148 
2149 struct RegPairInfo {
2150  unsigned Reg1 = AArch64::NoRegister;
2151  unsigned Reg2 = AArch64::NoRegister;
2152  int FrameIdx;
2153  int Offset;
2154  enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
2155 
2156  RegPairInfo() = default;
2157 
2158  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
2159 
2160  unsigned getScale() const {
2161  switch (Type) {
2162  case PPR:
2163  return 2;
2164  case GPR:
2165  case FPR64:
2166  return 8;
2167  case ZPR:
2168  case FPR128:
2169  return 16;
2170  }
2171  llvm_unreachable("Unsupported type");
2172  }
2173 
2174  bool isScalable() const { return Type == PPR || Type == ZPR; }
2175 };
2176 
2177 } // end anonymous namespace
2178 
2182  bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
2183 
2184  if (CSI.empty())
2185  return;
2186 
2187  bool IsWindows = isTargetWindows(MF);
2188  bool NeedsWinCFI = needsWinCFI(MF);
2190  MachineFrameInfo &MFI = MF.getFrameInfo();
2192  unsigned Count = CSI.size();
2193  (void)CC;
2194  // MachO's compact unwind format relies on all registers being stored in
2195  // pairs.
2197  CC == CallingConv::PreserveMost ||
2198  (Count & 1) == 0) &&
2199  "Odd number of callee-saved regs to spill!");
2200  int ByteOffset = AFI->getCalleeSavedStackSize();
2201  int StackFillDir = -1;
2202  int RegInc = 1;
2203  unsigned FirstReg = 0;
2204  if (NeedsWinCFI) {
2205  // For WinCFI, fill the stack from the bottom up.
2206  ByteOffset = 0;
2207  StackFillDir = 1;
2208  // As the CSI array is reversed to match PrologEpilogInserter, iterate
2209  // backwards, to pair up registers starting from lower numbered registers.
2210  RegInc = -1;
2211  FirstReg = Count - 1;
2212  }
2213  int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
2214 
2215  // When iterating backwards, the loop condition relies on unsigned wraparound.
2216  for (unsigned i = FirstReg; i < Count; i += RegInc) {
2217  RegPairInfo RPI;
2218  RPI.Reg1 = CSI[i].getReg();
2219 
2220  if (AArch64::GPR64RegClass.contains(RPI.Reg1))
2221  RPI.Type = RegPairInfo::GPR;
2222  else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
2223  RPI.Type = RegPairInfo::FPR64;
2224  else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
2225  RPI.Type = RegPairInfo::FPR128;
2226  else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
2227  RPI.Type = RegPairInfo::ZPR;
2228  else if (AArch64::PPRRegClass.contains(RPI.Reg1))
2229  RPI.Type = RegPairInfo::PPR;
2230  else
2231  llvm_unreachable("Unsupported register class.");
2232 
2233  // Add the next reg to the pair if it is in the same register class.
2234  if (unsigned(i + RegInc) < Count) {
2235  unsigned NextReg = CSI[i + RegInc].getReg();
2236  bool IsFirst = i == FirstReg;
2237  switch (RPI.Type) {
2238  case RegPairInfo::GPR:
2239  if (AArch64::GPR64RegClass.contains(NextReg) &&
2240  !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows,
2241  NeedsWinCFI, NeedsFrameRecord, IsFirst))
2242  RPI.Reg2 = NextReg;
2243  break;
2244  case RegPairInfo::FPR64:
2245  if (AArch64::FPR64RegClass.contains(NextReg) &&
2246  !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
2247  IsFirst))
2248  RPI.Reg2 = NextReg;
2249  break;
2250  case RegPairInfo::FPR128:
2251  if (AArch64::FPR128RegClass.contains(NextReg))
2252  RPI.Reg2 = NextReg;
2253  break;
2254  case RegPairInfo::PPR:
2255  case RegPairInfo::ZPR:
2256  break;
2257  }
2258  }
2259 
2260  // If either of the registers to be saved is the lr register, it means that
2261  // we also need to save lr in the shadow call stack.
2262  if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
2263  MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
2265  report_fatal_error("Must reserve x18 to use shadow call stack");
2266  NeedShadowCallStackProlog = true;
2267  }
2268 
2269  // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
2270  // list to come in sorted by frame index so that we can issue the store
2271  // pair instructions directly. Assert if we see anything otherwise.
2272  //
2273  // The order of the registers in the list is controlled by
2274  // getCalleeSavedRegs(), so they will always be in-order, as well.
2275  assert((!RPI.isPaired() ||
2276  (CSI[i].getFrameIdx() + RegInc == CSI[i + RegInc].getFrameIdx())) &&
2277  "Out of order callee saved regs!");
2278 
2279  assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
2280  RPI.Reg1 == AArch64::LR) &&
2281  "FrameRecord must be allocated together with LR");
2282 
2283  // Windows AAPCS has FP and LR reversed.
2284  assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg1 != AArch64::FP ||
2285  RPI.Reg2 == AArch64::LR) &&
2286  "FrameRecord must be allocated together with LR");
2287 
2288  // MachO's compact unwind format relies on all registers being stored in
2289  // adjacent register pairs.
2291  CC == CallingConv::PreserveMost ||
2292  (RPI.isPaired() &&
2293  ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
2294  RPI.Reg1 + 1 == RPI.Reg2))) &&
2295  "Callee-save registers not saved as adjacent register pair!");
2296 
2297  RPI.FrameIdx = CSI[i].getFrameIdx();
2298  if (NeedsWinCFI &&
2299  RPI.isPaired()) // RPI.FrameIdx must be the lower index of the pair
2300  RPI.FrameIdx = CSI[i + RegInc].getFrameIdx();
2301 
2302  int Scale = RPI.getScale();
2303 
2304  int OffsetPre = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
2305  assert(OffsetPre % Scale == 0);
2306 
2307  if (RPI.isScalable())
2308  ScalableByteOffset += StackFillDir * Scale;
2309  else
2310  ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale);
2311 
2312  assert(!(RPI.isScalable() && RPI.isPaired()) &&
2313  "Paired spill/fill instructions don't exist for SVE vectors");
2314 
2315  // Round up size of non-pair to pair size if we need to pad the
2316  // callee-save area to ensure 16-byte alignment.
2317  if (AFI->hasCalleeSaveStackFreeSpace() && !NeedsWinCFI &&
2318  !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
2319  !RPI.isPaired()) {
2320  ByteOffset += 8 * StackFillDir;
2321  assert(ByteOffset % 16 == 0);
2322  assert(MFI.getObjectAlign(RPI.FrameIdx) <= Align(16));
2323  // A stack frame with a gap looks like this, bottom up:
2324  // d9, d8. x21, gap, x20, x19.
2325  // Set extra alignment on the x21 object (the only unpaired register)
2326  // to create the gap above it.
2327  MFI.setObjectAlignment(RPI.FrameIdx, Align(16));
2328  }
2329 
2330  int OffsetPost = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
2331  assert(OffsetPost % Scale == 0);
2332  // If filling top down (default), we want the offset after incrementing it.
2333  // If filling bottom up (WinCFI), we need the original offset.
2334  int Offset = NeedsWinCFI ? OffsetPre : OffsetPost;
2335  RPI.Offset = Offset / Scale;
2336 
2337  assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
2338  (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
2339  "Offset out of bounds for LDP/STP immediate");
2340 
2341  // Save the offset to frame record so that the FP register can point to the
2342  // innermost frame record (spilled FP and LR registers).
2343  if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
2344  RPI.Reg2 == AArch64::FP) ||
2345  (IsWindows && RPI.Reg1 == AArch64::FP &&
2346  RPI.Reg2 == AArch64::LR)))
2348 
2349  RegPairs.push_back(RPI);
2350  if (RPI.isPaired())
2351  i += RegInc;
2352  }
2353  if (NeedsWinCFI) {
2354  // If we need an alignment gap in the stack, align the topmost stack
2355  // object. A stack frame with a gap looks like this, bottom up:
2356  // x19, d8. d9, gap.
2357  // Set extra alignment on the topmost stack object (the first element in
2358  // CSI, which goes top down), to create the gap above it.
2359  if (AFI->hasCalleeSaveStackFreeSpace())
2360  MFI.setObjectAlignment(CSI[0].getFrameIdx(), Align(16));
2361  // We iterated bottom up over the registers; flip RegPairs back to top
2362  // down order.
2363  std::reverse(RegPairs.begin(), RegPairs.end());
2364  }
2365 }
2366 
2369  ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2370  MachineFunction &MF = *MBB.getParent();
2371  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2372  bool NeedsWinCFI = needsWinCFI(MF);
2373  DebugLoc DL;
2374  SmallVector<RegPairInfo, 8> RegPairs;
2375 
2376  bool NeedShadowCallStackProlog = false;
2377  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
2378  NeedShadowCallStackProlog, hasFP(MF));
2379  const MachineRegisterInfo &MRI = MF.getRegInfo();
2380 
2381  if (NeedShadowCallStackProlog) {
2382  // Shadow call stack prolog: str x30, [x18], #8
2383  BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
2384  .addReg(AArch64::X18, RegState::Define)
2385  .addReg(AArch64::LR)
2386  .addReg(AArch64::X18)
2387  .addImm(8)
2389 
2390  if (NeedsWinCFI)
2391  BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
2393 
2394  if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
2395  // Emit a CFI instruction that causes 8 to be subtracted from the value of
2396  // x18 when unwinding past this frame.
2397  static const char CFIInst[] = {
2398  dwarf::DW_CFA_val_expression,
2399  18, // register
2400  2, // length
2401  static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
2402  static_cast<char>(-8) & 0x7f, // addend (sleb128)
2403  };
2404  unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
2405  nullptr, StringRef(CFIInst, sizeof(CFIInst))));
2406  BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
2407  .addCFIIndex(CFIIndex)
2409  }
2410 
2411  // This instruction also makes x18 live-in to the entry block.
2412  MBB.addLiveIn(AArch64::X18);
2413  }
2414 
2415  if (homogeneousPrologEpilog(MF)) {
2416  auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
2418 
2419  for (auto &RPI : RegPairs) {
2420  MIB.addReg(RPI.Reg1);
2421  MIB.addReg(RPI.Reg2);
2422 
2423  // Update register live in.
2424  if (!MRI.isReserved(RPI.Reg1))
2425  MBB.addLiveIn(RPI.Reg1);
2426  if (!MRI.isReserved(RPI.Reg2))
2427  MBB.addLiveIn(RPI.Reg2);
2428  }
2429  return true;
2430  }
2431  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
2432  ++RPII) {
2433  RegPairInfo RPI = *RPII;
2434  unsigned Reg1 = RPI.Reg1;
2435  unsigned Reg2 = RPI.Reg2;
2436  unsigned StrOpc;
2437 
2438  // Issue sequence of spills for cs regs. The first spill may be converted
2439  // to a pre-decrement store later by emitPrologue if the callee-save stack
2440  // area allocation can't be combined with the local stack area allocation.
2441  // For example:
2442  // stp x22, x21, [sp, #0] // addImm(+0)
2443  // stp x20, x19, [sp, #16] // addImm(+2)
2444  // stp fp, lr, [sp, #32] // addImm(+4)
2445  // Rationale: This sequence saves uop updates compared to a sequence of
2446  // pre-increment spills like stp xi,xj,[sp,#-16]!
2447  // Note: Similar rationale and sequence for restores in epilog.
2448  unsigned Size;
2449  Align Alignment;
2450  switch (RPI.Type) {
2451  case RegPairInfo::GPR:
2452  StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
2453  Size = 8;
2454  Alignment = Align(8);
2455  break;
2456  case RegPairInfo::FPR64:
2457  StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
2458  Size = 8;
2459  Alignment = Align(8);
2460  break;
2461  case RegPairInfo::FPR128:
2462  StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
2463  Size = 16;
2464  Alignment = Align(16);
2465  break;
2466  case RegPairInfo::ZPR:
2467  StrOpc = AArch64::STR_ZXI;
2468  Size = 16;
2469  Alignment = Align(16);
2470  break;
2471  case RegPairInfo::PPR:
2472  StrOpc = AArch64::STR_PXI;
2473  Size = 2;
2474  Alignment = Align(2);
2475  break;
2476  }
2477  LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
2478  if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
2479  dbgs() << ") -> fi#(" << RPI.FrameIdx;
2480  if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
2481  dbgs() << ")\n");
2482 
2483  assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
2484  "Windows unwdinding requires a consecutive (FP,LR) pair");
2485  // Windows unwind codes require consecutive registers if registers are
2486  // paired. Make the switch here, so that the code below will save (x,x+1)
2487  // and not (x+1,x).
2488  unsigned FrameIdxReg1 = RPI.FrameIdx;
2489  unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2490  if (NeedsWinCFI && RPI.isPaired()) {
2491  std::swap(Reg1, Reg2);
2492  std::swap(FrameIdxReg1, FrameIdxReg2);
2493  }
2494  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
2495  if (!MRI.isReserved(Reg1))
2496  MBB.addLiveIn(Reg1);
2497  if (RPI.isPaired()) {
2498  if (!MRI.isReserved(Reg2))
2499  MBB.addLiveIn(Reg2);
2500  MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
2502  MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2503  MachineMemOperand::MOStore, Size, Alignment));
2504  }
2505  MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
2506  .addReg(AArch64::SP)
2507  .addImm(RPI.Offset) // [sp, #offset*scale],
2508  // where factor*scale is implicit
2511  MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2512  MachineMemOperand::MOStore, Size, Alignment));
2513  if (NeedsWinCFI)
2515 
2516  // Update the StackIDs of the SVE stack slots.
2517  MachineFrameInfo &MFI = MF.getFrameInfo();
2518  if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
2520 
2521  }
2522  return true;
2523 }
2524 
2528  MachineFunction &MF = *MBB.getParent();
2529  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2530  DebugLoc DL;
2531  SmallVector<RegPairInfo, 8> RegPairs;
2532  bool NeedsWinCFI = needsWinCFI(MF);
2533 
2534  if (MI != MBB.end())
2535  DL = MI->getDebugLoc();
2536 
2537  bool NeedShadowCallStackProlog = false;
2538  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
2539  NeedShadowCallStackProlog, hasFP(MF));
2540 
2541  auto EmitMI = [&](const RegPairInfo &RPI) {
2542  unsigned Reg1 = RPI.Reg1;
2543  unsigned Reg2 = RPI.Reg2;
2544 
2545  // Issue sequence of restores for cs regs. The last restore may be converted
2546  // to a post-increment load later by emitEpilogue if the callee-save stack
2547  // area allocation can't be combined with the local stack area allocation.
2548  // For example:
2549  // ldp fp, lr, [sp, #32] // addImm(+4)
2550  // ldp x20, x19, [sp, #16] // addImm(+2)
2551  // ldp x22, x21, [sp, #0] // addImm(+0)
2552  // Note: see comment in spillCalleeSavedRegisters()
2553  unsigned LdrOpc;
2554  unsigned Size;
2555  Align Alignment;
2556  switch (RPI.Type) {
2557  case RegPairInfo::GPR:
2558  LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2559  Size = 8;
2560  Alignment = Align(8);
2561  break;
2562  case RegPairInfo::FPR64:
2563  LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2564  Size = 8;
2565  Alignment = Align(8);
2566  break;
2567  case RegPairInfo::FPR128:
2568  LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2569  Size = 16;
2570  Alignment = Align(16);
2571  break;
2572  case RegPairInfo::ZPR:
2573  LdrOpc = AArch64::LDR_ZXI;
2574  Size = 16;
2575  Alignment = Align(16);
2576  break;
2577  case RegPairInfo::PPR:
2578  LdrOpc = AArch64::LDR_PXI;
2579  Size = 2;
2580  Alignment = Align(2);
2581  break;
2582  }
2583  LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
2584  if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
2585  dbgs() << ") -> fi#(" << RPI.FrameIdx;
2586  if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
2587  dbgs() << ")\n");
2588 
2589  // Windows unwind codes require consecutive registers if registers are
2590  // paired. Make the switch here, so that the code below will restore (x,x+1)
2591  // and not (x+1,x).
2592  unsigned FrameIdxReg1 = RPI.FrameIdx;
2593  unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2594  if (NeedsWinCFI && RPI.isPaired()) {
2595  std::swap(Reg1, Reg2);
2596  std::swap(FrameIdxReg1, FrameIdxReg2);
2597  }
2598  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
2599  if (RPI.isPaired()) {
2600  MIB.addReg(Reg2, getDefRegState(true));
2602  MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2603  MachineMemOperand::MOLoad, Size, Alignment));
2604  }
2605  MIB.addReg(Reg1, getDefRegState(true))
2606  .addReg(AArch64::SP)
2607  .addImm(RPI.Offset) // [sp, #offset*scale]
2608  // where factor*scale is implicit
2611  MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2612  MachineMemOperand::MOLoad, Size, Alignment));
2613  if (NeedsWinCFI)
2615  };
2616 
2617  // SVE objects are always restored in reverse order.
2618  for (const RegPairInfo &RPI : reverse(RegPairs))
2619  if (RPI.isScalable())
2620  EmitMI(RPI);
2621 
2622  if (ReverseCSRRestoreSeq) {
2623  for (const RegPairInfo &RPI : reverse(RegPairs))
2624  if (!RPI.isScalable())
2625  EmitMI(RPI);
2626  } else if (homogeneousPrologEpilog(MF, &MBB)) {
2627  auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
2629  for (auto &RPI : RegPairs) {
2630  MIB.addReg(RPI.Reg1, RegState::Define);
2631  MIB.addReg(RPI.Reg2, RegState::Define);
2632  }
2633  return true;
2634  } else
2635  for (const RegPairInfo &RPI : RegPairs)
2636  if (!RPI.isScalable())
2637  EmitMI(RPI);
2638 
2639  if (NeedShadowCallStackProlog) {
2640  // Shadow call stack epilog: ldr x30, [x18, #-8]!
2641  BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
2642  .addReg(AArch64::X18, RegState::Define)
2643  .addReg(AArch64::LR, RegState::Define)
2644  .addReg(AArch64::X18)
2645  .addImm(-8)
2647  }
2648 
2649  return true;
2650 }
2651 
// Decides which callee-saved registers this function must spill and records
// the resulting callee-save area sizes in AFI. Starting from the generic
// TargetFrameLowering::determineCalleeSaves result in SavedRegs, it:
//  - adds the base pointer register if it appears in the callee-saved list,
//  - completes register pairs when producePairRegisters(MF) holds,
//  - adds FP and LR when hasFP(MF) or a Windows stack probe is required,
//  - for frames estimated to be "big", spills one extra GPR or creates an
//    emergency spill slot so the register scavenger has a scratch register.
// NOTE(review): the first line of the signature and several interior lines
// are not visible in this listing (see the notes below) - confirm against the
// upstream file.
2653  BitVector &SavedRegs,
2654  RegScavenger *RS) const {
2655  // All calls are tail calls in GHC calling conv, and functions have no
2656  // prologue/epilogue.
// NOTE(review): the condition guarding this early return is not visible here;
// per the comment above it presumably tests for the GHC calling convention.
2658  return;
2659 
2660  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2661  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2662  MF.getSubtarget().getRegisterInfo());
2663  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
// NOTE(review): AFI is used further down but its declaration is not visible
// in this listing.
// Last unused, non-reserved callee-saved GPR seen in the loop below, and the
// register it would be paired with.
2665  unsigned UnspilledCSGPR = AArch64::NoRegister;
2666  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2667 
2668  MachineFrameInfo &MFI = MF.getFrameInfo();
2669  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2670 
2671  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2672  ? RegInfo->getBaseRegister()
2673  : (unsigned)AArch64::NoRegister;
2674 
// Non-zero once a GPR has been added to SavedRegs purely to complete a pair
// (or, later, to obtain a scratch register).
2675  unsigned ExtraCSSpill = 0;
2676  // Figure out which callee-saved registers to save/restore.
2677  for (unsigned i = 0; CSRegs[i]; ++i) {
2678  const unsigned Reg = CSRegs[i];
2679 
2680  // Add the base pointer register to SavedRegs if it is callee-save.
2681  if (Reg == BasePointerReg)
2682  SavedRegs.set(Reg);
2683 
2684  bool RegUsed = SavedRegs.test(Reg);
2685  unsigned PairedReg = AArch64::NoRegister;
// Only GPR64/FPR64/FPR128 registers get a pairing partner: the neighbouring
// entry (index with the low bit flipped) in the callee-saved list.
2686  if (AArch64::GPR64RegClass.contains(Reg) ||
2687  AArch64::FPR64RegClass.contains(Reg) ||
2688  AArch64::FPR128RegClass.contains(Reg))
2689  PairedReg = CSRegs[i ^ 1];
2690 
2691  if (!RegUsed) {
2692  if (AArch64::GPR64RegClass.contains(Reg) &&
2693  !RegInfo->isReservedReg(MF, Reg)) {
2694  UnspilledCSGPR = Reg;
2695  UnspilledCSGPRPaired = PairedReg;
2696  }
2697  continue;
2698  }
2699 
2700  // MachO's compact unwind format relies on all registers being stored in
2701  // pairs.
2702  // FIXME: the usual format is actually better if unwinding isn't needed.
2703  if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
2704  !SavedRegs.test(PairedReg)) {
2705  SavedRegs.set(PairedReg);
2706  if (AArch64::GPR64RegClass.contains(PairedReg) &&
2707  !RegInfo->isReservedReg(MF, PairedReg))
2708  ExtraCSSpill = PairedReg;
2709  }
2710  }
2711 
// NOTE(review): the first half of this condition is not visible here; per the
// comment below it presumably checks for the Windows calling convention.
2713  !Subtarget.isTargetWindows()) {
2714  // For Windows calling convention on a non-windows OS, where X18 is treated
2715  // as reserved, back up X18 when entering non-windows code (marked with the
2716  // Windows calling convention) and restore when returning regardless of
2717  // whether the individual function uses it - it might call other functions
2718  // that clobber it.
2719  SavedRegs.set(AArch64::X18);
2720  }
2721 
2722  // Calculates the callee saved stack size.
2723  unsigned CSStackSize = 0;
2724  unsigned SVECSStackSize = 0;
// NOTE(review): TRI is used below but its declaration is not visible in this
// listing.
2726  const MachineRegisterInfo &MRI = MF.getRegInfo();
// PPR/ZPR registers are accounted to the SVE callee-save area; everything else
// goes to the regular callee-save area.
2727  for (unsigned Reg : SavedRegs.set_bits()) {
2728  auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
2729  if (AArch64::PPRRegClass.contains(Reg) ||
2730  AArch64::ZPRRegClass.contains(Reg))
2731  SVECSStackSize += RegSize;
2732  else
2733  CSStackSize += RegSize;
2734  }
2735 
2736  // Save number of saved regs, so we can easily update CSStackSize later.
2737  unsigned NumSavedRegs = SavedRegs.count();
2738 
2739  // The frame record needs to be created by saving the appropriate registers
2740  uint64_t EstimatedStackSize = MFI.estimateStackSize(MF);
2741  if (hasFP(MF) ||
2742  windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2743  SavedRegs.set(AArch64::FP);
2744  SavedRegs.set(AArch64::LR);
2745  }
2746 
2747  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2748  for (unsigned Reg
2749  : SavedRegs.set_bits()) dbgs()
2750  << ' ' << printReg(Reg, RegInfo);
2751  dbgs() << "\n";);
2752 
2753  // If any callee-saved registers are used, the frame cannot be eliminated.
2754  int64_t SVEStackSize =
2755  alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
2756  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
2757 
2758  // The CSR spill slots have not been allocated yet, so estimateStackSize
2759  // won't include them.
2760  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2761 
2762  // Conservatively always assume BigStack when there are SVE spills.
2763  bool BigStack = SVEStackSize ||
2764  (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2765  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2766  AFI->setHasStackFrame(true);
2767 
2768  // Estimate if we might need to scavenge a register at some point in order
2769  // to materialize a stack offset. If so, either spill one additional
2770  // callee-saved register or reserve a special spill slot to facilitate
2771  // register scavenging. If we already spilled an extra callee-saved register
2772  // above to keep the number of spills even, we don't need to do anything else
2773  // here.
2774  if (BigStack) {
2775  if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2776  LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2777  << " to get a scratch register.\n");
2778  SavedRegs.set(UnspilledCSGPR);
2779  // MachO's compact unwind format relies on all registers being stored in
2780  // pairs, so if we need to spill one extra for BigStack, then we need to
2781  // store the pair.
// NOTE(review): UnspilledCSGPRPaired is set unconditionally here; confirm it
// can never be AArch64::NoRegister on this path.
2782  if (producePairRegisters(MF))
2783  SavedRegs.set(UnspilledCSGPRPaired);
2784  ExtraCSSpill = UnspilledCSGPR;
2785  }
2786 
2787  // If we didn't find an extra callee-saved register to spill, create
2788  // an emergency spill slot.
2789  if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
// NOTE(review): TRI is used in this scope as well; its declaration is not
// visible in this listing.
2791  const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2792  unsigned Size = TRI->getSpillSize(RC);
2793  Align Alignment = TRI->getSpillAlign(RC);
2794  int FI = MFI.CreateStackObject(Size, Alignment, false);
2795  RS->addScavengingFrameIndex(FI);
2796  LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2797  << " as the emergency spill slot.\n");
2798  }
2799  }
2800 
2801  // Adding the size of additional 64bit GPR saves.
2802  CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2803  uint64_t AlignedCSStackSize = alignTo(CSStackSize, 16);
2804  LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2805  << EstimatedStackSize + AlignedCSStackSize
2806  << " bytes.\n");
2807 
// If callee-saved info has already been finalized, the size computed here must
// match the one recorded earlier.
2808  assert((!MFI.isCalleeSavedInfoValid() ||
2809  AFI->getCalleeSavedStackSize() == AlignedCSStackSize) &&
2810  "Should not invalidate callee saved info");
2811 
2812  // Round up to register pair alignment to avoid additional SP adjustment
2813  // instructions.
2814  AFI->setCalleeSavedStackSize(AlignedCSStackSize);
2815  AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
2816  AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
2817 }
2818 
// Hook giving the target a chance to influence callee-saved spill slot
// assignment. The only work done here is reversing CSI when needsWinCFI(MF)
// holds; it then returns false so that generic code performs the actual slot
// assignment (see the trailing comment).
// NOTE(review): the leading line(s) of the signature are not visible in this
// listing.
2821  std::vector<CalleeSavedInfo> &CSI) const {
2822  bool NeedsWinCFI = needsWinCFI(MF);
2823  // To match the canonical windows frame layout, reverse the list of
2824  // callee saved registers to get them laid out by PrologEpilogInserter
2825  // in the right order. (PrologEpilogInserter allocates stack objects top
2826  // down. Windows canonical prologs store higher numbered registers at
2827  // the top, thus have the CSI array start from the highest registers.)
2828  if (NeedsWinCFI)
2829  std::reverse(CSI.begin(), CSI.end());
2830  // Let the generic code do the rest of the setup.
2831  return false;
2832 }
2833 
// Returns the AArch64FunctionInfo flag reporting whether the callee-save area
// has free space.
// NOTE(review): the first line of the signature is not visible in this
// listing.
2835  const MachineFunction &MF) const {
2836  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2837  return AFI->hasCalleeSaveStackFreeSpace();
2838 }
2839 
2840 /// Returns true if there are any SVE callee saves. On return, Min and Max
/// hold the smallest and largest frame index among the ZPR/PPR callee saves.
/// Returns false without scanning when callee-saved info is not yet valid.
/// NOTE(review): the first signature line, the statements that precede the
/// validity check, and the head of the assert are not visible in this
/// listing. The final comparison relies on Min starting out as
/// std::numeric_limits<int>::max() - confirm against the upstream file.
2842  int &Min, int &Max) {
2845 
2846  if (!MFI.isCalleeSavedInfoValid())
2847  return false;
2848 
2849  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2850  for (auto &CS : CSI) {
2851  if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
2852  AArch64::PPRRegClass.contains(CS.getReg())) {
2854  Max + 1 == CS.getFrameIdx()) &&
2855  "SVE CalleeSaves are not consecutive");
2856 
2857  Min = std::min(Min, CS.getFrameIdx());
2858  Max = std::max(Max, CS.getFrameIdx());
2859  }
2860  }
// Min is only lowered from its initial value when an SVE callee save was seen.
2861  return Min != std::numeric_limits<int>::max();
2862 }
2863 
2864 // Process all the SVE stack objects and determine offsets for each
2865 // object. If AssignOffsets is true, the offsets get assigned.
2866 // Fills in the first and last callee-saved frame indices into
2867 // Min/MaxCSFrameIndex, respectively.
2868 // Returns the size of the stack.
// Offsets are assigned as negative values: callee-save slots first, then the
// remaining live ScalableVector objects.
// NOTE(review): the first signature line and a few interior lines are not
// visible in this listing (see the notes below).
2870  int &MinCSFrameIndex,
2871  int &MaxCSFrameIndex,
2872  bool AssignOffsets) {
2873 #ifndef NDEBUG
2874  // First process all fixed stack objects.
// NOTE(review): the head of the statement that uses this message (presumably
// an assert on each fixed object) is not visible in this listing.
2875  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
2877  "SVE vectors should never be passed on the stack by value, only by "
2878  "reference.");
2879 #endif
2880 
// Helper that logs and records the offset chosen for a frame index.
2881  auto Assign = [&MFI](int FI, int64_t Offset) {
2882  LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
2883  MFI.setObjectOffset(FI, Offset);
2884  };
2885 
2886  int64_t Offset = 0;
2887 
2888  // Then process all callee saved slots.
2889  if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
2890  // Assign offsets to the callee save slots.
2891  for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
2892  Offset += MFI.getObjectSize(I);
2894  if (AssignOffsets)
2895  Assign(I, -Offset);
2896  }
2897  }
2898 
2899  // Ensure that the Callee-save area is aligned to 16bytes.
2900  Offset = alignTo(Offset, Align(16U));
2901 
2902  // Collect the SVE objects to allocate, in ascending frame-index order.
2903  SmallVector<int, 8> ObjectsToAllocate;
2904  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
2905  unsigned StackID = MFI.getStackID(I);
2906  if (StackID != TargetStackID::ScalableVector)
2907  continue;
// Callee-save slots were already handled above.
2908  if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
2909  continue;
2910  if (MFI.isDeadObjectIndex(I))
2911  continue;
2912 
2913  ObjectsToAllocate.push_back(I);
2914  }
2915 
2916  // Allocate all SVE locals and spills
2917  for (unsigned FI : ObjectsToAllocate) {
2918  Align Alignment = MFI.getObjectAlign(FI);
2919  // FIXME: Given that the length of SVE vectors is not necessarily a power of
2920  // two, we'd need to align every object dynamically at runtime if the
2921  // alignment is larger than 16. This is not yet supported.
// NOTE(review): the call that consumes this message is not visible in this
// listing.
2922  if (Alignment > Align(16))
2924  "Alignment of scalable vectors > 16 bytes is not yet supported");
2925 
2926  Offset = alignTo(Offset + MFI.getObjectSize(FI), Alignment);
2927  if (AssignOffsets)
2928  Assign(FI, -Offset);
2929  }
2930 
2931  return Offset;
2932 }
2933 
2934 int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
2935  MachineFrameInfo &MFI) const {
2936  int MinCSFrameIndex, MaxCSFrameIndex;
2937  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex, false);
2938 }
2939 
2940 int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
2941  MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
2942  return determineSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex,
2943  true);
2944 }
2945 
// Runs before the frame layout is finalized. Assigns offsets to the SVE stack
// objects and records the SVE stack size and callee-save index range in AFI.
// For functions with EH funclets it additionally creates the UnwindHelp fixed
// object and stores -2 into it right after the frame-setup instructions of the
// entry block.
// NOTE(review): the first signature line, the head of the assert and the
// declaration of AFI are not visible in this listing.
2947  MachineFunction &MF, RegScavenger *RS) const {
2948  MachineFrameInfo &MFI = MF.getFrameInfo();
2949 
2951  "Upwards growing stack unsupported");
2952 
2953  int MinCSFrameIndex, MaxCSFrameIndex;
2954  int64_t SVEStackSize =
2955  assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
2956 
2958  AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
2959  AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
2960 
2961  // If this function isn't doing Win64-style C++ EH, we don't need to do
2962  // anything.
2963  if (!MF.hasEHFunclets())
2964  return;
2965  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2966  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
2967 
// Skip past the leading FrameSetup instructions of the entry block; the store
// to UnwindHelp is inserted at that point.
2968  MachineBasicBlock &MBB = MF.front();
2969  auto MBBI = MBB.begin();
2970  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
2971  ++MBBI;
2972 
2973  // Create an UnwindHelp object.
2974  // The UnwindHelp object is allocated at the start of the fixed object area
2975  int64_t FixedObject =
2976  getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false);
2977  int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8,
2978  /*SPOffset*/ -FixedObject,
2979  /*IsImmutable=*/false);
2980  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
2981 
2982  // We need to store -2 into the UnwindHelp object at the start of the
2983  // function.
2984  DebugLoc DL;
// Use the scavenger to find a register that is unused at the insertion point.
// NOTE(review): std::prev(MBBI) assumes at least one FrameSetup instruction
// precedes MBBI (i.e. MBBI != MBB.begin()) - confirm.
2985  RS->enterBasicBlockEnd(MBB);
2986  RS->backward(std::prev(MBBI));
2987  unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
2988  assert(DstReg && "There must be a free register after frame setup");
2989  BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
2990  BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
2991  .addReg(DstReg, getKillRegState(true))
2992  .addFrameIndex(UnwindHelpFI)
2993  .addImm(0);
2994 }
2995 
2996 namespace {
2997 struct TagStoreInstr {
2998  MachineInstr *MI;
2999  int64_t Offset, Size;
3000  explicit TagStoreInstr(MachineInstr *MI, int64_t Offset, int64_t Size)
3001  : MI(MI), Offset(Offset), Size(Size) {}
3002 };
3003 
// Collects a run of adjacent tag store instructions and replaces them with a
// shorter equivalent sequence (an unrolled run or a loop) via emitCode().
// NOTE(review): the declarations of the MBB, MRI and TagStores members used
// below are not visible in this listing.
3004 class TagStoreEdit {
3005  MachineFunction *MF;
3008  // Tag store instructions that are being replaced.
3010  // Combined memref arguments of the above instructions.
3011  SmallVector<MachineMemOperand *, 8> CombinedMemRefs;
3012 
3013  // Replace allocation tags in [FrameReg + FrameRegOffset, FrameReg +
3014  // FrameRegOffset + Size) with the address tag of SP.
3015  Register FrameReg;
3016  StackOffset FrameRegOffset;
3017  int64_t Size;
3018  // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
3019  Optional<int64_t> FrameRegUpdate;
3020  // MIFlags for any FrameReg updating instructions.
3021  unsigned FrameRegUpdateFlags;
3022 
3023  // Use zeroing instruction variants.
3024  bool ZeroData;
3025  DebugLoc DL;
3026 
// Code generators for the two replacement strategies; both insert before
// InsertI.
3027  void emitUnrolled(MachineBasicBlock::iterator InsertI);
3028  void emitLoop(MachineBasicBlock::iterator InsertI);
3029 
3030 public:
3031  TagStoreEdit(MachineBasicBlock *MBB, bool ZeroData)
3032  : MBB(MBB), ZeroData(ZeroData) {
3033  MF = MBB->getParent();
3034  MRI = &MF->getRegInfo();
3035  }
3036  // Add an instruction to be replaced. Instructions must be added in the
3037  // ascending order of Offset, and have to be adjacent.
3038  void addInstruction(TagStoreInstr I) {
3039  assert((TagStores.empty() ||
3040  TagStores.back().Offset + TagStores.back().Size == I.Offset) &&
3041  "Non-adjacent tag store instructions.");
3042  TagStores.push_back(I);
3043  }
// Forget the instructions collected so far (does not erase them from the
// function).
3044  void clear() { TagStores.clear(); }
3045  // Emit equivalent code at the given location, and erase the current set of
3046  // instructions. May skip if the replacement is not profitable. May invalidate
3047  // the input iterator and replace it with a valid one.
3048  void emitCode(MachineBasicBlock::iterator &InsertI,
3049  const AArch64FrameLowering *TFI, bool IsLast);
3050 };
3051 
3052 void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
3053  const AArch64InstrInfo *TII =
3054  MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
3055 
3056  const int64_t kMinOffset = -256 * 16;
3057  const int64_t kMaxOffset = 255 * 16;
3058 
3059  Register BaseReg = FrameReg;
3060  int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
3061  if (BaseRegOffsetBytes < kMinOffset ||
3062  BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
3063  Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3064  emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
3065  StackOffset::getFixed(BaseRegOffsetBytes), TII);
3066  BaseReg = ScratchReg;
3067  BaseRegOffsetBytes = 0;
3068  }
3069 
3070  MachineInstr *LastI = nullptr;
3071  while (Size) {
3072  int64_t InstrSize = (Size > 16) ? 32 : 16;
3073  unsigned Opcode =
3074  InstrSize == 16
3075  ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
3076  : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
3077  MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
3078  .addReg(AArch64::SP)
3079  .addReg(BaseReg)
3080  .addImm(BaseRegOffsetBytes / 16)
3081  .setMemRefs(CombinedMemRefs);
3082  // A store to [BaseReg, #0] should go last for an opportunity to fold the
3083  // final SP adjustment in the epilogue.
3084  if (BaseRegOffsetBytes == 0)
3085  LastI = I;
3086  BaseRegOffsetBytes += InstrSize;
3087  Size -= InstrSize;
3088  }
3089 
3090  if (LastI)
3091  MBB->splice(InsertI, MBB, LastI);
3092 }
3093 
// Emits, before InsertI, an ST(Z)Gloop_wback pseudo that tags the region
// starting at FrameReg + FrameRegOffset. When FrameRegUpdate is set, FrameReg
// itself is used as the loop's base register and extra code is emitted so that
// it ends up adjusted as requested: either a post-indexed 16-byte tag store
// (when 16 bytes were split off the loop) or a plain ADD/SUB.
3094 void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) {
3095  const AArch64InstrInfo *TII =
3096  MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
3097 
// Without a register update to fold, work on a fresh virtual register so that
// FrameReg is left untouched.
3098  Register BaseReg = FrameRegUpdate
3099  ? FrameReg
3100  : MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3101  Register SizeReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
3102 
3103  emitFrameOffset(*MBB, InsertI, DL, BaseReg, FrameReg, FrameRegOffset, TII);
3104 
3105  int64_t LoopSize = Size;
3106  // If the loop size is not a multiple of 32, split off one 16-byte store at
3107  // the end to fold BaseReg update into.
3108  if (FrameRegUpdate && *FrameRegUpdate)
3109  LoopSize -= LoopSize % 32;
3110  MachineInstr *LoopI = BuildMI(*MBB, InsertI, DL,
3111  TII->get(ZeroData ? AArch64::STZGloop_wback
3112  : AArch64::STGloop_wback))
3113  .addDef(SizeReg)
3114  .addDef(BaseReg)
3115  .addImm(LoopSize)
3116  .addReg(BaseReg)
3117  .setMemRefs(CombinedMemRefs);
3118  if (FrameRegUpdate)
3119  LoopI->setFlags(FrameRegUpdateFlags);
3120 
// Adjustment still owed to BaseReg beyond the end of the tagged region
// (FrameRegOffset + Size) to reach the requested FrameRegUpdate.
3121  int64_t ExtraBaseRegUpdate =
3122  FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0;
3123  if (LoopSize < Size) {
3124  assert(FrameRegUpdate);
3125  assert(Size - LoopSize == 16);
3126  // Tag 16 more bytes at BaseReg and update BaseReg.
// The immediate is in 16-byte units: one granule for the tagged bytes plus the
// extra update.
3127  BuildMI(*MBB, InsertI, DL,
3128  TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex))
3129  .addDef(BaseReg)
3130  .addReg(BaseReg)
3131  .addReg(BaseReg)
3132  .addImm(1 + ExtraBaseRegUpdate / 16)
3133  .setMemRefs(CombinedMemRefs)
3134  .setMIFlags(FrameRegUpdateFlags);
3135  } else if (ExtraBaseRegUpdate) {
3136  // Update BaseReg.
3137  BuildMI(
3138  *MBB, InsertI, DL,
3139  TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri))
3140  .addDef(BaseReg)
3141  .addReg(BaseReg)
3142  .addImm(std::abs(ExtraBaseRegUpdate))
3143  .addImm(0)
3144  .setMIFlags(FrameRegUpdateFlags);
3145  }
3146 }
3147 
3148 // Check if *II is a register update that can be merged into STGloop that ends
3149 // at (Reg + Size). RemainingOffset is the required adjustment to Reg after the
3150 // end of the loop.
3151 bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg,
3152  int64_t Size, int64_t *TotalOffset) {
3153  MachineInstr &MI = *II;
3154  if ((MI.getOpcode() == AArch64::ADDXri ||
3155  MI.getOpcode() == AArch64::SUBXri) &&
3156  MI.getOperand(0).getReg() == Reg && MI.getOperand(1).getReg() == Reg) {
3157  unsigned Shift = AArch64_AM::getShiftValue(MI.getOperand(3).getImm());
3158  int64_t Offset = MI.getOperand(2).getImm() << Shift;
3159  if (MI.getOpcode() == AArch64::SUBXri)
3160  Offset = -Offset;
3161  int64_t AbsPostOffset = std::abs(Offset - Size);
3162  const int64_t kMaxOffset =
3163  0xFFF; // Max encoding for unshifted ADDXri / SUBXri
3164  if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) {
3165  *TotalOffset = Offset;
3166  return true;
3167  }
3168  }
3169  return false;
3170 }
3171 
// Fills MemRefs with the concatenation of the memory operands of all
// instructions in TSE. If any of them has no memory operands, MemRefs is left
// empty instead.
// NOTE(review): the signature line declaring the MemRefs parameter is not
// visible in this listing.
3172 void mergeMemRefs(const SmallVectorImpl<TagStoreInstr> &TSE,
3174  MemRefs.clear();
3175  for (auto &TS : TSE) {
3176  MachineInstr *MI = TS.MI;
3177  // An instruction without memory operands may access anything. Be
3178  // conservative and return an empty list.
3179  if (MI->memoperands_empty()) {
3180  MemRefs.clear();
3181  return;
3182  }
3183  MemRefs.append(MI->memoperands_begin(), MI->memoperands_end());
3184  }
3185 }
3186 
// Replaces the collected run of tag stores with an unrolled sequence (small
// regions) or a loop (regions of kSetTagLoopThreshold bytes or more), inserted
// at InsertI, and then erases the original instructions. Returns early and
// leaves the originals in place when nothing was collected, or when there is
// only a single tag store and no register update to fold into it.
// IsLast enables folding a following ADD/SUB of the frame register into the
// loop.
3187 void TagStoreEdit::emitCode(MachineBasicBlock::iterator &InsertI,
3188  const AArch64FrameLowering *TFI, bool IsLast) {
3189  if (TagStores.empty())
3190  return;
// The run is adjacent and ordered by offset (see addInstruction), so its
// extent follows from the first and last entries.
3191  TagStoreInstr &FirstTagStore = TagStores[0];
3192  TagStoreInstr &LastTagStore = TagStores[TagStores.size() - 1];
3193  Size = LastTagStore.Offset - FirstTagStore.Offset + LastTagStore.Size;
3194  DL = TagStores[0].MI->getDebugLoc();
3195 
3196  Register Reg;
3197  FrameRegOffset = TFI->resolveFrameOffsetReference(
3198  *MF, FirstTagStore.Offset, false /*isFixed*/, false /*isSVE*/, Reg,
3199  /*PreferFP=*/false, /*ForSimm=*/true);
3200  FrameReg = Reg;
3201  FrameRegUpdate = None;
3202 
3203  mergeMemRefs(TagStores, CombinedMemRefs);
3204 
3205  LLVM_DEBUG(dbgs() << "Replacing adjacent STG instructions:\n";
3206  for (const auto &Instr
3207  : TagStores) { dbgs() << " " << *Instr.MI; });
3208 
3209  // Size threshold where a loop becomes shorter than a linear sequence of
3210  // tagging instructions.
3211  const int kSetTagLoopThreshold = 176;
3212  if (Size < kSetTagLoopThreshold) {
// A single small tag store is already in its shortest form.
3213  if (TagStores.size() < 2)
3214  return;
3215  emitUnrolled(InsertI);
3216  } else {
3217  MachineInstr *UpdateInstr = nullptr;
// Only written (and only read) when UpdateInstr is set.
3218  int64_t TotalOffset;
3219  if (IsLast) {
3220  // See if we can merge base register update into the STGloop.
3221  // This is done in AArch64LoadStoreOptimizer for "normal" stores,
3222  // but STGloop is way too unusual for that, and also it only
3223  // realistically happens in function epilogue. Also, STGloop is expanded
3224  // before that pass.
3225  if (InsertI != MBB->end() &&
3226  canMergeRegUpdate(InsertI, FrameReg, FrameRegOffset.getFixed() + Size,
3227  &TotalOffset)) {
// Step InsertI past the update instruction so that it stays valid once the
// update instruction is erased below.
3228  UpdateInstr = &*InsertI++;
3229  LLVM_DEBUG(dbgs() << "Folding SP update into loop:\n "
3230  << *UpdateInstr);
3231  }
3232  }
3233 
3234  if (!UpdateInstr && TagStores.size() < 2)
3235  return;
3236 
3237  if (UpdateInstr) {
3238  FrameRegUpdate = TotalOffset;
3239  FrameRegUpdateFlags = UpdateInstr->getFlags();
3240  }
3241  emitLoop(InsertI);
3242  if (UpdateInstr)
3243  UpdateInstr->eraseFromParent();
3244  }
3245 
// The replacement has been emitted; drop the original tag stores.
3246  for (auto &TS : TagStores)
3247  TS.MI->eraseFromParent();
3248 }
3249 
3250 bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
3251  int64_t &Size, bool &ZeroData) {
3252  MachineFunction &MF = *MI.getParent()->getParent();
3253  const MachineFrameInfo &MFI = MF.getFrameInfo();
3254 
3255  unsigned Opcode = MI.getOpcode();
3256  ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
3257  Opcode == AArch64::STZ2GOffset);
3258 
3259  if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
3260  if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
3261  return false;
3262  if (!MI.getOperand(2).isImm() || !MI.getOperand(3).isFI())
3263  return false;
3264  Offset = MFI.getObjectOffset(MI.getOperand(3).getIndex());
3265  Size = MI.getOperand(2).getImm();
3266  return true;
3267  }
3268 
3269  if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
3270  Size = 16;
3271  else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
3272  Size = 32;
3273  else
3274  return false;
3275 
3276  if (MI.getOperand(0).getReg() != AArch64::SP || !MI.getOperand(1).isFI())
3277  return false;
3278 
3279  Offset = MFI.getObjectOffset(MI.getOperand(1).getIndex()) +
3280  16 * MI.getOperand(2).getImm();
3281  return true;
3282 }
3283 
3284 // Detect a run of memory tagging instructions for adjacent stack frame slots,
3285 // and replace them with a shorter instruction sequence:
3286 // * replace STG + STG with ST2G
3287 // * replace STGloop + STGloop with STGloop
3288 // This code needs to run when stack slot offsets are already known, but before
3289 // FrameIndex operands in STG instructions are eliminated.
// Returns the iterator at which the caller should continue scanning.
// NOTE(review): the first signature line, the declaration of Instrs and the
// head of the scanning for-loop are not visible in this listing.
3291  const AArch64FrameLowering *TFI,
3292  RegScavenger *RS) {
3293  bool FirstZeroData;
3294  int64_t Size, Offset;
3295  MachineInstr &MI = *II;
3296  MachineBasicBlock *MBB = MI.getParent();
3297  MachineBasicBlock::iterator NextI = ++II;
// Nothing can follow the last instruction of the block, and a non-tagging
// instruction cannot start a run; in both cases just move on.
3298  if (&MI == &MBB->instr_back())
3299  return II;
3300  if (!isMergeableStackTaggingInstruction(MI, Offset, Size, FirstZeroData))
3301  return II;
3302 
3304  Instrs.emplace_back(&MI, Offset, Size);
3305 
3306  constexpr int kScanLimit = 10;
3307  int Count = 0;
3309  NextI != E && Count < kScanLimit; ++NextI) {
3310  MachineInstr &MI = *NextI;
3311  bool ZeroData;
3312  int64_t Size, Offset;
3313  // Collect instructions that update memory tags with a FrameIndex operand
3314  // and (when applicable) constant size, and whose output registers are dead
3315  // (the latter is almost always the case in practice). Since these
3316  // instructions effectively have no inputs or outputs, we are free to skip
3317  // any non-aliasing instructions in between without tracking used registers.
3318  if (isMergeableStackTaggingInstruction(MI, Offset, Size, ZeroData)) {
// Zeroing and non-zeroing tag stores are never merged into one run.
3319  if (ZeroData != FirstZeroData)
3320  break;
3321  Instrs.emplace_back(&MI, Offset, Size);
3322  continue;
3323  }
3324 
3325  // Only count non-transient, non-tagging instructions toward the scan
3326  // limit.
3327  if (!MI.isTransient())
3328  ++Count;
3329 
3330  // Just in case, stop before the epilogue code starts.
3331  if (MI.getFlag(MachineInstr::FrameSetup) ||
3332  MI.getFlag(MachineInstr::FrameDestroy))
3333  break;
3334 
3335  // Reject anything that may alias the collected instructions.
3336  if (MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
3337  break;
3338  }
3339 
3340  // New code will be inserted after the last tagging instruction we've found.
3341  MachineBasicBlock::iterator InsertI = Instrs.back().MI;
3342  InsertI++;
3343 
3344  llvm::stable_sort(Instrs,
3345  [](const TagStoreInstr &Left, const TagStoreInstr &Right) {
3346  return Left.Offset < Right.Offset;
3347  });
3348 
3349  // Make sure that we don't have any overlapping stores.
3350  int64_t CurOffset = Instrs[0].Offset;
3351  for (auto &Instr : Instrs) {
3352  if (CurOffset > Instr.Offset)
3353  return NextI;
3354  CurOffset = Instr.Offset + Instr.Size;
3355  }
3356 
3357  // Find contiguous runs of tagged memory and emit shorter instruction
3358  // sequences for them when possible.
3359  TagStoreEdit TSE(MBB, FirstZeroData);
3360  Optional<int64_t> EndOffset;
3361  for (auto &Instr : Instrs) {
3362  if (EndOffset && *EndOffset != Instr.Offset) {
3363  // Found a gap.
3364  TSE.emitCode(InsertI, TFI, /*IsLast = */ false);
3365  TSE.clear();
3366  }
3367 
3368  TSE.addInstruction(Instr);
3369  EndOffset = Instr.Offset + Instr.Size;
3370  }
3371 
// Only the final run may fold a following frame-register update.
3372  TSE.emitCode(InsertI, TFI, /*IsLast = */ true);
3373 
3374  return InsertI;
3375 }
3376 } // namespace
3377 
// Walks every basic block of MF and hands each instruction position to
// tryMergeAdjacentSTG, continuing from the iterator it returns.
// NOTE(review): the first signature line and the source line numbered 3380
// (between the signature and the loop) are not visible in this listing -
// confirm whether the loop is guarded by a condition upstream.
3379  MachineFunction &MF, RegScavenger *RS = nullptr) const {
3381  for (auto &BB : MF)
3382  for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();)
3383  II = tryMergeAdjacentSTG(II, this, RS);
3384 }
3385 
3386 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP
3387 /// before the update. This is easily retrieved as it is exactly the offset
3388 /// that is set in processFunctionBeforeFrameFinalized.
/// When IgnoreSPUpdates is set, FrameReg becomes SP and the object's recorded
/// offset is returned as a fixed StackOffset; otherwise the query is forwarded
/// to getFrameIndexReference.
/// NOTE(review): the first signature line is not visible in this listing.
3390  const MachineFunction &MF, int FI, Register &FrameReg,
3391  bool IgnoreSPUpdates) const {
3392  const MachineFrameInfo &MFI = MF.getFrameInfo();
3393  if (IgnoreSPUpdates) {
3394  LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
3395  << MFI.getObjectOffset(FI) << "\n");
3396  FrameReg = AArch64::SP;
3397  return StackOffset::getFixed(MFI.getObjectOffset(FI));
3398  }
3399 
3400  return getFrameIndexReference(MF, FI, FrameReg);
3401 }
3402 
3403 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
3404 /// the parent's frame pointer, so this implementation always returns 0.
/// NOTE(review): the first signature line is not visible in this listing.
3406  const MachineFunction &MF) const {
3407  return 0;
3408 }
3409 
3410 /// Funclets only need to account for space for the callee saved registers,
3411 /// as the locals are accounted for in the parent's stack frame.
/// The result is the callee-saved size plus the maximum call frame size,
/// rounded up to the stack alignment.
/// NOTE(review): the first signature line is not visible in this listing.
3413  const MachineFunction &MF) const {
3414  // This is the size of the pushed CSRs.
3415  unsigned CSSize =
3416  MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
3417  // This is the amount of stack a funclet needs to allocate.
3418  return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
3419  getStackAlign());
3420 }
3421 
3422 namespace {
// Sort record for one stack object considered by the frame-object ordering.
struct FrameObject {
  // Set for objects that take part in the ordering; default-constructed
  // entries stay invalid.
  bool IsValid{false};
  // Index of the object in MFI.
  int ObjectIndex{0};
  // ID of the group this object belongs to; -1 when it is in no group.
  int GroupIndex{-1};
  // This object should be placed first (closest to SP).
  bool ObjectFirst{false};
  // This object's group (which always contains the object with
  // ObjectFirst==true) should be placed first.
  bool GroupFirst{false};
};
3435 
3436 class GroupBuilder {
3437  SmallVector<int, 8> CurrentMembers;
3438  int NextGroupIndex = 0;
3439  std::vector<FrameObject> &Objects;
3440 
3441 public:
3442  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
3443  void AddMember(int Index) { CurrentMembers.push_back(Index); }
3444  void EndCurrentGroup() {
3445  if (CurrentMembers.size() > 1) {
3446  // Create a new group with the current member list. This might remove them
3447  // from their pre-existing groups. That's OK, dealing with overlapping
3448  // groups is too hard and unlikely to make a difference.
3449  LLVM_DEBUG(dbgs() << "group:");
3450  for (int Index : CurrentMembers) {
3451  Objects[Index].GroupIndex = NextGroupIndex;
3452  LLVM_DEBUG(dbgs() << " " << Index);
3453  }
3454  LLVM_DEBUG(dbgs() << "\n");
3455  NextGroupIndex++;
3456  }
3457  CurrentMembers.clear();
3458  }
3459 };
3460 
3461 bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
3462  // Objects at a lower index are closer to FP; objects at a higher index are
3463  // closer to SP.
3464  //
3465  // For consistency in our comparison, all invalid objects are placed
3466  // at the end. This also allows us to stop walking when we hit the
3467  // first invalid item after it's all sorted.
3468  //
3469  // The "first" object goes first (closest to SP), followed by the members of
3470  // the "first" group.
3471  //
3472  // The rest are sorted by the group index to keep the groups together.
3473  // Higher numbered groups are more likely to be around longer (i.e. untagged
3474  // in the function epilogue and not at some earlier point). Place them closer
3475  // to SP.
3476  //
3477  // If all else equal, sort by the object index to keep the objects in the
3478  // original order.
3479  return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
3480  A.ObjectIndex) <
3481  std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
3482  B.ObjectIndex);
3483 }
3484 } // namespace
3485 
// Reorders ObjectsToAllocate in place so that stack slots tagged by the same
// run of tagging instructions are kept together, and so that the slot holding
// the tagged base pointer (if any) is placed first. Does nothing when
// OrderFrameObjects is false or the list is empty.
// NOTE(review): the first signature line and the declaration of TBPI are not
// visible in this listing.
3487  const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3488  if (!OrderFrameObjects || ObjectsToAllocate.empty())
3489  return;
3490 
3491  const MachineFrameInfo &MFI = MF.getFrameInfo();
// One record per frame index; only the objects being allocated are marked
// valid.
3492  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
3493  for (auto &Obj : ObjectsToAllocate) {
3494  FrameObjects[Obj].IsValid = true;
3495  FrameObjects[Obj].ObjectIndex = Obj;
3496  }
3497 
3498  // Identify stack slots that are tagged at the same time.
3499  GroupBuilder GB(FrameObjects);
3500  for (auto &MBB : MF) {
3501  for (auto &MI : MBB) {
3502  if (MI.isDebugInstr())
3503  continue;
// Index of the operand that may hold the frame index, or -1 for
// non-tagging instructions.
3504  int OpIndex;
3505  switch (MI.getOpcode()) {
3506  case AArch64::STGloop:
3507  case AArch64::STZGloop:
3508  OpIndex = 3;
3509  break;
3510  case AArch64::STGOffset:
3511  case AArch64::STZGOffset:
3512  case AArch64::ST2GOffset:
3513  case AArch64::STZ2GOffset:
3514  OpIndex = 1;
3515  break;
3516  default:
3517  OpIndex = -1;
3518  }
3519 
3520  int TaggedFI = -1;
3521  if (OpIndex >= 0) {
3522  const MachineOperand &MO = MI.getOperand(OpIndex);
3523  if (MO.isFI()) {
3524  int FI = MO.getIndex();
3525  if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
3526  FrameObjects[FI].IsValid)
3527  TaggedFI = FI;
3528  }
3529  }
3530 
3531  // A tagging instruction for one of the objects being allocated extends
3532  // the current group; any other instruction closes it.
3533  if (TaggedFI >= 0)
3534  GB.AddMember(TaggedFI);
3535  else
3536  GB.EndCurrentGroup();
3537  }
3538  // Groups should never span multiple basic blocks.
3539  GB.EndCurrentGroup();
3540  }
3541 
3542  // If the function's tagged base pointer is pinned to a stack slot, we want to
3543  // put that slot first when possible. This will likely place it at SP + 0,
3544  // and save one instruction when generating the base pointer because IRG does
3545  // not allow an immediate offset.
3546  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
// NOTE(review): TBPI is dereferenced as a frame index below; its declaration
// is not visible here - presumably an Optional obtained from AFI.
3548  if (TBPI) {
3549  FrameObjects[*TBPI].ObjectFirst = true;
3550  FrameObjects[*TBPI].GroupFirst = true;
3551  int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
3552  if (FirstGroupIndex >= 0)
3553  for (FrameObject &Object : FrameObjects)
3554  if (Object.GroupIndex == FirstGroupIndex)
3555  Object.GroupFirst = true;
3556  }
3557 
3558  llvm::stable_sort(FrameObjects, FrameObjectCompare);
3559 
// Write the sorted order back; the number of valid records equals the number
// of entries in ObjectsToAllocate.
3560  int i = 0;
3561  for (auto &Obj : FrameObjects) {
3562  // All invalid items are sorted at the end, so it's safe to stop.
3563  if (!Obj.IsValid)
3564  break;
3565  ObjectsToAllocate[i++] = Obj.ObjectIndex;
3566  }
3567 
3568  LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
3569  : FrameObjects) {
3570  if (!Obj.IsValid)
3571  break;
3572  dbgs() << " " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
3573  if (Obj.ObjectFirst)
3574  dbgs() << ", first";
3575  if (Obj.GroupFirst)
3576  dbgs() << ", group-first";
3577  dbgs() << "\n";
3578  });
3579 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::MachineFunction::hasWinCFI
bool hasWinCFI() const
Definition: MachineFunction.h:637
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
llvm::isAsynchronousEHPersonality
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
Definition: EHPersonalities.h:50
llvm::MachineFrameInfo::isMaxCallFrameSizeComputed
bool isMaxCallFrameSizeComputed() const
Definition: MachineFrameInfo.h:650
llvm::MachineFrameInfo::hasVarSizedObjects
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Definition: MachineFrameInfo.h:351
llvm::AArch64Subtarget::isTargetWindows
bool isTargetWindows() const
Definition: AArch64Subtarget.h:484
AArch64RegisterInfo.h
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
MCDwarf.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm::MachineFrameInfo::estimateStackSize
uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Definition: MachineFrameInfo.cpp:137
llvm
Definition: AllocatorList.h:23
llvm::MachineInstrBuilder::copyImplicitOps
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Definition: MachineInstrBuilder.h:316
AArch64MachineFunctionInfo.h
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
getFixedObjectSize
static unsigned getFixedObjectSize(const MachineFunction &MF, const AArch64FunctionInfo *AFI, bool IsWin64, bool IsFunclet)
Returns the size of the fixed object area (allocated next to sp on entry). On Win64 this may include a...
Definition: AArch64FrameLowering.cpp:309
llvm::LivePhysRegs::addReg
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:79
DefaultSafeSPDisplacement
static const unsigned DefaultSafeSPDisplacement
This is the biggest offset to the stack pointer we can encode in aarch64 instructions (without using ...
Definition: AArch64FrameLowering.cpp:272
llvm::AArch64_AM::LSL
@ LSL
Definition: AArch64AddressingModes.h:34
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::MachineModuleInfo::getContext
const MCContext & getContext() const
Definition: MachineModuleInfo.h:167
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
produceCompactUnwindFrame
static bool produceCompactUnwindFrame(MachineFunction &MF)
Definition: AArch64FrameLowering.cpp:2095
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:225
llvm::Function
Definition: Function.h:61
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:51
llvm::MachineInstrBuilder::addCFIIndex
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
Definition: MachineInstrBuilder.h:248
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::MachineBasicBlock::isEHFuncletEntry
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
Definition: MachineBasicBlock.h:461
contains
return AArch64::GPR64RegClass contains(Reg)
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineFunction::getMachineMemOperand
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Definition: MachineFunction.cpp:430
ErrorHandling.h
llvm::MCRegisterInfo::getDwarfRegNum
int getDwarfRegNum(MCRegister RegNum, bool isEH) const
Map a target register to an equivalent dwarf register number.
Definition: MCRegisterInfo.cpp:68
llvm::NVPTX::PTXCvtMode::RPI
@ RPI
Definition: NVPTX.h:135
llvm::getBLRCallOpcode
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
Definition: AArch64InstrInfo.cpp:7360
llvm::AArch64RegisterInfo::getBaseRegister
unsigned getBaseRegister() const
Definition: AArch64RegisterInfo.cpp:371
llvm::CallingConv::PreserveMost
@ PreserveMost
Definition: CallingConv.h:66
llvm::AArch64FunctionInfo::getTaggedBasePointerIndex
Optional< int > getTaggedBasePointerIndex() const
Definition: AArch64MachineFunctionInfo.h:349
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:132
MachineBasicBlock.h
llvm::LivePhysRegs
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:48
Right
Vector Shift Left Right
Definition: README_P9.txt:118
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:652
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1398
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
Shift
bool Shift
Definition: README.txt:468
llvm::AArch64FrameLowering::enableStackSlotScavenging
bool enableStackSlotScavenging(const MachineFunction &MF) const override
Returns true if the stack slot holes in the fixed and callee-save stack area should be used when allo...
Definition: AArch64FrameLowering.cpp:2834
llvm::AArch64Subtarget::getInstrInfo
const AArch64InstrInfo * getInstrInfo() const override
Definition: AArch64Subtarget.h:305
llvm::StackOffset::getFixed
ScalarTy getFixed() const
Definition: TypeSize.h:149
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
llvm::AttributeList
Definition: Attributes.h:385
TargetInstrInfo.h
fixupSEHOpcode
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, unsigned LocalStackSize)
Definition: AArch64FrameLowering.cpp:853
getPrologueDeath
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg)
Definition: AArch64FrameLowering.cpp:2085
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:345
llvm::Optional< int >
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::AArch64FrameLowering::hasFP
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Definition: AArch64FrameLowering.cpp:353
llvm::MachineFrameInfo::getObjectIndexEnd
int getObjectIndexEnd() const
Return one past the maximum frame object index.
Definition: MachineFrameInfo.h:391
determineSVEStackObjectOffsets
static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex, bool AssignOffsets)
Definition: AArch64FrameLowering.cpp:2869
llvm::CodeModel::Kernel
@ Kernel
Definition: CodeGen.h:28
llvm::MachineOperand::isFI
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
Definition: MachineOperand.h:328
llvm::AArch64FunctionInfo::setTaggedBasePointerOffset
void setTaggedBasePointerOffset(unsigned Offset)
Definition: AArch64MachineFunctionInfo.h:357
llvm::AArch64FrameLowering::emitCalleeSavedFrameMoves
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
With basic block sections, emit callee saved frame moves for basic blocks that are in a different sec...
Definition: AArch64FrameLowering.cpp:529
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::RegScavenger::FindUnusedReg
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
Definition: RegisterScavenging.cpp:268
llvm::MachineFrameInfo::getMaxCallFrameSize
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
Definition: MachineFrameInfo.h:643
llvm::ARCISD::BL
@ BL
Definition: ARCISelLowering.h:34
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::AArch64Subtarget::getTargetLowering
const AArch64TargetLowering * getTargetLowering() const override
Definition: AArch64Subtarget.h:302
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MCCFIInstruction::cfiDefCfaOffset
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:502
MachineRegisterInfo.h
llvm::MachineInstr::FrameDestroy
@ FrameDestroy
Definition: MachineInstr.h:84
isTargetWindows
static bool isTargetWindows(const MachineFunction &MF)
Definition: AArch64FrameLowering.cpp:1066
llvm::MachineBasicBlock::erase
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
Definition: MachineBasicBlock.cpp:1324
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::AArch64RegisterInfo::cannotEliminateFrame
bool cannotEliminateFrame(const MachineFunction &MF) const
Definition: AArch64RegisterInfo.cpp:449
clear
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:233
llvm::classifyEHPersonality
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
Definition: EHPersonalities.cpp:21
llvm::AArch64FrameLowering
Definition: AArch64FrameLowering.h:23
llvm::AArch64FrameOffsetCannotUpdate
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
Definition: AArch64InstrInfo.h:378
llvm::MachineInstr::getFlags
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:325
llvm::AlignStyle::Left
@ Left
ReverseCSRRestoreSeq
static cl::opt< bool > ReverseCSRRestoreSeq("reverse-csr-restore-seq", cl::desc("reverse the CSR restore sequence"), cl::init(false), cl::Hidden)
CommandLine.h
llvm::AArch64FrameLowering::processFunctionBeforeFrameFinalized
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
Definition: AArch64FrameLowering.cpp:2946
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:117
llvm::getDefRegState
unsigned getDefRegState(bool B)
Definition: MachineInstrBuilder.h:503
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:749
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
OrderFrameObjects
static cl::opt< bool > OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden)
llvm::MachineBasicBlock::insertAfter
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
Definition: MachineBasicBlock.h:784
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
AArch64TargetMachine.h
AArch64InstrInfo.h
llvm::AArch64FrameLowering::resolveFrameOffsetReference
StackOffset resolveFrameOffsetReference(const MachineFunction &MF, int64_t ObjectOffset, bool isFixed, bool isSVE, Register &FrameReg, bool PreferFP, bool ForSimm) const
Definition: AArch64FrameLowering.cpp:1952
llvm::TargetFrameLowering::getOffsetOfLocalArea
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Definition: TargetFrameLowering.h:147
TargetMachine.h
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
llvm::AArch64FunctionInfo::isStackRealigned
bool isStackRealigned() const
Definition: AArch64MachineFunctionInfo.h:187
llvm::MachineOperand::CreateImm
static MachineOperand CreateImm(int64_t Val)
Definition: MachineOperand.h:770
llvm::AArch64InstrInfo
Definition: AArch64InstrInfo.h:38
llvm::CallingConv::GHC
@ GHC
Definition: CallingConv.h:51
llvm::encodeSLEB128
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::AArch64FrameLowering::eliminateCallFramePseudoInstr
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
Definition: AArch64FrameLowering.cpp:393
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:653
invalidateWindowsRegisterPairing
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, bool NeedsWinCFI, bool IsFirst)
Definition: AArch64FrameLowering.cpp:2103
llvm::LivePhysRegs::addLiveIns
void addLiveIns(const MachineBasicBlock &MBB)
Adds all live-in registers of basic block MBB.
Definition: LivePhysRegs.cpp:236
llvm::ISD::CATCHRET
@ CATCHRET
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:989
llvm::TargetFrameLowering::getStackAlign
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
Definition: TargetFrameLowering.h:99
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::MachineRegisterInfo::isReserved
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
Definition: MachineRegisterInfo.h:900
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:154
llvm::AArch64TargetLowering::supportSwiftError
bool supportSwiftError() const override
Return true if the target supports swifterror attribute.
Definition: AArch64ISelLowering.h:777
llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:217
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::AArch64FunctionInfo::setHasRedZone
void setHasRedZone(bool s)
Definition: AArch64MachineFunctionInfo.h:279
getStackOffset
static StackOffset getStackOffset(const MachineFunction &MF, int64_t ObjectOffset)
Definition: AArch64FrameLowering.cpp:1924
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
llvm::MachineInstr::FrameSetup
@ FrameSetup
Definition: MachineInstr.h:82
llvm::make_scope_exit
LLVM_NODISCARD detail::scope_exit< typename std::decay< Callable >::type > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:58
llvm::MCID::Flag
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
llvm::MachineModuleInfo
This class contains meta information specific to a module.
Definition: MachineModuleInfo.h:78
getSVEStackSize
static StackOffset getSVEStackSize(const MachineFunction &MF)
Returns the size of the entire SVE stackframe (calleesaves + spills).
Definition: AArch64FrameLowering.cpp:326
findScratchNonCalleeSaveRegister
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB)
Definition: AArch64FrameLowering.cpp:582
llvm::AArch64_AM::getShifterImm
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
Definition: AArch64AddressingModes.h:98
llvm::RegScavenger::backward
void backward()
Update internal register state and move MBB iterator backwards.
Definition: RegisterScavenging.cpp:241
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MachineFrameInfo::getStackID
uint8_t getStackID(int ObjectIdx) const
Definition: MachineFrameInfo.h:720
llvm::RegScavenger::enterBasicBlockEnd
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
Definition: RegisterScavenging.cpp:89
llvm::MachineBasicBlock::instr_back
MachineInstr & instr_back()
Definition: MachineBasicBlock.h:243
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:45
llvm::AArch64_AM::getShiftValue
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
Definition: AArch64AddressingModes.h:85
llvm::MachineFunction::setHasWinCFI
void setHasWinCFI(bool v)
Definition: MachineFunction.h:640
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:563
DebugLoc.h
llvm::MachineFrameInfo::getObjectOffset
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
Definition: MachineFrameInfo.h:504
llvm::AArch64FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
Definition: AArch64FrameLowering.cpp:1895
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::BitVector
Definition: BitVector.h:74
appendVGScaledOffsetExpr
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
Definition: AArch64FrameLowering.cpp:439
llvm::CallingConv::Win64
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:164
llvm::RegState::Implicit
@ Implicit
Not emitted register (e.g. carry, or temporary result).
Definition: MachineInstrBuilder.h:47
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::StackOffset::getScalable
ScalarTy getScalable() const
Definition: TypeSize.h:150
llvm::MCCFIInstruction::createEscape
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:576
llvm::emitFrameOffset
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
Definition: AArch64InstrInfo.cpp:3855
llvm::AArch64FunctionInfo::hasStackFrame
bool hasStackFrame() const
Definition: AArch64MachineFunctionInfo.h:184
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MachineFrameInfo::getObjectIndexBegin
int getObjectIndexBegin() const
Return the minimum frame object index.
Definition: MachineFrameInfo.h:388
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:248
llvm::MachineInstrBuilder::addExternalSymbol
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:185
llvm::None
const NoneType None
Definition: None.h:23
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:49
llvm::MachineFrameInfo::isCalleeSavedInfoValid
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Definition: MachineFrameInfo.h:809
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
fixupCalleeSaveRestoreStackOffset
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI, bool *HasWinCFI)
Definition: AArch64FrameLowering.cpp:975
llvm::MCCFIInstruction
Definition: MCDwarf.h:441
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFrameInfo::isDeadObjectIndex
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
Definition: MachineFrameInfo.h:734
llvm::SmallString< 64 >
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:239
AArch64AddressingModes.h
llvm::OutputFileType::Object
@ Object
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:117
StackTaggingMergeSetTag
static cl::opt< bool > StackTaggingMergeSetTag("stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), cl::Hidden)
llvm::TargetOptions::DisableFramePointerElim
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
Definition: TargetOptionsImpl.cpp:24
llvm::MachineFunction::getMMI
MachineModuleInfo & getMMI() const
Definition: MachineFunction.h:506
llvm::SmallString::append
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:67
llvm::TargetRegisterInfo::getSpillAlign
Align getSpillAlign(const TargetRegisterClass &RC) const
Return the minimum required alignment in bytes for a spill slot for a register of this class.
Definition: TargetRegisterInfo.h:292
llvm::AArch64FrameLowering::resolveFrameIndexReference
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, bool PreferFP, bool ForSimm) const
Definition: AArch64FrameLowering.cpp:1941
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::MachineInstrBuilder::addFrameIndex
const MachineInstrBuilder & addFrameIndex(int Idx) const
Definition: MachineInstrBuilder.h:153
llvm::MachineInstrBuilder::setMIFlag
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
Definition: MachineInstrBuilder.h:279
llvm::isAArch64FrameOffsetLegal
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
Definition: AArch64InstrInfo.cpp:4075
llvm::AArch64FunctionInfo::getCalleeSaveBaseToFrameRecordOffset
int getCalleeSaveBaseToFrameRecordOffset() const
Definition: AArch64MachineFunctionInfo.h:361
llvm::cl::opt< bool >
llvm::TargetRegisterInfo::getSpillSize
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
Definition: TargetRegisterInfo.h:280
llvm::WinEHFuncInfo
Definition: WinEHFuncInfo.h:90
getSVECalleeSaveSlotRange
static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI, int &Min, int &Max)
Returns true if there are any SVE callee saves.
Definition: AArch64FrameLowering.cpp:2841
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:70
llvm::MachineFrameInfo::getObjectSize
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
Definition: MachineFrameInfo.h:451
AArch64FrameLowering.h
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:228
computeCalleeSaveRegisterPairs
static void computeCalleeSaveRegisterPairs(MachineFunction &MF, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI, SmallVectorImpl< RegPairInfo > &RegPairs, bool &NeedShadowCallStackProlog, bool NeedsFrameRecord)
Definition: AArch64FrameLowering.cpp:2179
llvm::AArch64FrameLowering::spillCalleeSavedRegisters
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
Definition: AArch64FrameLowering.cpp:2367
getArgumentPopSize
static uint64_t getArgumentPopSize(MachineFunction &MF, MachineBasicBlock &MBB)
Returns the argument pop size.
Definition: AArch64FrameLowering.cpp:196
LEB128.h
llvm::LivePhysRegs::available
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
Definition: LivePhysRegs.cpp:139
llvm::MCCFIInstruction::createNegateRAState
static MCCFIInstruction createNegateRAState(MCSymbol *L)
.cfi_negate_ra_state AArch64 negate RA state.
Definition: MCDwarf.h:541
llvm::MachineRegisterInfo::getCalleeSavedRegs
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Definition: MachineRegisterInfo.cpp:620
llvm::AArch64FunctionInfo::setCalleeSaveBaseToFrameRecordOffset
void setCalleeSaveBaseToFrameRecordOffset(int Offset)
Definition: AArch64MachineFunctionInfo.h:364
llvm::ISD::CLEANUPRET
@ CLEANUPRET
CLEANUPRET - Represents a return from a cleanup block funclet.
Definition: ISDOpcodes.h:993
llvm::AArch64FunctionInfo
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
Definition: AArch64MachineFunctionInfo.h:37
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MCPhysReg
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
llvm::RegScavenger
Definition: RegisterScavenging.h:34
llvm::MachineFrameInfo::getObjectAlign
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
Definition: MachineFrameInfo.h:471
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::MachineFrameInfo::setObjectOffset
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
Definition: MachineFrameInfo.h:538
llvm::TargetStackID::ScalableVector
@ ScalableVector
Definition: TargetFrameLowering.h:30
llvm::StackOffset::getScalable
static StackOffset getScalable(ScalarTy Scalable)
Definition: TypeSize.h:144
windowsRequiresStackProbe
static bool windowsRequiresStackProbe(MachineFunction &MF, uint64_t StackSizeInBytes)
Definition: AArch64FrameLowering.cpp:626
llvm::MachineBasicBlock::getLastNonDebugInstr
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
Definition: MachineBasicBlock.cpp:266
llvm::AArch64FrameLowering::getStackIDForScalableVectors
TargetStackID::Value getStackIDForScalableVectors() const override
Returns the StackID that scalable vectors should be associated with.
Definition: AArch64FrameLowering.cpp:303
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:115
llvm::MachineInstr::setFlags
void setFlags(unsigned flags)
Definition: MachineInstr.h:339
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AArch64FunctionInfo::getLocalStackSize
uint64_t getLocalStackSize() const
Definition: AArch64MachineFunctionInfo.h:200
InsertSEH
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, MachineInstr::MIFlag Flag)
Definition: AArch64FrameLowering.cpp:733
llvm::AArch64FrameLowering::assignCalleeSavedSpillSlots
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
Definition: AArch64FrameLowering.cpp:2819
llvm::MachineFrameInfo::CreateFixedObject
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
Definition: MachineFrameInfo.cpp:83
llvm::AArch64FunctionInfo::setStackRealigned
void setStackRealigned(bool s)
Definition: AArch64MachineFunctionInfo.h:188
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:571
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::RegScavenger::addScavengingFrameIndex
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Definition: RegisterScavenging.h:123
llvm::MachineInstrBuilder::addMemOperand
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Definition: MachineInstrBuilder.h:203
llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:88
llvm::AArch64FunctionInfo::getVarArgsGPRSize
unsigned getVarArgsGPRSize() const
Definition: AArch64MachineFunctionInfo.h:287
llvm::AArch64Subtarget::isCallingConvWin64
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: AArch64Subtarget.h:569
llvm::MCCFIInstruction::cfiDefCfa
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:488
MachineModuleInfo.h
llvm::WinEHFuncInfo::UnwindHelpFrameIdx
int UnwindHelpFrameIdx
Definition: WinEHFuncInfo.h:99
llvm::AArch64FrameLowering::getNonLocalFrameIndexReference
StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, int FI) const override
getNonLocalFrameIndexReference - This method returns the offset used to reference a frame index locat...
Definition: AArch64FrameLowering.cpp:1905
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::AArch64FrameLowering::canUseRedZone
bool canUseRedZone(const MachineFunction &MF) const
Can this function use the red zone for local allocations.
Definition: AArch64FrameLowering.cpp:331
llvm::MachineInstr::MIFlag
MIFlag
Definition: MachineInstr.h:80
llvm::MachineFunction
Definition: MachineFunction.h:227
convertCalleeSaveRestoreToSPPrePostIncDec
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, bool NeedsWinCFI, bool *HasWinCFI, bool InProlog=true)
Definition: AArch64FrameLowering.cpp:875
TargetOptions.h
llvm::MachineFrameInfo::getCalleeSavedInfo
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee-saved info vector for the current function.
Definition: MachineFrameInfo.h:796
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:241
llvm::TargetMachine::getMCAsmInfo
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
Definition: TargetMachine.h:202
EnableRedZone
static cl::opt< bool > EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden)
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
adaptForLdStOpt
static void adaptForLdStOpt(MachineBasicBlock &MBB, MachineBasicBlock::iterator FirstSPPopI, MachineBasicBlock::iterator LastPopI)
Definition: AArch64FrameLowering.cpp:1034
llvm::AArch64Subtarget::isXRegisterReserved
bool isXRegisterReserved(size_t i) const
Definition: AArch64Subtarget.h:353
llvm::MachineFrameInfo::setStackID
void setStackID(int ObjectIdx, uint8_t ID)
Definition: MachineFrameInfo.h:725
llvm::MachineFrameInfo::hasPatchPoint
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:384
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
MCAsmInfo.h
DataLayout.h
llvm::MachineFrameInfo::CreateStackObject
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Definition: MachineFrameInfo.cpp:51
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:863
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AArch64Subtarget::hasPAuth
bool hasPAuth() const
Definition: AArch64Subtarget.h:509
llvm::AArch64RegisterInfo
Definition: AArch64RegisterInfo.h:26
llvm::AArch64FunctionInfo::setMinMaxSVECSFrameIndex
void setMinMaxSVECSFrameIndex(int Min, int Max)
Definition: AArch64MachineFunctionInfo.h:265
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::MCContext::createTempSymbol
MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:268
llvm::CodeModel::Tiny
@ Tiny
Definition: CodeGen.h:28
llvm::EHPersonality
EHPersonality
Definition: EHPersonalities.h:22
llvm::StackOffset::getFixed
static StackOffset getFixed(ScalarTy Fixed)
Definition: TypeSize.h:143
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
Prolog
@ Prolog
Definition: AArch64LowerHomogeneousPrologEpilog.cpp:124
llvm::MachineMemOperand::MOLoad
@ MOLoad
The memory access reads data.
Definition: MachineMemOperand.h:134
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:585
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:706
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::MachineFrameInfo::isFrameAddressTaken
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:366
llvm::AArch64_AM::getArithExtendImm
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
Definition: AArch64AddressingModes.h:170
llvm::MachineFrameInfo::hasCalls
bool hasCalls() const
Return true if the current function has any function calls.
Definition: MachineFrameInfo.h:602
llvm::AArch64FrameLowering::hasReservedCallFrame
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
Definition: AArch64FrameLowering.cpp:389
CallingConv.h
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::AArch64Subtarget::getRegisterInfo
const AArch64RegisterInfo * getRegisterInfo() const override
Definition: AArch64Subtarget.h:306
Attributes.h
isFuncletReturnInstr
static bool isFuncletReturnInstr(const MachineInstr &MI)
Definition: AArch64FrameLowering.cpp:1597
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:447
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1640
llvm::AArch64_AM::UXTX
@ UXTX
Definition: AArch64AddressingModes.h:43
llvm::AArch64FrameLowering::getWinEHParentFrameOffset
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve the parent's frame pointer...
Definition: AArch64FrameLowering.cpp:3405
llvm::MachineRegisterInfo::isLiveIn
bool isLiveIn(Register Reg) const
Definition: MachineRegisterInfo.cpp:436
kSetTagLoopThreshold
static const int kSetTagLoopThreshold
Definition: AArch64SelectionDAGInfo.cpp:56
llvm::AArch64FunctionInfo::hasCalleeSaveStackFreeSpace
bool hasCalleeSaveStackFreeSpace() const
Definition: AArch64MachineFunctionInfo.h:190
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::MachineFunction::needsFrameMoves
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
Definition: MachineFunction.cpp:568
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:274
uint16_t
llvm::AArch64FunctionInfo::getStackSizeSVE
uint64_t getStackSizeSVE() const
Definition: AArch64MachineFunctionInfo.h:182
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:551
MachineFrameInfo.h
llvm::MachineFunction::getWinEHFuncInfo
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
Definition: MachineFunction.h:599
llvm::AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
Definition: AArch64FrameLowering.cpp:3378
llvm::MachineOperand::getIndex
int getIndex() const
Definition: MachineOperand.h:554
Function.h
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AArch64FunctionInfo::setLocalStackSize
void setLocalStackSize(uint64_t Size)
Definition: AArch64MachineFunctionInfo.h:199
llvm::MachineInstrBuilder::setMemRefs
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
Definition: MachineInstrBuilder.h:209
llvm::AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
Definition: AArch64InstrInfo.cpp:3712
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.cpp:74
llvm::AArch64FunctionInfo::getSVECalleeSavedStackSize
unsigned getSVECalleeSavedStackSize() const
Definition: AArch64MachineFunctionInfo.h:261
llvm::MachineRegisterInfo::isPhysRegUsed
bool isPhysRegUsed(MCRegister PhysReg) const
Return true if the specified register is modified or read in this function.
Definition: MachineRegisterInfo.cpp:585
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::AArch64FrameLowering::getSEHFrameIndexOffset
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const
Definition: AArch64FrameLowering.cpp:1931
llvm::AArch64FrameLowering::getFrameIndexReferencePreferSP
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
For Win64 AArch64 EH, the offset to the Unwind object is from the SP before the update.
Definition: AArch64FrameLowering.cpp:3389
llvm::SmallString::str
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:259
llvm::MachineFunction::hasEHFunclets
bool hasEHFunclets() const
Definition: MachineFunction.h:965
llvm::MachineMemOperand::MOStore
@ MOStore
The memory access writes data.
Definition: MachineMemOperand.h:136
WinEHFuncInfo.h
llvm::AArch64FrameLowering::emitPrologue
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
Definition: AArch64FrameLowering.cpp:1084
llvm::TargetRegisterInfo::hasStackRealignment
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
Definition: TargetRegisterInfo.h:920
llvm::AArch64Subtarget::isTargetMachO
bool isTargetMachO() const
Definition: AArch64Subtarget.h:490
llvm::AArch64FunctionInfo::getArgumentStackToRestore
unsigned getArgumentStackToRestore() const
Definition: AArch64MachineFunctionInfo.h:170
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
InsertReturnAddressAuth
static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB)
Definition: AArch64FrameLowering.cpp:1566
invalidateRegisterPairing
static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, bool NeedsFrameRecord, bool IsFirst)
Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
Definition: AArch64FrameLowering.cpp:2133
llvm::getKillRegState
unsigned getKillRegState(bool B)
Definition: MachineInstrBuilder.h:509
llvm::AArch64TargetLowering::getRedZoneSize
unsigned getRedZoneSize(const Function &F) const
Definition: AArch64ISelLowering.h:818
llvm::MachineFunction::addFrameInst
LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst)
Definition: MachineFunction.cpp:285
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
AArch64Subtarget.h
SmallVector.h
estimateRSStackSizeLimit
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
Definition: AArch64FrameLowering.cpp:277
llvm::MachinePointerInfo::getFixedStack
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Definition: MachineOperand.cpp:995
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::AArch64FrameLowering::emitEpilogue
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Definition: AArch64FrameLowering.cpp:1607
llvm::MachineInstrBuilder::setMIFlags
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Definition: MachineInstrBuilder.h:274
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
getFPOffset
static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
Definition: AArch64FrameLowering.cpp:1910
llvm::AArch64FrameLowering::canUseAsPrologue
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
Definition: AArch64FrameLowering.cpp:611
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::AArch64FrameLowering::getWinEHFuncletFrameSize
unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const
Funclets only need to account for space for the callee saved registers, as the locals are accounted f...
Definition: AArch64FrameLowering.cpp:3412
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::encodeULEB128
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
ScopeExit.h
llvm::AArch64FunctionInfo::getCalleeSavedStackSize
unsigned getCalleeSavedStackSize(const MachineFrameInfo &MFI) const
Definition: AArch64MachineFunctionInfo.h:215
MachineMemOperand.h
llvm::SmallVectorImpl< char >
MachineOperand.h
llvm::TargetFrameLowering::StackGrowsDown
@ StackGrowsDown
Definition: TargetFrameLowering.h:46
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:703
llvm::MCCFIInstruction::createOffset
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:515
llvm::AArch64FrameLowering::determineCalleeSaves
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: AArch64FrameLowering.cpp:2652
llvm::AArch64FrameLowering::restoreCalleeSavedRegisters
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
Definition: AArch64FrameLowering.cpp:2525
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::TargetFrameLowering::determineCalleeSaves
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Definition: TargetFrameLoweringImpl.cpp:78
llvm::MachineFrameInfo::hasStackMap
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
Definition: MachineFrameInfo.h:378
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:411
RegisterScavenging.h
llvm::AArch64Subtarget
Definition: AArch64Subtarget.h:38
raw_ostream.h
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::AArch64FrameLowering::orderFrameObjects
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack frame.
Definition: AArch64FrameLowering.cpp:3486
llvm::AArch64RegisterInfo::hasBasePointer
bool hasBasePointer(const MachineFunction &MF) const
Definition: AArch64RegisterInfo.cpp:373
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MCAsmInfo::usesWindowsCFI
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:686
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::HexagonInstrInfo::copyPhysReg
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
Emit instructions to copy a pair of physical registers.
Definition: HexagonInstrInfo.cpp:812
TargetRegisterInfo.h
EnableHomogeneousPrologEpilog
cl::opt< bool > EnableHomogeneousPrologEpilog("homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)"))
Debug.h
needsWinCFI
static bool needsWinCFI(const MachineFunction &MF)
Definition: AArch64FrameLowering.cpp:643
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::AArch64RegisterInfo::isReservedReg
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
Definition: AArch64RegisterInfo.cpp:331
llvm::AArch64InstrInfo::isSEHInstruction
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instruction is a SEH instruction used for unwinding on Windows.
Definition: AArch64InstrInfo.cpp:1004
llvm::AArch64FunctionInfo::setStackSizeSVE
void setStackSizeSVE(uint64_t S)
Definition: AArch64MachineFunctionInfo.h:177
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
IsSVECalleeSave
static bool IsSVECalleeSave(MachineBasicBlock::iterator I)
Definition: AArch64FrameLowering.cpp:1071
LivePhysRegs.h
llvm::StackOffset::get
static StackOffset get(ScalarTy Fixed, ScalarTy Scalable)
Definition: TypeSize.h:145