LLVM 17.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
// To access data in a frame, a constant offset must be computable at compile
// time from one of the pointers (fp, bp, sp). The sizes of the areas with a
// dotted background cannot be computed at compile time if those areas are
// present, so all three of fp, bp and sp must be set up in order to access
// all contents in the frame areas, assuming all of the frame areas are
// non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
168static cl::opt<bool>
169SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
174 unsigned NumAlignedDPRCS2Regs);
175
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185}
186
187/// Returns true if the target can safely skip saving callee-saved registers
188/// for noreturn nounwind functions.
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197}
198
199/// hasFP - Return true if the specified function should have a dedicated frame
200/// pointer register. This is true if the function has variable sized allocas
201/// or if frame pointer elimination is disabled.
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213}
214
215/// isFPReserved - Return true if the frame pointer register should be
216/// considered a reserved register on the scope of the specified function.
218 return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
219}
220
221/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222/// not required, we reserve argument space for call sites in the function
223/// immediately on entry to the current function. This eliminates the need for
224/// add/sub sp brackets around call sites. Returns true if the call frame is
225/// included as part of the stack frame.
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offset to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even makes it impossible to scavenge a register.
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
236 return !MFI.hasVarSizedObjects();
237}
238
239/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240/// call frame pseudos can be simplified. Unlike most targets, having a FP
241/// is not sufficient here since we still may reference some objects via SP
242/// even when FP is available in Thumb2 mode.
243bool
246}
247
248// Returns how much of the incoming argument stack area we should clean up in an
249// epilogue. For the C calling convention this will be 0, for guaranteed tail
250// call conventions it can be positive (a normal return or a tail call to a
251// function that uses less stack space for arguments) or negative (for a tail
252// call to a function that needs more stack space than us for arguments).
256 bool IsTailCallReturn = false;
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri;
261 }
263
264 int ArgumentPopSize = 0;
265 if (IsTailCallReturn) {
266 MachineOperand &StackAdjust = MBBI->getOperand(1);
267
268 // For a tail-call in a callee-pops-arguments environment, some or all of
269 // the stack may actually be in use for the call's arguments, this is
270 // calculated during LowerCall and consumed here...
271 ArgumentPopSize = StackAdjust.getImm();
272 } else {
273 // ... otherwise the amount to pop is *all* of the argument space,
274 // conveniently stored in the MachineFunctionInfo by
275 // LowerFormalArguments. This will, of course, be zero for the C calling
276 // convention.
277 ArgumentPopSize = AFI->getArgumentStackToRestore();
278 }
279
280 return ArgumentPopSize;
281}
282
283static bool needsWinCFI(const MachineFunction &MF) {
284 const Function &F = MF.getFunction();
285 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
286 F.needsUnwindTableEntry();
287}
288
289// Given a load or a store instruction, generate an appropriate unwinding SEH
290// code on Windows.
292 const TargetInstrInfo &TII,
293 unsigned Flags) {
294 unsigned Opc = MBBI->getOpcode();
296 MachineFunction &MF = *MBB->getParent();
297 DebugLoc DL = MBBI->getDebugLoc();
299 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
300 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
301
303
304 switch (Opc) {
305 default:
306 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
307 break;
308 case ARM::t2ADDri: // add.w r11, sp, #xx
309 case ARM::t2ADDri12: // add.w r11, sp, #xx
310 case ARM::t2MOVTi16: // movt r4, #xx
311 case ARM::tBL: // bl __chkstk
312 // These are harmless if used for just setting up a frame pointer,
313 // but that frame pointer can't be relied upon for unwinding, unless
314 // set up with SEH_SaveSP.
315 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
316 .addImm(/*Wide=*/1)
318 break;
319
320 case ARM::t2MOVi16: { // mov(w) r4, #xx
321 bool Wide = MBBI->getOperand(1).getImm() >= 256;
322 if (!Wide) {
323 MachineInstrBuilder NewInstr =
324 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
325 NewInstr.add(MBBI->getOperand(0));
326 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
327 for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
328 NewInstr.add(MBBI->getOperand(i));
329 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
330 MBB->erase(MBBI);
331 MBBI = NewMBBI;
332 }
333 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
334 break;
335 }
336
337 case ARM::tBLXr: // blx r12 (__chkstk)
338 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
339 .addImm(/*Wide=*/0)
341 break;
342
343 case ARM::t2MOVi32imm: // movw+movt
344 // This pseudo instruction expands into two mov instructions. If the
345 // second operand is a symbol reference, this will stay as two wide
346 // instructions, movw+movt. If they're immediates, the first one can
347 // end up as a narrow mov though.
348 // As two SEH instructions are appended here, they won't get interleaved
349 // between the two final movw/movt instructions, but it doesn't make any
350 // practical difference.
351 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
352 .addImm(/*Wide=*/1)
354 MBB->insertAfter(MBBI, MIB);
355 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
356 .addImm(/*Wide=*/1)
358 break;
359
360 case ARM::t2STR_PRE:
361 if (MBBI->getOperand(0).getReg() == ARM::SP &&
362 MBBI->getOperand(2).getReg() == ARM::SP &&
363 MBBI->getOperand(3).getImm() == -4) {
364 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
365 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
366 .addImm(1ULL << Reg)
367 .addImm(/*Wide=*/1)
369 } else {
370 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
371 }
372 break;
373
374 case ARM::t2LDR_POST:
375 if (MBBI->getOperand(1).getReg() == ARM::SP &&
376 MBBI->getOperand(2).getReg() == ARM::SP &&
377 MBBI->getOperand(3).getImm() == 4) {
378 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
379 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
380 .addImm(1ULL << Reg)
381 .addImm(/*Wide=*/1)
383 } else {
384 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
385 }
386 break;
387
388 case ARM::t2LDMIA_RET:
389 case ARM::t2LDMIA_UPD:
390 case ARM::t2STMDB_UPD: {
391 unsigned Mask = 0;
392 bool Wide = false;
393 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
394 const MachineOperand &MO = MBBI->getOperand(i);
395 if (!MO.isReg() || MO.isImplicit())
396 continue;
397 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
398 if (Reg == 15)
399 Reg = 14;
400 if (Reg >= 8 && Reg <= 13)
401 Wide = true;
402 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
403 Wide = true;
404 Mask |= 1 << Reg;
405 }
406 if (!Wide) {
407 unsigned NewOpc;
408 switch (Opc) {
409 case ARM::t2LDMIA_RET:
410 NewOpc = ARM::tPOP_RET;
411 break;
412 case ARM::t2LDMIA_UPD:
413 NewOpc = ARM::tPOP;
414 break;
415 case ARM::t2STMDB_UPD:
416 NewOpc = ARM::tPUSH;
417 break;
418 default:
420 }
421 MachineInstrBuilder NewInstr =
422 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
423 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
424 NewInstr.add(MBBI->getOperand(i));
425 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
426 MBB->erase(MBBI);
427 MBBI = NewMBBI;
428 }
429 unsigned SEHOpc =
430 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
431 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
432 .addImm(Mask)
433 .addImm(Wide ? 1 : 0)
435 break;
436 }
437 case ARM::VSTMDDB_UPD:
438 case ARM::VLDMDIA_UPD: {
439 int First = -1, Last = 0;
440 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
441 const MachineOperand &MO = MBBI->getOperand(i);
442 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
443 if (First == -1)
444 First = Reg;
445 Last = Reg;
446 }
447 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
448 .addImm(First)
449 .addImm(Last)
451 break;
452 }
453 case ARM::tSUBspi:
454 case ARM::tADDspi:
455 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
456 .addImm(MBBI->getOperand(2).getImm() * 4)
457 .addImm(/*Wide=*/0)
459 break;
460 case ARM::t2SUBspImm:
461 case ARM::t2SUBspImm12:
462 case ARM::t2ADDspImm:
463 case ARM::t2ADDspImm12:
464 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
465 .addImm(MBBI->getOperand(2).getImm())
466 .addImm(/*Wide=*/1)
468 break;
469
470 case ARM::tMOVr:
471 if (MBBI->getOperand(1).getReg() == ARM::SP &&
473 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
474 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
475 .addImm(Reg)
477 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
479 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
480 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
481 .addImm(Reg)
483 } else {
484 report_fatal_error("No SEH Opcode for MOV");
485 }
486 break;
487
488 case ARM::tBX_RET:
489 case ARM::TCRETURNri:
490 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
491 .addImm(/*Wide=*/0)
493 break;
494
495 case ARM::TCRETURNdi:
496 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
497 .addImm(/*Wide=*/1)
499 break;
500 }
501 return MBB->insertAfter(MBBI, MIB);
502}
503
506 if (MBBI == MBB.begin())
508 return std::prev(MBBI);
509}
510
514 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
515 if (Start.isValid())
516 Start = std::next(Start);
517 else
518 Start = MBB.begin();
519
520 for (auto MI = Start; MI != End;) {
521 auto Next = std::next(MI);
522 // Check if this instruction already has got a SEH opcode added. In that
523 // case, don't do this generic mapping.
524 if (Next != End && isSEHInstruction(*Next)) {
525 MI = std::next(Next);
526 while (MI != End && isSEHInstruction(*MI))
527 ++MI;
528 continue;
529 }
530 insertSEH(MI, TII, MIFlags);
531 MI = Next;
532 }
533}
534
537 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
538 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
539 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
540 if (isARM)
541 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
542 Pred, PredReg, TII, MIFlags);
543 else
544 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
545 Pred, PredReg, TII, MIFlags);
546}
547
548static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
550 const ARMBaseInstrInfo &TII, int NumBytes,
551 unsigned MIFlags = MachineInstr::NoFlags,
553 unsigned PredReg = 0) {
554 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
555 MIFlags, Pred, PredReg);
556}
557
559 int RegSize;
560 switch (MI.getOpcode()) {
561 case ARM::VSTMDDB_UPD:
562 RegSize = 8;
563 break;
564 case ARM::STMDB_UPD:
565 case ARM::t2STMDB_UPD:
566 RegSize = 4;
567 break;
568 case ARM::t2STR_PRE:
569 case ARM::STR_PRE_IMM:
570 return 4;
571 default:
572 llvm_unreachable("Unknown push or pop like instruction");
573 }
574
575 int count = 0;
576 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
577 // pred) so the list starts at 4.
578 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
579 count += RegSize;
580 return count;
581}
582
584 size_t StackSizeInBytes) {
585 const MachineFrameInfo &MFI = MF.getFrameInfo();
586 const Function &F = MF.getFunction();
587 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
588
589 StackProbeSize =
590 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
591 return (StackSizeInBytes >= StackProbeSize) &&
592 !F.hasFnAttribute("no-stack-arg-probe");
593}
594
595namespace {
596
597struct StackAdjustingInsts {
598 struct InstInfo {
600 unsigned SPAdjust;
601 bool BeforeFPSet;
602 };
603
605
606 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
607 bool BeforeFPSet = false) {
608 InstInfo Info = {I, SPAdjust, BeforeFPSet};
609 Insts.push_back(Info);
610 }
611
612 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
613 auto Info =
614 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
615 assert(Info != Insts.end() && "invalid sp adjusting instruction");
616 Info->SPAdjust += ExtraBytes;
617 }
618
619 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
620 const ARMBaseInstrInfo &TII, bool HasFP) {
622 unsigned CFAOffset = 0;
623 for (auto &Info : Insts) {
624 if (HasFP && !Info.BeforeFPSet)
625 return;
626
627 CFAOffset += Info.SPAdjust;
628 unsigned CFIIndex = MF.addFrameInst(
629 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
630 BuildMI(MBB, std::next(Info.I), dl,
631 TII.get(TargetOpcode::CFI_INSTRUCTION))
632 .addCFIIndex(CFIIndex)
634 }
635 }
636};
637
638} // end anonymous namespace
639
640/// Emit an instruction sequence that will align the address in
641/// register Reg by zero-ing out the lower bits. For versions of the
642/// architecture that support Neon, this must be done in a single
643/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
644/// single instruction. That function only gets called when optimizing
645/// spilling of D registers on a core with the Neon instruction set
646/// present.
648 const TargetInstrInfo &TII,
651 const DebugLoc &DL, const unsigned Reg,
652 const Align Alignment,
653 const bool MustBeSingleInstruction) {
654 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
655 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
656 const unsigned AlignMask = Alignment.value() - 1U;
657 const unsigned NrBitsToZero = Log2(Alignment);
658 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
659 if (!AFI->isThumbFunction()) {
660 // if the BFC instruction is available, use that to zero the lower
661 // bits:
662 // bfc Reg, #0, log2(Alignment)
663 // otherwise use BIC, if the mask to zero the required number of bits
664 // can be encoded in the bic immediate field
665 // bic Reg, Reg, Alignment-1
666 // otherwise, emit
667 // lsr Reg, Reg, log2(Alignment)
668 // lsl Reg, Reg, log2(Alignment)
669 if (CanUseBFC) {
670 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
672 .addImm(~AlignMask)
674 } else if (AlignMask <= 255) {
675 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
677 .addImm(AlignMask)
679 .add(condCodeOp());
680 } else {
681 assert(!MustBeSingleInstruction &&
682 "Shouldn't call emitAligningInstructions demanding a single "
683 "instruction to be emitted for large stack alignment for a target "
684 "without BFC.");
685 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
687 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
689 .add(condCodeOp());
690 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
692 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
694 .add(condCodeOp());
695 }
696 } else {
697 // Since this is only reached for Thumb-2 targets, the BFC instruction
698 // should always be available.
699 assert(CanUseBFC);
700 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
702 .addImm(~AlignMask)
704 }
705}
706
707/// We need the offset of the frame pointer relative to other MachineFrameInfo
708/// offsets which are encoded relative to SP at function begin.
709/// See also emitPrologue() for how the FP is set up.
710/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
711/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
712/// this to produce a conservative estimate that we check in an assert() later.
713static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
714 const MachineFunction &MF) {
715 // For Thumb1, push.w isn't available, so the first push will always push
716 // r7 and lr onto the stack first.
717 if (AFI.isThumb1OnlyFunction())
718 return -AFI.getArgRegsSaveSize() - (2 * 4);
719 // This is a conservative estimation: Assume the frame pointer being r7 and
720 // pc("r15") up to r8 getting spilled before (= 8 registers).
721 int MaxRegBytes = 8 * 4;
722 if (STI.splitFramePointerPush(MF)) {
723 // Here, r11 can be stored below all of r4-r15 (3 registers more than
724 // above), plus d8-d15.
725 MaxRegBytes = 11 * 4 + 8 * 8;
726 }
727 int FPCXTSaveSize =
728 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
729 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
730}
731
733 MachineBasicBlock &MBB) const {
735 MachineFrameInfo &MFI = MF.getFrameInfo();
737 MachineModuleInfo &MMI = MF.getMMI();
738 MCContext &Context = MMI.getContext();
739 const TargetMachine &TM = MF.getTarget();
740 const MCRegisterInfo *MRI = Context.getRegisterInfo();
741 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
744 "This emitPrologue does not support Thumb1!");
745 bool isARM = !AFI->isThumbFunction();
746 Align Alignment = STI.getFrameLowering()->getStackAlign();
747 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
748 unsigned NumBytes = MFI.getStackSize();
749 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
750 int FPCXTSaveSize = 0;
751 bool NeedsWinCFI = needsWinCFI(MF);
752
753 // Debug location must be unknown since the first debug location is used
754 // to determine the end of the prologue.
755 DebugLoc dl;
756
757 Register FramePtr = RegInfo->getFrameRegister(MF);
758
759 // Determine the sizes of each callee-save spill areas and record which frame
760 // belongs to which callee-save spill areas.
761 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
762 int FramePtrSpillFI = 0;
763 int D8SpillFI = 0;
764
765 // All calls are tail calls in GHC calling conv, and functions have no
766 // prologue/epilogue.
768 return;
769
770 StackAdjustingInsts DefCFAOffsetCandidates;
771 bool HasFP = hasFP(MF);
772
773 if (!AFI->hasStackFrame() &&
774 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
775 if (NumBytes != 0) {
776 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
778 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
779 }
780 if (!NeedsWinCFI)
781 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
782 if (NeedsWinCFI && MBBI != MBB.begin()) {
784 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
786 MF.setHasWinCFI(true);
787 }
788 return;
789 }
790
791 // Determine spill area sizes.
792 if (STI.splitFramePointerPush(MF)) {
793 for (const CalleeSavedInfo &I : CSI) {
794 Register Reg = I.getReg();
795 int FI = I.getFrameIdx();
796 switch (Reg) {
797 case ARM::R11:
798 case ARM::LR:
799 if (Reg == FramePtr)
800 FramePtrSpillFI = FI;
801 GPRCS2Size += 4;
802 break;
803 case ARM::R0:
804 case ARM::R1:
805 case ARM::R2:
806 case ARM::R3:
807 case ARM::R4:
808 case ARM::R5:
809 case ARM::R6:
810 case ARM::R7:
811 case ARM::R8:
812 case ARM::R9:
813 case ARM::R10:
814 case ARM::R12:
815 GPRCS1Size += 4;
816 break;
817 case ARM::FPCXTNS:
818 FPCXTSaveSize = 4;
819 break;
820 default:
821 // This is a DPR. Exclude the aligned DPRCS2 spills.
822 if (Reg == ARM::D8)
823 D8SpillFI = FI;
824 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
825 DPRCSSize += 8;
826 }
827 }
828 } else {
829 for (const CalleeSavedInfo &I : CSI) {
830 Register Reg = I.getReg();
831 int FI = I.getFrameIdx();
832 switch (Reg) {
833 case ARM::R8:
834 case ARM::R9:
835 case ARM::R10:
836 case ARM::R11:
837 case ARM::R12:
838 if (STI.splitFramePushPop(MF)) {
839 GPRCS2Size += 4;
840 break;
841 }
842 [[fallthrough]];
843 case ARM::R0:
844 case ARM::R1:
845 case ARM::R2:
846 case ARM::R3:
847 case ARM::R4:
848 case ARM::R5:
849 case ARM::R6:
850 case ARM::R7:
851 case ARM::LR:
852 if (Reg == FramePtr)
853 FramePtrSpillFI = FI;
854 GPRCS1Size += 4;
855 break;
856 case ARM::FPCXTNS:
857 FPCXTSaveSize = 4;
858 break;
859 default:
860 // This is a DPR. Exclude the aligned DPRCS2 spills.
861 if (Reg == ARM::D8)
862 D8SpillFI = FI;
863 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
864 DPRCSSize += 8;
865 }
866 }
867 }
868
869 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
870
871 // Move past the PAC computation.
872 if (AFI->shouldSignReturnAddress())
873 LastPush = MBBI++;
874
875 // Move past FPCXT area.
876 if (FPCXTSaveSize > 0) {
877 LastPush = MBBI++;
878 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
879 }
880
881 // Allocate the vararg register save area.
882 if (ArgRegsSaveSize) {
883 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
885 LastPush = std::prev(MBBI);
886 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
887 }
888
889 // Move past area 1.
890 if (GPRCS1Size > 0) {
891 GPRCS1Push = LastPush = MBBI++;
892 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
893 }
894
895 // Determine starting offsets of spill areas.
896 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
897 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
898 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
899 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
900 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
901 if (!STI.splitFramePointerPush(MF)) {
902 DPRGapSize += GPRCS2Size;
903 }
904 DPRGapSize %= DPRAlign.value();
905
906 unsigned DPRCSOffset;
907 if (STI.splitFramePointerPush(MF)) {
908 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
909 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
910 } else {
911 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
912 }
913 int FramePtrOffsetInPush = 0;
914 if (HasFP) {
915 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
916 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
917 "Max FP estimation is wrong");
918 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
919 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
920 NumBytes);
921 }
922 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
923 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
924 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
925
926 // Move past area 2.
927 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
928 GPRCS2Push = LastPush = MBBI++;
929 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
930 }
931
932 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
933 // .cfi_offset operations will reflect that.
934 if (DPRGapSize) {
935 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
936 if (LastPush != MBB.end() &&
937 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
938 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
939 else {
940 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
942 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
943 }
944 }
945
946 // Move past area 3.
947 if (DPRCSSize > 0) {
948 // Since vpush register list cannot have gaps, there may be multiple vpush
949 // instructions in the prologue.
950 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
951 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
952 LastPush = MBBI++;
953 }
954 }
955
956 // Move past the aligned DPRCS2 area.
957 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
959 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
960 // leaves the stack pointer pointing to the DPRCS2 area.
961 //
962 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
963 NumBytes += MFI.getObjectOffset(D8SpillFI);
964 } else
965 NumBytes = DPRCSOffset;
966
967 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
968 GPRCS2Push = LastPush = MBBI++;
969 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
970 }
971
972 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
973 if (STI.splitFramePointerPush(MF) && HasFP)
974 NeedsWinCFIStackAlloc = false;
975
976 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
977 uint32_t NumWords = NumBytes >> 2;
978
979 if (NumWords < 65536) {
980 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
981 .addImm(NumWords)
984 } else {
985 // Split into two instructions here, instead of using t2MOVi32imm,
986 // to allow inserting accurate SEH instructions (including accurate
987 // instruction size for each of them).
988 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
989 .addImm(NumWords & 0xffff)
992 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
993 .addReg(ARM::R4)
994 .addImm(NumWords >> 16)
997 }
998
999 switch (TM.getCodeModel()) {
1000 case CodeModel::Tiny:
1001 llvm_unreachable("Tiny code model not available on ARM.");
1002 case CodeModel::Small:
1003 case CodeModel::Medium:
1004 case CodeModel::Kernel:
1005 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1007 .addExternalSymbol("__chkstk")
1008 .addReg(ARM::R4, RegState::Implicit)
1010 break;
1011 case CodeModel::Large:
1012 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1013 .addExternalSymbol("__chkstk")
1015
1016 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1018 .addReg(ARM::R12, RegState::Kill)
1019 .addReg(ARM::R4, RegState::Implicit)
1021 break;
1022 }
1023
1024 MachineInstrBuilder Instr, SEH;
1025 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1026 .addReg(ARM::SP, RegState::Kill)
1027 .addReg(ARM::R4, RegState::Kill)
1030 .add(condCodeOp());
1031 if (NeedsWinCFIStackAlloc) {
1032 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1033 .addImm(NumBytes)
1034 .addImm(/*Wide=*/1)
1036 MBB.insertAfter(Instr, SEH);
1037 }
1038 NumBytes = 0;
1039 }
1040
1041 if (NumBytes) {
1042 // Adjust SP after all the callee-save spills.
1043 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1044 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1045 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1046 else {
1047 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1049 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1050 }
1051
1052 if (HasFP && isARM)
1053 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1054 // Note it's not safe to do this in Thumb2 mode because it would have
1055 // taken two instructions:
1056 // mov sp, r7
1057 // sub sp, #24
1058 // If an interrupt is taken between the two instructions, then sp is in
1059 // an inconsistent state (pointing to the middle of callee-saved area).
1060 // The interrupt handler can end up clobbering the registers.
1061 AFI->setShouldRestoreSPFromFP(true);
1062 }
1063
1064 // Set FP to point to the stack slot that contains the previous FP.
1065 // For iOS, FP is R7, which has now been stored in spill area 1.
1066 // Otherwise, if this is not iOS, all the callee-saved registers go
1067 // into spill area 1, including the FP in R11. In either case, it
1068 // is in area one and the adjustment needs to take place just after
1069 // that push.
1070 // FIXME: The above is not necessary true when PACBTI is enabled.
1071 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1072 // so FP ends up on area two.
1074 if (HasFP) {
1075 AfterPush = std::next(GPRCS1Push);
1076 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1077 int FPOffset = PushSize + FramePtrOffsetInPush;
1078 if (STI.splitFramePointerPush(MF)) {
1079 AfterPush = std::next(GPRCS2Push);
1080 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1081 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1082 } else {
1083 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1084 FramePtr, ARM::SP, FPOffset,
1086 }
1087 if (!NeedsWinCFI) {
1088 if (FramePtrOffsetInPush + PushSize != 0) {
1089 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1090 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1091 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1092 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1093 .addCFIIndex(CFIIndex)
1095 } else {
1096 unsigned CFIIndex =
1098 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1099 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1100 .addCFIIndex(CFIIndex)
1102 }
1103 }
1104 }
1105
1106 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1107 // instructions below don't need to be replayed to unwind the stack.
1108 if (NeedsWinCFI && MBBI != MBB.begin()) {
1110 if (HasFP && STI.splitFramePointerPush(MF))
1111 End = AfterPush;
1113 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1115 MF.setHasWinCFI(true);
1116 }
1117
1118 // Now that the prologue's actual instructions are finalised, we can insert
1119 // the necessary DWARF cf instructions to describe the situation. Start by
1120 // recording where each register ended up:
1121 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1122 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1123 int CFIIndex;
1124 for (const auto &Entry : CSI) {
1125 Register Reg = Entry.getReg();
1126 int FI = Entry.getFrameIdx();
1127 switch (Reg) {
1128 case ARM::R8:
1129 case ARM::R9:
1130 case ARM::R10:
1131 case ARM::R11:
1132 case ARM::R12:
1133 if (STI.splitFramePushPop(MF))
1134 break;
1135 [[fallthrough]];
1136 case ARM::R0:
1137 case ARM::R1:
1138 case ARM::R2:
1139 case ARM::R3:
1140 case ARM::R4:
1141 case ARM::R5:
1142 case ARM::R6:
1143 case ARM::R7:
1144 case ARM::LR:
1146 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1147 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148 .addCFIIndex(CFIIndex)
1150 break;
1151 }
1152 }
1153 }
1154
1155 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1156 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1157 for (const auto &Entry : CSI) {
1158 Register Reg = Entry.getReg();
1159 int FI = Entry.getFrameIdx();
1160 switch (Reg) {
1161 case ARM::R8:
1162 case ARM::R9:
1163 case ARM::R10:
1164 case ARM::R11:
1165 case ARM::R12:
1166 if (STI.splitFramePushPop(MF)) {
1167 unsigned DwarfReg = MRI->getDwarfRegNum(
1168 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1169 unsigned Offset = MFI.getObjectOffset(FI);
1170 unsigned CFIIndex = MF.addFrameInst(
1171 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1172 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1173 .addCFIIndex(CFIIndex)
1175 }
1176 break;
1177 }
1178 }
1179 }
1180
1181 if (DPRCSSize > 0 && !NeedsWinCFI) {
1182 // Since vpush register list cannot have gaps, there may be multiple vpush
1183 // instructions in the prologue.
1184 MachineBasicBlock::iterator Pos = std::next(LastPush);
1185 for (const auto &Entry : CSI) {
1186 Register Reg = Entry.getReg();
1187 int FI = Entry.getFrameIdx();
1188 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1189 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1190 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1191 unsigned Offset = MFI.getObjectOffset(FI);
1192 unsigned CFIIndex = MF.addFrameInst(
1193 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1194 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1195 .addCFIIndex(CFIIndex)
1197 }
1198 }
1199 }
1200
1201 // Now we can emit descriptions of where the canonical frame address was
1202 // throughout the process. If we have a frame pointer, it takes over the job
1203 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1204 // actually get emitted.
1205 if (!NeedsWinCFI)
1206 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1207
1208 if (STI.isTargetELF() && hasFP(MF))
1210 AFI->getFramePtrSpillOffset());
1211
1212 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1213 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1214 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1215 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1216 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1217
1218 // If we need dynamic stack realignment, do it here. Be paranoid and make
1219 // sure if we also have VLAs, we have a base pointer for frame access.
1220 // If aligned NEON registers were spilled, the stack has already been
1221 // realigned.
1222 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1223 Align MaxAlign = MFI.getMaxAlign();
1225 if (!AFI->isThumbFunction()) {
1226 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1227 false);
1228 } else {
1229 // We cannot use sp as source/dest register here, thus we're using r4 to
1230 // perform the calculations. We're emitting the following sequence:
1231 // mov r4, sp
1232 // -- use emitAligningInstructions to produce best sequence to zero
1233 // -- out lower bits in r4
1234 // mov sp, r4
1235 // FIXME: It will be better just to find spare register here.
1236 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1237 .addReg(ARM::SP, RegState::Kill)
1239 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1240 false);
1241 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1242 .addReg(ARM::R4, RegState::Kill)
1244 }
1245
1246 AFI->setShouldRestoreSPFromFP(true);
1247 }
1248
1249 // If we need a base pointer, set it up here. It's whatever the value
1250 // of the stack pointer is at this point. Any variable size objects
1251 // will be allocated after this, so we can still use the base pointer
1252 // to reference locals.
1253 // FIXME: Clarify FrameSetup flags here.
1254 if (RegInfo->hasBasePointer(MF)) {
1255 if (isARM)
1256 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1257 .addReg(ARM::SP)
1259 .add(condCodeOp());
1260 else
1261 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1262 .addReg(ARM::SP)
1264 }
1265
1266 // If the frame has variable sized objects then the epilogue must restore
1267 // the sp from fp. We can assume there's an FP here since hasFP already
1268 // checks for hasVarSizedObjects.
1269 if (MFI.hasVarSizedObjects())
1270 AFI->setShouldRestoreSPFromFP(true);
1271}
1272
// Emits the function epilogue: undoes the prologue's SP adjustments,
// restores SP from FP when the function requires it, steps the insertion
// point past the callee-saved restore instructions, and finally pops any
// reserved varargs / incoming-argument stack.
// NOTE(review): the doxygen extraction dropped some original lines inside
// this body (the embedded line numbering skips); treat elided spans as
// present in the real source.
1274 MachineBasicBlock &MBB) const {
1275 MachineFrameInfo &MFI = MF.getFrameInfo();
1277 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1278 const ARMBaseInstrInfo &TII =
1279 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1280 assert(!AFI->isThumb1OnlyFunction() &&
1281 "This emitEpilogue does not support Thumb1!");
1282 bool isARM = !AFI->isThumbFunction();
1283
1284 // Amount of stack space we reserved next to incoming args for either
1285 // varargs registers or stack arguments in tail calls made by this function.
1286 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1287
1288 // How much of the stack used by incoming arguments this function is expected
1289 // to restore in this particular epilogue.
1290 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1291 int NumBytes = (int)MFI.getStackSize();
1292 Register FramePtr = RegInfo->getFrameRegister(MF);
1293
1294 // All calls are tail calls in GHC calling conv, and functions have no
1295 // prologue/epilogue.
1297 return;
1298
1299 // First put ourselves on the first (from top) terminator instructions.
1301 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1302
// RangeStart marks the first epilogue instruction when emitting Windows
// CFI (SEH) so the epilogue range can be processed afterwards.
1303 MachineBasicBlock::iterator RangeStart;
1304 if (!AFI->hasStackFrame()) {
1305 if (MF.hasWinCFI()) {
1306 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1308 RangeStart = initMBBRange(MBB, MBBI);
1309 }
1310
// No stack frame: a single SP update undoes both the local allocation and
// the incoming-argument stack in one go.
1311 if (NumBytes + IncomingArgStackToRestore != 0)
1312 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1313 NumBytes + IncomingArgStackToRestore,
1315 } else {
1316 // Unwind MBBI to point to first LDR / VLDRD.
1317 if (MBBI != MBB.begin()) {
1318 do {
1319 --MBBI;
1320 } while (MBBI != MBB.begin() &&
1322 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1323 ++MBBI;
1324 }
1325
1326 if (MF.hasWinCFI()) {
1327 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1329 RangeStart = initMBBRange(MBB, MBBI);
1330 }
1331
1332 // Move SP to start of FP callee save spill area.
1333 NumBytes -= (ReservedArgStack +
1334 AFI->getFPCXTSaveAreaSize() +
1339
1340 // Reset SP based on frame pointer only if the stack frame extends beyond
1341 // frame pointer stack slot or target is ELF and the function has FP.
1342 if (AFI->shouldRestoreSPFromFP()) {
1343 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1344 if (NumBytes) {
1345 if (isARM)
1346 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1347 ARMCC::AL, 0, TII,
1349 else {
1350 // It's not possible to restore SP from FP in a single instruction.
1351 // For iOS, this looks like:
1352 // mov sp, r7
1353 // sub sp, #24
1354 // This is bad, if an interrupt is taken after the mov, sp is in an
1355 // inconsistent state.
1356 // Use the first callee-saved register as a scratch register.
1357 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1358 "No scratch register to restore SP from FP!");
1359 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1361 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1362 .addReg(ARM::R4)
1365 }
1366 } else {
1367 // Thumb2 or ARM.
1368 if (isARM)
1369 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1372 .add(condCodeOp())
1374 else
1375 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1379 }
1380 } else if (NumBytes &&
1381 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1382 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1384
1385 // Increment past our save areas.
1387 MBBI++;
1388
1389 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1390 MBBI++;
1391 // Since vpop register list cannot have gaps, there may be multiple vpop
1392 // instructions in the epilogue.
1393 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1394 MBBI++;
1395 }
1396 if (AFI->getDPRCalleeSavedGapSize()) {
1397 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1398 "unexpected DPR alignment gap");
1399 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1401 }
1402
1404 MBBI++;
1405 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1406
1407 if (ReservedArgStack || IncomingArgStackToRestore) {
1408 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1409 "attempting to restore negative stack amount");
1410 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1411 ReservedArgStack + IncomingArgStackToRestore,
1413 }
1414
1415 // Validate PAC, It should have been already popped into R12. For CMSE entry
1416 // function, the validation instruction is emitted during expansion of the
1417 // tBXNS_RET, since the validation must use the value of SP at function
1418 // entry, before saving, resp. after restoring, FPCXTNS.
1419 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1420 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1421 }
1422
1423 if (MF.hasWinCFI()) {
1425 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1427 }
1428}
1429
1430/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1431/// debug info. It's the same as what we use for resolving the code-gen
1432/// references for now. FIXME: This can go wrong when references are
1433/// SP-relative and simple call frames aren't used.
/// \p FrameReg receives the base register (set by ResolveFrameIndexReference)
/// that the returned fixed offset is relative to.
1435 int FI,
1436 Register &FrameReg) const {
1437 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1438}
1439
// Resolve frame index FI to a base register plus offset. FrameReg is set to
// SP by default, and switched to the frame pointer or the base pointer
// depending on stack realignment, whether SP can move (non-reserved call
// frames / VLAs), and the encodable immediate-offset ranges of the
// Thumb/Thumb2 load-store forms.
1441 int FI, Register &FrameReg,
1442 int SPAdj) const {
1443 const MachineFrameInfo &MFI = MF.getFrameInfo();
1444 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1446 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1447 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1448 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1449 bool isFixed = MFI.isFixedObjectIndex(FI);
1450
1451 FrameReg = ARM::SP;
1452 Offset += SPAdj;
1453
1454 // SP can move around if there are allocas. We may also lose track of SP
1455 // when emergency spilling inside a non-reserved call frame setup.
1456 bool hasMovingSP = !hasReservedCallFrame(MF);
1457
1458 // When dynamically realigning the stack, use the frame pointer for
1459 // parameters, and the stack/base pointer for locals.
1460 if (RegInfo->hasStackRealignment(MF)) {
1461 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1462 if (isFixed) {
1463 FrameReg = RegInfo->getFrameRegister(MF);
1464 Offset = FPOffset;
1465 } else if (hasMovingSP) {
1466 assert(RegInfo->hasBasePointer(MF) &&
1467 "VLAs and dynamic stack alignment, but missing base pointer!");
1468 FrameReg = RegInfo->getBaseRegister();
// Base-pointer addressing is unaffected by call-frame adjustments, so
// undo the SPAdj applied above.
1469 Offset -= SPAdj;
1470 }
1471 return Offset;
1472 }
1473
1474 // If there is a frame pointer, use it when we can.
1475 if (hasFP(MF) && AFI->hasStackFrame()) {
1476 // Use frame pointer to reference fixed objects. Use it for locals if
1477 // there are VLAs (and thus the SP isn't reliable as a base).
1478 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1479 FrameReg = RegInfo->getFrameRegister(MF);
1480 return FPOffset;
1481 } else if (hasMovingSP) {
1482 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1483 if (AFI->isThumb2Function()) {
1484 // Try to use the frame pointer if we can, else use the base pointer
1485 // since it's available. This is handy for the emergency spill slot, in
1486 // particular.
1487 if (FPOffset >= -255 && FPOffset < 0) {
1488 FrameReg = RegInfo->getFrameRegister(MF);
1489 return FPOffset;
1490 }
1491 }
1492 } else if (AFI->isThumbFunction()) {
1493 // Prefer SP to base pointer, if the offset is suitably aligned and in
1494 // range as the effective range of the immediate offset is bigger when
1495 // basing off SP.
1496 // Use add <rd>, sp, #<imm8>
1497 // ldr <rd>, [sp, #<imm8>]
1498 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1499 return Offset;
1500 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1501 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1502 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1503 FrameReg = RegInfo->getFrameRegister(MF);
1504 return FPOffset;
1505 }
1506 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1507 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1508 FrameReg = RegInfo->getFrameRegister(MF);
1509 return FPOffset;
1510 }
1511 }
1512 // Use the base pointer if we have one.
1513 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1514 // That can happen if we forced a base pointer for a large call frame.
1515 if (RegInfo->hasBasePointer(MF)) {
1516 FrameReg = RegInfo->getBaseRegister();
1517 Offset -= SPAdj;
1518 }
1519 return Offset;
1520}
1521
/// Spill the callee-saved registers in CSI that satisfy the \p Func
/// predicate. Runs of registers (consecutive in encoding order when
/// \p NoGap is set) are stored with the multi-register opcode \p StmOpc;
/// a lone register uses the single pre-indexed store \p StrOpc when one
/// is provided. Registers d8..d8+NumAlignedDPRCS2Regs-1 are skipped here;
/// they are spilled separately by emitAlignedDPRCS2Spills.
1522void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1525 unsigned StmOpc, unsigned StrOpc,
1526 bool NoGap, bool (*Func)(unsigned, bool),
1527 unsigned NumAlignedDPRCS2Regs,
1528 unsigned MIFlags) const {
1529 MachineFunction &MF = *MBB.getParent();
1532
1533 DebugLoc DL;
1534
1535 using RegAndKill = std::pair<unsigned, bool>;
1536
// Walk CSI backwards, emitting one push per maximal batch of registers.
1538 unsigned i = CSI.size();
1539 while (i != 0) {
1540 unsigned LastReg = 0;
1541 for (; i != 0; --i) {
1542 Register Reg = CSI[i-1].getReg();
1543 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1544
1545 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1546 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1547 continue;
1548
1549 const MachineRegisterInfo &MRI = MF.getRegInfo();
1550 bool isLiveIn = MRI.isLiveIn(Reg);
1551 if (!isLiveIn && !MRI.isReserved(Reg))
1552 MBB.addLiveIn(Reg);
1553 // If NoGap is true, push consecutive registers and then leave the rest
1554 // for other instructions. e.g.
1555 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1556 if (NoGap && LastReg && LastReg != Reg-1)
1557 break;
1558 LastReg = Reg;
1559 // Do not set a kill flag on values that are also marked as live-in. This
1560 // happens with the @llvm-returnaddress intrinsic and with arguments
1561 // passed in callee saved registers.
1562 // Omitting the kill flags is conservatively correct even if the live-in
1563 // is not used after all.
1564 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1565 }
1566
1567 if (Regs.empty())
1568 continue;
1569
// Push lists must be in ascending hardware-encoding order.
1570 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1571 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1572 });
1573
1574 if (Regs.size() > 1 || StrOpc== 0) {
1575 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1576 .addReg(ARM::SP)
1577 .setMIFlags(MIFlags)
1579 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1580 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1581 } else if (Regs.size() == 1) {
1582 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1583 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1584 .addReg(ARM::SP)
1585 .setMIFlags(MIFlags)
1586 .addImm(-4)
1588 }
1589 Regs.clear();
1590
1591 // Put any subsequent vpush instructions before this one: they will refer to
1592 // higher register numbers so need to be pushed first in order to preserve
1593 // monotonicity.
1594 if (MI != MBB.begin())
1595 --MI;
1596 }
1597}
1598
/// Restore the callee-saved registers in CSI selected by \p Func, emitting
/// multi-register loads (\p LdmOpc) for batches and a single post-indexed
/// load (\p LdrOpc) for a lone register. When the block ends in a plain
/// return (not a tail call, interrupt return, trap, or CMSE entry return)
/// LR is restored directly into PC and the return is folded into the LDM.
1599void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1602 unsigned LdmOpc, unsigned LdrOpc,
1603 bool isVarArg, bool NoGap,
1604 bool (*Func)(unsigned, bool),
1605 unsigned NumAlignedDPRCS2Regs) const {
1606 MachineFunction &MF = *MBB.getParent();
1610 bool hasPAC = AFI->shouldSignReturnAddress();
1611 DebugLoc DL;
1612 bool isTailCall = false;
1613 bool isInterrupt = false;
1614 bool isTrap = false;
1615 bool isCmseEntry = false;
// Classify the terminator so we know whether folding the return into the
// final LDM (restoring LR into PC) is legal.
1616 if (MBB.end() != MI) {
1617 DL = MI->getDebugLoc();
1618 unsigned RetOpcode = MI->getOpcode();
1619 isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1620 isInterrupt =
1621 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1622 isTrap =
1623 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1624 RetOpcode == ARM::tTRAP;
1625 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1626 }
1627
// Walk CSI backwards, emitting one pop per maximal batch of registers.
1629 unsigned i = CSI.size();
1630 while (i != 0) {
1631 unsigned LastReg = 0;
1632 bool DeleteRet = false;
1633 for (; i != 0; --i) {
1634 CalleeSavedInfo &Info = CSI[i-1];
1635 Register Reg = Info.getReg();
1636 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1637
1638 // The aligned reloads from area DPRCS2 are not inserted here.
1639 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1640 continue;
1641 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1642 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1643 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1645 Reg = ARM::PC;
1646 // Fold the return instruction into the LDM.
1647 DeleteRet = true;
1648 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1649 // We 'restore' LR into PC so it is not live out of the return block:
1650 // Clear Restored bit.
1651 Info.setRestored(false);
1652 }
1653
1654 // If NoGap is true, pop consecutive registers and then leave the rest
1655 // for other instructions. e.g.
1656 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1657 if (NoGap && LastReg && LastReg != Reg-1)
1658 break;
1659
1660 LastReg = Reg;
1661 Regs.push_back(Reg);
1662 }
1663
1664 if (Regs.empty())
1665 continue;
1666
// Pop lists must be in ascending hardware-encoding order.
1667 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1668 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1669 });
1670
1671 if (Regs.size() > 1 || LdrOpc == 0) {
1672 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1673 .addReg(ARM::SP)
1676 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1677 MIB.addReg(Regs[i], getDefRegState(true));
1678 if (DeleteRet) {
1679 if (MI != MBB.end()) {
1680 MIB.copyImplicitOps(*MI);
1681 MI->eraseFromParent();
1682 }
1683 }
1684 MI = MIB;
1685 } else if (Regs.size() == 1) {
1686 // If we adjusted the reg to PC from LR above, switch it back here. We
1687 // only do that for LDM.
1688 if (Regs[0] == ARM::PC)
1689 Regs[0] = ARM::LR;
1691 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1692 .addReg(ARM::SP, RegState::Define)
1693 .addReg(ARM::SP)
1695 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1696 // that refactoring is complete (eventually).
1697 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1698 MIB.addReg(0);
1700 } else
1701 MIB.addImm(4);
1702 MIB.add(predOps(ARMCC::AL));
1703 }
1704 Regs.clear();
1705
1706 // Put any subsequent vpop instructions after this one: they will refer to
1707 // higher register numbers so need to be popped afterwards.
1708 if (MI != MBB.end())
1709 ++MI;
1710 }
1711}
1712
1713/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1714/// starting from d8. Also insert stack realignment code and leave the stack
1715/// pointer pointing to the d8 spill slot.
/// The registers are stored with 16-byte-aligned vst1.64 instructions where
/// possible (quads, then a pair, then a lone vstr.64), using r4 as the
/// scratch base register; the last spill emitted kills r4.
1718 unsigned NumAlignedDPRCS2Regs,
1720 const TargetRegisterInfo *TRI) {
1721 MachineFunction &MF = *MBB.getParent();
1723 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1725 MachineFrameInfo &MFI = MF.getFrameInfo();
1726
1727 // Mark the D-register spill slots as properly aligned. Since MFI computes
1728 // stack slot layout backwards, this can actually mean that the d-reg stack
1729 // slot offsets can be wrong. The offset for d8 will always be correct.
1730 for (const CalleeSavedInfo &I : CSI) {
1731 unsigned DNum = I.getReg() - ARM::D8;
1732 if (DNum > NumAlignedDPRCS2Regs - 1)
1733 continue;
1734 int FI = I.getFrameIdx();
1735 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1736 // registers will be 8-byte aligned.
1737 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1738
1739 // The stack slot for D8 needs to be maximally aligned because this is
1740 // actually the point where we align the stack pointer. MachineFrameInfo
1741 // computes all offsets relative to the incoming stack pointer which is a
1742 // bit weird when realigning the stack. Any extra padding for this
1743 // over-alignment is not realized because the code inserted below adjusts
1744 // the stack pointer by numregs * 8 before aligning the stack pointer.
1745 if (DNum == 0)
1746 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1747 }
1748
1749 // Move the stack pointer to the d8 spill slot, and align it at the same
1750 // time. Leave the stack slot address in the scratch register r4.
1751 //
1752 // sub r4, sp, #numregs * 8
1753 // bic r4, r4, #align - 1
1754 // mov sp, r4
1755 //
1756 bool isThumb = AFI->isThumbFunction();
1757 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1758 AFI->setShouldRestoreSPFromFP(true);
1759
1760 // sub r4, sp, #numregs * 8
1761 // The immediate is <= 64, so it doesn't need any special encoding.
1762 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1763 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1764 .addReg(ARM::SP)
1765 .addImm(8 * NumAlignedDPRCS2Regs)
1767 .add(condCodeOp());
1768
1769 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1770 // We must set parameter MustBeSingleInstruction to true, since
1771 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1772 // stack alignment. Luckily, this can always be done since all ARM
1773 // architecture versions that support Neon also support the BFC
1774 // instruction.
1775 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1776
1777 // mov sp, r4
1778 // The stack pointer must be adjusted before spilling anything, otherwise
1779 // the stack slots could be clobbered by an interrupt handler.
1780 // Leave r4 live, it is used below.
1781 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1782 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1783 .addReg(ARM::R4)
1785 if (!isThumb)
1786 MIB.add(condCodeOp());
1787
1788 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1789 // r4 holds the stack slot address.
1790 unsigned NextReg = ARM::D8;
1791
1792 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1793 // The writeback is only needed when emitting two vst1.64 instructions.
1794 if (NumAlignedDPRCS2Regs >= 6) {
1795 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1796 &ARM::QQPRRegClass);
1797 MBB.addLiveIn(SupReg);
1798 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1799 .addReg(ARM::R4, RegState::Kill)
1800 .addImm(16)
1801 .addReg(NextReg)
1804 NextReg += 4;
1805 NumAlignedDPRCS2Regs -= 4;
1806 }
1807
1808 // We won't modify r4 beyond this point. It currently points to the next
1809 // register to be spilled.
1810 unsigned R4BaseReg = NextReg;
1811
1812 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1813 if (NumAlignedDPRCS2Regs >= 4) {
1814 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1815 &ARM::QQPRRegClass);
1816 MBB.addLiveIn(SupReg);
1817 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1818 .addReg(ARM::R4)
1819 .addImm(16)
1820 .addReg(NextReg)
1823 NextReg += 4;
1824 NumAlignedDPRCS2Regs -= 4;
1825 }
1826
1827 // 16-byte aligned vst1.64 with 2 d-regs.
1828 if (NumAlignedDPRCS2Regs >= 2) {
1829 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1830 &ARM::QPRRegClass);
1831 MBB.addLiveIn(SupReg);
1832 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1833 .addReg(ARM::R4)
1834 .addImm(16)
1835 .addReg(SupReg)
1837 NextReg += 2;
1838 NumAlignedDPRCS2Regs -= 2;
1839 }
1840
1841 // Finally, use a vanilla vstr.64 for the odd last register.
1842 if (NumAlignedDPRCS2Regs) {
1843 MBB.addLiveIn(NextReg);
1844 // vstr.64 uses addrmode5 which has an offset scale of 4.
1845 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1846 .addReg(NextReg)
1847 .addReg(ARM::R4)
1848 .addImm((NextReg - R4BaseReg) * 2)
1850 }
1851
1852 // The last spill instruction inserted should kill the scratch register r4.
1853 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1854}
1855
1856/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1857/// iterator to the following instruction.
/// Relies on emitAlignedDPRCS2Spills having emitted exactly three stack
/// realignment instructions followed by one store per quad/pair/single
/// batch it decomposed NumAlignedDPRCS2Regs into.
1860 unsigned NumAlignedDPRCS2Regs) {
1861 // sub r4, sp, #numregs * 8
1862 // bic r4, r4, #align - 1
1863 // mov sp, r4
1864 ++MI; ++MI; ++MI;
1865 assert(MI->mayStore() && "Expecting spill instruction");
1866
// Count the remaining spill instructions: each case advances past one
// store, matching the batches emitAlignedDPRCS2Spills emits.
1867 // These switches all fall through.
1868 switch(NumAlignedDPRCS2Regs) {
1869 case 7:
1870 ++MI;
1871 assert(MI->mayStore() && "Expecting spill instruction");
1872 [[fallthrough]];
1873 default:
1874 ++MI;
1875 assert(MI->mayStore() && "Expecting spill instruction");
1876 [[fallthrough]];
1877 case 1:
1878 case 2:
1879 case 4:
1880 assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1881 ++MI;
1882 }
1883 return MI;
1884}
1885
1886/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1887/// starting from d8. These instructions are assumed to execute while the
1888/// stack is still aligned, unlike the code inserted by emitPopInst.
/// Mirrors emitAlignedDPRCS2Spills: r4 is materialized from d8's frame
/// index and used as the scratch base register for the aligned vld1.64 /
/// vldr.64 reloads; the last reload emitted kills r4.
1891 unsigned NumAlignedDPRCS2Regs,
1893 const TargetRegisterInfo *TRI) {
1894 MachineFunction &MF = *MBB.getParent();
1896 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1898
1899 // Find the frame index assigned to d8.
1900 int D8SpillFI = 0;
1901 for (const CalleeSavedInfo &I : CSI)
1902 if (I.getReg() == ARM::D8) {
1903 D8SpillFI = I.getFrameIdx();
1904 break;
1905 }
1906
1907 // Materialize the address of the d8 spill slot into the scratch register r4.
1908 // This can be fairly complicated if the stack frame is large, so just use
1909 // the normal frame index elimination mechanism to do it. This code runs as
1910 // the initial part of the epilog where the stack and base pointers haven't
1911 // been changed yet.
1912 bool isThumb = AFI->isThumbFunction();
1913 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1914
1915 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1916 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1917 .addFrameIndex(D8SpillFI)
1918 .addImm(0)
1920 .add(condCodeOp());
1921
1922 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1923 unsigned NextReg = ARM::D8;
1924
1925 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1926 if (NumAlignedDPRCS2Regs >= 6) {
1927 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1928 &ARM::QQPRRegClass);
1929 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1930 .addReg(ARM::R4, RegState::Define)
1931 .addReg(ARM::R4, RegState::Kill)
1932 .addImm(16)
1935 NextReg += 4;
1936 NumAlignedDPRCS2Regs -= 4;
1937 }
1938
1939 // We won't modify r4 beyond this point. It currently points to the next
1940 // register to be reloaded.
1941 unsigned R4BaseReg = NextReg;
1942
1943 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1944 if (NumAlignedDPRCS2Regs >= 4) {
1945 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1946 &ARM::QQPRRegClass);
1947 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1948 .addReg(ARM::R4)
1949 .addImm(16)
1952 NextReg += 4;
1953 NumAlignedDPRCS2Regs -= 4;
1954 }
1955
1956 // 16-byte aligned vld1.64 with 2 d-regs.
1957 if (NumAlignedDPRCS2Regs >= 2) {
1958 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1959 &ARM::QPRRegClass);
1960 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1961 .addReg(ARM::R4)
1962 .addImm(16)
1964 NextReg += 2;
1965 NumAlignedDPRCS2Regs -= 2;
1966 }
1967
1968 // Finally, use a vanilla vldr.64 for the remaining odd register.
1969 if (NumAlignedDPRCS2Regs)
1970 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1971 .addReg(ARM::R4)
1972 .addImm(2 * (NextReg - R4BaseReg))
1974
1975 // The last reload inserted should kill the scratch register r4.
1976 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1977}
1978
// Spill the callee-saved registers in CSI at the prologue insertion point.
// Push order: area 1 GPRs (r0-r7, lr), area 2 GPRs (r8-r12), then D-registers,
// as selected by the isARMAreaNRegister predicates passed to emitPushInst.
// NOTE(review): this excerpt is a doxygen scrape — the function signature and
// several interior lines (e.g. the AFI lookup and some builder operands) are
// elided; verify against the full ARMFrameLowering.cpp before editing.
1982 if (CSI.empty())
1983 return false;
1984
1985 MachineFunction &MF = *MBB.getParent();
1987
// Thumb and ARM use different multi-store / pre-indexed store opcodes.
1988 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1989 unsigned PushOneOpc = AFI->isThumbFunction() ?
1990 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1991 unsigned FltOpc = ARM::VSTMDDB_UPD;
1992 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1993 // Compute PAC in R12.
1994 if (AFI->shouldSignReturnAddress()) {
1995 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1997 }
1998 // Save the non-secure floating point context.
1999 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2000 return C.getReg() == ARM::FPCXTNS;
2001 })) {
// Pre-decrement store of FPCXT_NS below SP (offset -4 with writeback).
2002 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2003 ARM::SP)
2004 .addReg(ARM::SP)
2005 .addImm(-4)
2007 }
// When the frame pointer push is split, the push sequence differs so that
// the frame record ends up in the right place (see splitFramePointerPush).
2008 if (STI.splitFramePointerPush(MF)) {
2009 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2011 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2012 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2013 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2015 } else {
2016 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2018 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2020 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2021 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2022 }
2023
2024 // The code above does not insert spill code for the aligned DPRCS2 registers.
2025 // The stack realignment code will be inserted between the push instructions
2026 // and these spills.
2027 if (NumAlignedDPRCS2Regs)
2028 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2029
2030 return true;
2031}
2032
// Restore the callee-saved registers in CSI in the epilogue, in the reverse
// of the push order used by spillCalleeSavedRegisters: D-registers first,
// then area 2 GPRs, then area 1 GPRs.
// NOTE(review): the signature and a few interior lines (e.g. the AFI lookup
// and some emitPopInst argument lines) are elided in this excerpt.
2036 if (CSI.empty())
2037 return false;
2038
2039 MachineFunction &MF = *MBB.getParent();
2041 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2042 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2043
2044 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2045 // registers. Do that here instead.
2046 if (NumAlignedDPRCS2Regs)
2047 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2048
// Thumb and ARM use different multi-load / post-indexed load opcodes.
2049 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2050 unsigned LdrOpc =
2051 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2052 unsigned FltOpc = ARM::VLDMDIA_UPD;
2053 if (STI.splitFramePointerPush(MF)) {
2054 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2056 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2057 NumAlignedDPRCS2Regs);
2058 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2060 } else {
2061 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2062 NumAlignedDPRCS2Regs);
2063 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2064 &isARMArea2Register, 0);
2065 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2066 &isARMArea1Register, 0);
2067 }
2068
2069 return true;
2070}
2071
// Conservatively estimate the code size of the function in bytes: the sum of
// the sizes of all machine instructions, plus 4 bytes per jump-table entry
// and 4 bytes per constant-pool entry. Used to decide whether far jumps are
// needed (see the ForceLRSpill logic in determineCalleeSaves).
// NOTE(review): the first line of the signature is elided in this excerpt;
// only the trailing parameter line is visible.
2072// FIXME: Make generic?
2074 const ARMBaseInstrInfo &TII) {
2075 unsigned FnSize = 0;
2076 for (auto &MBB : MF) {
2077 for (auto &MI : MBB)
2078 FnSize += TII.getInstSizeInBytes(MI);
2079 }
2080 if (MF.getJumpTableInfo())
2081 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2082 FnSize += Table.MBBs.size() * 4;
2083 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2084 return FnSize;
2085}
2086
2087/// estimateRSStackSizeLimit - Look at each instruction that references stack
2088/// frames and return the stack size limit beyond which some of these
2089/// instructions will require a scratch register during their expansion later.
/// Starts from the 12-bit immediate limit and shrinks it according to the
/// most restrictive addressing mode found among frame-index users. Also sets
/// HasNonSPFrameIndex when a frame index is used in an operand whose register
/// class cannot hold SP.
/// NOTE(review): the first signature line and several `case` labels in the
/// switch below are elided in this excerpt (visible only as numbering gaps).
2090// FIXME: Move to TII?
2092 const TargetFrameLowering *TFI,
2093 bool &HasNonSPFrameIndex) {
2094 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2095 const ARMBaseInstrInfo &TII =
2096 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Start at the largest immediate (12 bits) and only ever shrink.
2098 unsigned Limit = (1 << 12) - 1;
2099 for (auto &MBB : MF) {
2100 for (auto &MI : MBB) {
2101 if (MI.isDebugInstr())
2102 continue;
2103 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2104 if (!MI.getOperand(i).isFI())
2105 continue;
2106
2107 // When using ADDri to get the address of a stack object, 255 is the
2108 // largest offset guaranteed to fit in the immediate offset.
2109 if (MI.getOpcode() == ARM::ADDri) {
2110 Limit = std::min(Limit, (1U << 8) - 1);
2111 break;
2112 }
2113 // t2ADDri will not require an extra register, it can reuse the
2114 // destination.
2115 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2116 break;
2117
2118 const MCInstrDesc &MCID = MI.getDesc();
2119 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2120 if (RegClass && !RegClass->contains(ARM::SP))
2121 HasNonSPFrameIndex = true;
2122
2123 // Otherwise check the addressing mode.
2124 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2126 case ARMII::AddrMode2:
2127 // Default 12 bit limit.
2128 break;
2129 case ARMII::AddrMode3:
2131 Limit = std::min(Limit, (1U << 8) - 1);
2132 break;
2134 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2135 break;
2136 case ARMII::AddrMode5:
2139 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2140 break;
2142 // i12 supports only positive offset so these will be converted to
2143 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2144 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2145 Limit = std::min(Limit, (1U << 8) - 1);
2146 break;
2147 case ARMII::AddrMode4:
2148 case ARMII::AddrMode6:
2149 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2150 // immediate offset for stack references.
2151 return 0;
2153 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2154 break;
2156 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2157 break;
2159 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2160 break;
2161 default:
2162 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2163 }
2164 break; // At most one FI per instruction
2165 }
2166 }
2167 }
2168
2169 return Limit;
2170}
2171
2172// In functions that realign the stack, it can be an advantage to spill the
2173// callee-saved vector registers after realigning the stack. The vst1 and vld1
2174// instructions take alignment hints that can improve performance.
// Decides how many contiguous d-registers starting at d8 should be spilled
// via the aligned DPRCS2 mechanism, records the count in ARMFunctionInfo,
// and reserves r4 as the scratch register the vst1/vld1 sequence needs.
// NOTE(review): this excerpt elides the second signature line and the guard
// conditions in front of several early `return;` statements below (visible
// only as numbering gaps) — consult the full file before editing.
2175static void
2177 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2179 return;
2180
2181 // Naked functions don't spill callee-saved registers.
2182 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2183 return;
2184
2185 // We are planning to use NEON instructions vst1 / vld1.
2186 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2187 return;
2188
2189 // Don't bother if the default stack alignment is sufficiently high.
2191 return;
2192
2193 // Aligned spills require stack realignment.
2194 if (!static_cast<const ARMBaseRegisterInfo *>(
2196 return;
2197
2198 // We always spill contiguous d-registers starting from d8. Count how many
2199 // need spilling. The register allocator will almost always use the
2200 // callee-saved registers in order, but it can happen that there are holes in
2201 // the range. Registers above the hole will be spilled to the standard DPRCS
2202 // area.
2203 unsigned NumSpills = 0;
2204 for (; NumSpills < 8; ++NumSpills)
2205 if (!SavedRegs.test(ARM::D8 + NumSpills))
2206 break;
2207
2208 // Don't do this for just one d-register. It's not worth it.
2209 if (NumSpills < 2)
2210 return;
2211
2212 // Spill the first NumSpills D-registers after realigning the stack.
2213 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2214
2215 // A scratch register is required for the vst1 / vld1 instructions.
2216 SavedRegs.set(ARM::R4);
2217}
2218
// Returns true when shrink-wrapping is allowed for this function. It is
// disabled for v8.1-M CMSE entry functions (FPCXT_NS must be saved right at
// entry) and when return-address signing is in effect (the PAC sequence
// clobbers r12).
// NOTE(review): the signature and two condition lines (after the `&&`s) are
// elided in this excerpt.
2220 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2221 // upon function entry (resp. restore it immediately before return)
2222 if (STI.hasV8_1MMainlineOps() &&
2224 return false;
2225
2226 // We are disabling shrinkwrapping for now when PAC is enabled, as
2227 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2228 // generated. A follow-up patch will fix this in a more performant manner.
2230 true /* SpillsLR */))
2231 return false;
2232
2233 return true;
2234}
2235
// Returns true when the AAPCS frame chain must be maintained for this
// function: always for -mframe-chain=aapcs+leaf, or for -mframe-chain=aapcs
// when the function makes calls (i.e. is not a leaf).
// NOTE(review): the signature line is elided in this excerpt.
2237 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2238 return Subtarget.createAAPCSFrameChainLeaf() ||
2239 (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2240}
2241
2242// Thumb1 may require a spill when storing to a frame index through FP, for
2243// cases where FP is a high register (R11). This scans the function for cases
2244// where this may happen.
// Returns true if any tSTRspi/tSTRi in a Thumb1-only function resolves its
// frame index to a high base register (other than SP), since such a store
// needs the base moved into a low register first.
// NOTE(review): the first line of the signature is elided in this excerpt.
2246 const TargetFrameLowering &TFI) {
2247 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2248 if (!AFI->isThumb1OnlyFunction())
2249 return false;
2250
2251 for (const auto &MBB : MF)
2252 for (const auto &MI : MBB)
2253 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi)
2254 for (const auto &Op : MI.operands())
2255 if (Op.isFI()) {
2256 Register Reg;
// Only the base register matters here; the offset is discarded.
2257 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2258 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2259 return true;
2260 }
2261 return false;
2262}
2263
// determineCalleeSaves - decide which callee-saved registers to spill for this
// function, beyond what the register allocator already requires. Adds scratch
// registers (r4, LR) needed by realignment / stack probes / far jumps, counts
// GPR and FPR spills, balances low/high register pressure on Thumb1, pads the
// GPR save area to keep the D-register area 8-byte aligned, and arranges for
// an extra spilled register or an emergency spill slot when the register
// scavenger may be needed to materialize large stack offsets.
// NOTE(review): this excerpt is a doxygen scrape with several lines elided
// (the signature's first line, the MRI/TRI/AFI declarations, some LLVM_DEBUG
// and condition continuation lines, the Extras SmallVector declaration, and
// the RS->addScavengingFrameIndex call) — verify against the full file.
2265 BitVector &SavedRegs,
2266 RegScavenger *RS) const {
2268 // This tells PEI to spill the FP as if it is any other callee-save register
2269 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2270 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2271 // to combine multiple loads / stores.
2272 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2273 bool CS1Spilled = false;
2274 bool LRSpilled = false;
2275 unsigned NumGPRSpills = 0;
2276 unsigned NumFPRSpills = 0;
2277 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2278 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2279 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2281 const ARMBaseInstrInfo &TII =
2282 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2284 MachineFrameInfo &MFI = MF.getFrameInfo();
2287 (void)TRI; // Silence unused warning in non-assert builds.
2288 Register FramePtr = RegInfo->getFrameRegister(MF);
2289
2290 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2291 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2292 // since it's not always possible to restore sp from fp in a single
2293 // instruction.
2294 // FIXME: It will be better just to find spare register here.
2295 if (AFI->isThumb2Function() &&
2296 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2297 SavedRegs.set(ARM::R4);
2298
2299 // If a stack probe will be emitted, spill R4 and LR, since they are
2300 // clobbered by the stack probe call.
2301 // This estimate should be a safe, conservative estimate. The actual
2302 // stack probe is enabled based on the size of the local objects;
2303 // this estimate also includes the varargs store size.
2304 if (STI.isTargetWindows() &&
2305 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2306 SavedRegs.set(ARM::R4);
2307 SavedRegs.set(ARM::LR);
2308 }
2309
2310 if (AFI->isThumb1OnlyFunction()) {
2311 // Spill LR if Thumb1 function uses variable length argument lists.
2312 if (AFI->getArgRegsSaveSize() > 0)
2313 SavedRegs.set(ARM::LR);
2314
2315 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2316 // requires stack alignment. We don't know for sure what the stack size
2317 // will be, but for this, an estimate is good enough. If anything
2318 // changes it, it'll be a spill, which implies we've used all the registers
2319 // and so R4 is already used, so not marking it here will be OK.
2320 // FIXME: It will be better just to find spare register here.
2321 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2322 MFI.estimateStackSize(MF) > 508)
2323 SavedRegs.set(ARM::R4);
2324 }
2325
2326 // See if we can spill vector registers to aligned stack.
2327 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2328
2329 // Spill the BasePtr if it's used.
2330 if (RegInfo->hasBasePointer(MF))
2331 SavedRegs.set(RegInfo->getBaseRegister());
2332
2333 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2334 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2335 CanEliminateFrame = false;
2336
2337 // Don't spill FP if the frame can be eliminated. This is determined
2338 // by scanning the callee-save registers to see if any is modified.
2339 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2340 for (unsigned i = 0; CSRegs[i]; ++i) {
2341 unsigned Reg = CSRegs[i];
2342 bool Spilled = false;
2343 if (SavedRegs.test(Reg)) {
2344 Spilled = true;
2345 CanEliminateFrame = false;
2346 }
2347
// Non-GPR (FP/vector) callee saves only contribute to the FPR spill size
// estimate; they never affect the CS1/CS2 GPR bookkeeping below.
2348 if (!ARM::GPRRegClass.contains(Reg)) {
2349 if (Spilled) {
2350 if (ARM::SPRRegClass.contains(Reg))
2351 NumFPRSpills++;
2352 else if (ARM::DPRRegClass.contains(Reg))
2353 NumFPRSpills += 2;
2354 else if (ARM::QPRRegClass.contains(Reg))
2355 NumFPRSpills += 4;
2356 }
2357 continue;
2358 }
2359
2360 if (Spilled) {
2361 NumGPRSpills++;
2362
2363 if (!STI.splitFramePushPop(MF)) {
2364 if (Reg == ARM::LR)
2365 LRSpilled = true;
2366 CS1Spilled = true;
2367 continue;
2368 }
2369
2370 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2371 switch (Reg) {
2372 case ARM::LR:
2373 LRSpilled = true;
2374 [[fallthrough]];
2375 case ARM::R0: case ARM::R1:
2376 case ARM::R2: case ARM::R3:
2377 case ARM::R4: case ARM::R5:
2378 case ARM::R6: case ARM::R7:
2379 CS1Spilled = true;
2380 break;
2381 default:
2382 break;
2383 }
2384 } else {
2385 if (!STI.splitFramePushPop(MF)) {
2386 UnspilledCS1GPRs.push_back(Reg);
2387 continue;
2388 }
2389
2390 switch (Reg) {
2391 case ARM::R0: case ARM::R1:
2392 case ARM::R2: case ARM::R3:
2393 case ARM::R4: case ARM::R5:
2394 case ARM::R6: case ARM::R7:
2395 case ARM::LR:
2396 UnspilledCS1GPRs.push_back(Reg);
2397 break;
2398 default:
2399 UnspilledCS2GPRs.push_back(Reg);
2400 break;
2401 }
2402 }
2403 }
2404
2405 bool ForceLRSpill = false;
2406 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2407 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2408 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2409 // use of BL to implement far jump.
2410 if (FnSize >= (1 << 11)) {
2411 CanEliminateFrame = false;
2412 ForceLRSpill = true;
2413 }
2414 }
2415
2416 // If any of the stack slot references may be out of range of an immediate
2417 // offset, make sure a register (or a spill slot) is available for the
2418 // register scavenger. Note that if we're indexing off the frame pointer, the
2419 // effective stack size is 4 bytes larger since the FP points to the stack
2420 // slot of the previous FP. Also, if we have variable sized objects in the
2421 // function, stack slot references will often be negative, and some of
2422 // our instructions are positive-offset only, so conservatively consider
2423 // that case to want a spill slot (or register) as well. Similarly, if
2424 // the function adjusts the stack pointer during execution and the
2425 // adjustments aren't already part of our stack size estimate, our offset
2426 // calculations may be off, so be conservative.
2427 // FIXME: We could add logic to be more precise about negative offsets
2428 // and which instructions will need a scratch register for them. Is it
2429 // worth the effort and added fragility?
2430 unsigned EstimatedStackSize =
2431 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2432
2433 // Determine biggest (positive) SP offset in MachineFrameInfo.
2434 int MaxFixedOffset = 0;
2435 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2436 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2437 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2438 }
2439
2440 bool HasFP = hasFP(MF);
2441 if (HasFP) {
2442 if (AFI->hasStackFrame())
2443 EstimatedStackSize += 4;
2444 } else {
2445 // If FP is not used, SP will be used to access arguments, so count the
2446 // size of arguments into the estimation.
2447 EstimatedStackSize += MaxFixedOffset;
2448 }
2449 EstimatedStackSize += 16; // For possible paddings.
2450
2451 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2452 bool HasNonSPFrameIndex = false;
2453 if (AFI->isThumb1OnlyFunction()) {
2454 // For Thumb1, don't bother to iterate over the function. The only
2455 // instruction that requires an emergency spill slot is a store to a
2456 // frame index.
2457 //
2458 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2459 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2460 // a 5-bit unsigned immediate.
2461 //
2462 // We could try to check if the function actually contains a tSTRspi
2463 // that might need the spill slot, but it's not really important.
2464 // Functions with VLAs or extremely large call frames are rare, and
2465 // if a function is allocating more than 1KB of stack, an extra 4-byte
2466 // slot probably isn't relevant.
2467 //
2468 // A special case is the scenario where r11 is used as FP, where accesses
2469 // to a frame index will require its value to be moved into a low reg.
2470 // This is handled later on, once we are able to determine if we have any
2471 // fp-relative accesses.
2472 if (RegInfo->hasBasePointer(MF))
2473 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2474 else
2475 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2476 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2477 } else {
2478 EstimatedRSStackSizeLimit =
2479 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2480 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2481 }
2482 // Final estimate of whether sp or bp-relative accesses might require
2483 // scavenging.
2484 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2485
2486 // If the stack pointer moves and we don't have a base pointer, the
2487 // estimate logic doesn't work. The actual offsets might be larger when
2488 // we're constructing a call frame, or we might need to use negative
2489 // offsets from fp.
2490 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2491 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2492 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2493
2494 // If we have a frame pointer, we assume arguments will be accessed
2495 // relative to the frame pointer. Check whether fp-relative accesses to
2496 // arguments require scavenging.
2497 //
2498 // We could do slightly better on Thumb1; in some cases, an sp-relative
2499 // offset would be legal even though an fp-relative offset is not.
2500 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2501 bool HasLargeArgumentList =
2502 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2503
2504 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2505 HasLargeArgumentList || HasNonSPFrameIndex;
2506 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2507 << "; EstimatedStack: " << EstimatedStackSize
2508 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2509 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2510 if (BigFrameOffsets ||
2511 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2512 AFI->setHasStackFrame(true);
2513
2514 if (HasFP) {
2515 SavedRegs.set(FramePtr);
2516 // If the frame pointer is required by the ABI, also spill LR so that we
2517 // emit a complete frame record.
2518 if ((requiresAAPCSFrameRecord(MF) ||
2520 !LRSpilled) {
2521 SavedRegs.set(ARM::LR);
2522 LRSpilled = true;
2523 NumGPRSpills++;
2524 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2525 if (LRPos != UnspilledCS1GPRs.end())
2526 UnspilledCS1GPRs.erase(LRPos);
2527 }
2528 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2529 if (FPPos != UnspilledCS1GPRs.end())
2530 UnspilledCS1GPRs.erase(FPPos);
2531 NumGPRSpills++;
2532 if (FramePtr == ARM::R7)
2533 CS1Spilled = true;
2534 }
2535
2536 // This is true when we inserted a spill for a callee-save GPR which is
2537 // not otherwise used by the function. This guarantees it is possible
2538 // to scavenge a register to hold the address of a stack slot. On Thumb1,
2539 // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
2540 // subtargets, this is any GPR, i.e. r4-r11 or lr.
2541 //
2542 // If we don't insert a spill, we instead allocate an emergency spill
2543 // slot, which can be used by scavenging to spill an arbitrary register.
2544 //
2545 // We currently don't try to figure out whether any specific instruction
2546 // requires scavenging an additional register.
2547 bool ExtraCSSpill = false;
2548
2549 if (AFI->isThumb1OnlyFunction()) {
2550 // For Thumb1-only targets, we need some low registers when we save and
2551 // restore the high registers (which aren't allocatable, but could be
2552 // used by inline assembly) because the push/pop instructions can not
2553 // access high registers. If necessary, we might need to push more low
2554 // registers to ensure that there is at least one free that can be used
2555 // for the saving & restoring, and preferably we should ensure that as
2556 // many as are needed are available so that fewer push/pop instructions
2557 // are required.
2558
2559 // Low registers which are not currently pushed, but could be (r4-r7).
2560 SmallVector<unsigned, 4> AvailableRegs;
2561
2562 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2563 // free.
2564 int EntryRegDeficit = 0;
2565 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2566 if (!MF.getRegInfo().isLiveIn(Reg)) {
2567 --EntryRegDeficit;
2569 << printReg(Reg, TRI)
2570 << " is unused argument register, EntryRegDeficit = "
2571 << EntryRegDeficit << "\n");
2572 }
2573 }
2574
2575 // Unused return registers can be clobbered in the epilogue for free.
2576 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2578 << " return regs used, ExitRegDeficit = "
2579 << ExitRegDeficit << "\n");
2580
2581 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2582 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2583
2584 // r4-r6 can be used in the prologue if they are pushed by the first push
2585 // instruction.
2586 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2587 if (SavedRegs.test(Reg)) {
2588 --RegDeficit;
2589 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2590 << " is saved low register, RegDeficit = "
2591 << RegDeficit << "\n");
2592 } else {
2593 AvailableRegs.push_back(Reg);
2594 LLVM_DEBUG(
2595 dbgs()
2596 << printReg(Reg, TRI)
2597 << " is non-saved low register, adding to AvailableRegs\n");
2598 }
2599 }
2600
2601 // r7 can be used if it is not being used as the frame pointer.
2602 if (!HasFP || FramePtr != ARM::R7) {
2603 if (SavedRegs.test(ARM::R7)) {
2604 --RegDeficit;
2605 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2606 << RegDeficit << "\n");
2607 } else {
2608 AvailableRegs.push_back(ARM::R7);
2609 LLVM_DEBUG(
2610 dbgs()
2611 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2612 }
2613 }
2614
2615 // Each of r8-r11 needs to be copied to a low register, then pushed.
2616 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2617 if (SavedRegs.test(Reg)) {
2618 ++RegDeficit;
2619 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2620 << " is saved high register, RegDeficit = "
2621 << RegDeficit << "\n");
2622 }
2623 }
2624
2625 // LR can only be used by PUSH, not POP, and can't be used at all if the
2626 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2627 // are more limited at function entry than exit.
2628 if ((EntryRegDeficit > ExitRegDeficit) &&
2629 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2631 if (SavedRegs.test(ARM::LR)) {
2632 --RegDeficit;
2633 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2634 << RegDeficit << "\n");
2635 } else {
2636 AvailableRegs.push_back(ARM::LR);
2637 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2638 }
2639 }
2640
2641 // If there are more high registers that need pushing than low registers
2642 // available, push some more low registers so that we can use fewer push
2643 // instructions. This might not reduce RegDeficit all the way to zero,
2644 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2645 // need saving.
2646 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2647 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2648 unsigned Reg = AvailableRegs.pop_back_val();
2649 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2650 << " to make up reg deficit\n");
2651 SavedRegs.set(Reg);
2652 NumGPRSpills++;
2653 CS1Spilled = true;
2654 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2655 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2656 ExtraCSSpill = true;
2657 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2658 if (Reg == ARM::LR)
2659 LRSpilled = true;
2660 }
2661 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2662 << "\n");
2663 }
2664
2665 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2666 // restore LR in that case.
2667 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2668
2669 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2670 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2671 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2672 SavedRegs.set(ARM::LR);
2673 NumGPRSpills++;
2675 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2676 if (LRPos != UnspilledCS1GPRs.end())
2677 UnspilledCS1GPRs.erase(LRPos);
2678
2679 ForceLRSpill = false;
2680 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2681 !AFI->isThumb1OnlyFunction())
2682 ExtraCSSpill = true;
2683 }
2684
2685 // If stack and double are 8-byte aligned and we are spilling an odd number
2686 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2687 // the integer and double callee save areas.
2688 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2689 const Align TargetAlign = getStackAlign();
2690 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2691 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2692 for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2693 unsigned Reg = UnspilledCS1GPRs[i];
2694 // Don't spill high register if the function is thumb. In the case of
2695 // Windows on ARM, accept R11 (frame pointer)
2696 if (!AFI->isThumbFunction() ||
2697 (STI.isTargetWindows() && Reg == ARM::R11) ||
2698 isARMLowRegister(Reg) ||
2699 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2700 SavedRegs.set(Reg);
2701 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2702 << " to make up alignment\n");
2703 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2704 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2705 ExtraCSSpill = true;
2706 break;
2707 }
2708 }
2709 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2710 unsigned Reg = UnspilledCS2GPRs.front();
2711 SavedRegs.set(Reg);
2712 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2713 << " to make up alignment\n");
2714 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2715 ExtraCSSpill = true;
2716 }
2717 }
2718
2719 // Estimate if we might need to scavenge a register at some point in order
2720 // to materialize a stack offset. If so, either spill one additional
2721 // callee-saved register or reserve a special spill slot to facilitate
2722 // register scavenging. Thumb1 needs a spill slot for stack pointer
2723 // adjustments and for frame index accesses when FP is high register,
2724 // even when the frame itself is small.
2725 if (!ExtraCSSpill &&
2726 (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) {
2727 // If any non-reserved CS register isn't spilled, just spill one or two
2728 // extra. That should take care of it!
2729 unsigned NumExtras = TargetAlign.value() / 4;
2731 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2732 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2733 if (!MRI.isReserved(Reg) &&
2734 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2735 Extras.push_back(Reg);
2736 NumExtras--;
2737 }
2738 }
2739 // For non-Thumb1 functions, also check for hi-reg CS registers
2740 if (!AFI->isThumb1OnlyFunction()) {
2741 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2742 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2743 if (!MRI.isReserved(Reg)) {
2744 Extras.push_back(Reg);
2745 NumExtras--;
2746 }
2747 }
2748 }
2749 if (NumExtras == 0) {
2750 for (unsigned Reg : Extras) {
2751 SavedRegs.set(Reg);
2752 if (!MRI.isPhysRegUsed(Reg))
2753 ExtraCSSpill = true;
2754 }
2755 }
2756 if (!ExtraCSSpill && RS) {
2757 // Reserve a slot closest to SP or frame pointer.
2758 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2759 const TargetRegisterClass &RC = ARM::GPRRegClass;
2760 unsigned Size = TRI->getSpillSize(RC);
2761 Align Alignment = TRI->getSpillAlign(RC);
2763 MFI.CreateStackObject(Size, Alignment, false));
2764 }
2765 }
2766 }
2767
2768 if (ForceLRSpill)
2769 SavedRegs.set(ARM::LR);
2770 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2771}
2772
// Record callee saves known to this target beyond the generic computation.
// NOTE(review): the first signature line and the delegating call to the base
// class implementation are elided in this excerpt.
2774 BitVector &SavedRegs) const {
2776
2777 // If we have the "returned" parameter attribute which guarantees that we
2778 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2779 // record that fact for IPRA.
2780 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2781 if (AFI->getPreservesR0())
2782 SavedRegs.set(ARM::R0);
2783}
2784
// Augment the callee-saved list with pseudo callee saves: FPCXT_NS for CMSE
// entry functions, and R12 (holding the PAC) for functions that sign their
// return address. Returns false so PEI still assigns the spill slots.
// NOTE(review): the first signature line and the CMSE-entry condition line
// after the `&&` are elided in this excerpt.
2787 std::vector<CalleeSavedInfo> &CSI) const {
2788 // For CMSE entry functions, handle floating-point context as if it was a
2789 // callee-saved register.
2790 if (STI.hasV8_1MMainlineOps() &&
2792 CSI.emplace_back(ARM::FPCXTNS);
// FPCXT_NS is restored by dedicated epilogue code, not by PEI.
2793 CSI.back().setRestored(false);
2794 }
2795
2796 // For functions, which sign their return address, upon function entry, the
2797 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2798 // in this case.
2799 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2800 if (AFI.shouldSignReturnAddress()) {
2801 // The order of registers must match the order we push them, because the
2802 // PEI assigns frame indices in that order. When compiling for return
2803 // address sign and authentication, we use split push, therefore the orders
2804 // we want are:
2805 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2806 CSI.insert(find_if(CSI,
2807 [=](const auto &CS) {
2808 Register Reg = CS.getReg();
2809 return Reg == ARM::R10 || Reg == ARM::R11 ||
2810 Reg == ARM::R8 || Reg == ARM::R9 ||
2811 ARM::DPRRegClass.contains(Reg);
2812 }),
2813 CalleeSavedInfo(ARM::R12));
2814 }
2815
2816 return false;
2817}
2818
// Fixed spill slot table: FPCXT_NS lives at a fixed offset of -4 from the
// incoming SP (matching the VSTR_FPCXTNS_pre push emitted in the prologue).
// NOTE(review): the signature lines are elided in this excerpt.
2821 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2822 NumEntries = std::size(FixedSpillOffsets);
2823 return FixedSpillOffsets;
2824}
2825
// Replace ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos. Without a reserved call
// frame, expand them to real SP adjustments (preserving any predicate on the
// pseudo); with a reserved call frame, only re-apply callee-popped bytes.
// Returns the iterator following the erased pseudo.
// NOTE(review): the remaining parameter lines of the signature and the AFI
// lookup are elided in this excerpt.
2826MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2829 const ARMBaseInstrInfo &TII =
2830 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2832 bool isARM = !AFI->isThumbFunction();
2833 DebugLoc dl = I->getDebugLoc();
2834 unsigned Opc = I->getOpcode();
2835 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2836 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2837
2838 assert(!AFI->isThumb1OnlyFunction() &&
2839 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2840
// Carry the pseudo's predicate (if any) over to the emitted SP updates.
2841 int PIdx = I->findFirstPredOperandIdx();
2842 ARMCC::CondCodes Pred = (PIdx == -1)
2843 ? ARMCC::AL
2844 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2845 unsigned PredReg = TII.getFramePred(*I);
2846
2847 if (!hasReservedCallFrame(MF)) {
2848 // Bail early if the callee is expected to do the adjustment.
2849 if (IsDestroy && CalleePopAmount != -1U)
2850 return MBB.erase(I);
2851
2852 // If we have alloca, convert as follows:
2853 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2854 // ADJCALLSTACKUP -> add, sp, sp, amount
2855 unsigned Amount = TII.getFrameSize(*I);
2856 if (Amount != 0) {
2857 // We need to keep the stack aligned properly. To do this, we round the
2858 // amount of space needed for the outgoing arguments up to the next
2859 // alignment boundary.
2860 Amount = alignSPAdjust(Amount);
2861
2862 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2863 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2864 Pred, PredReg);
2865 } else {
2866 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2867 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2868 Pred, PredReg);
2869 }
2870 }
2871 } else if (CalleePopAmount != -1U) {
2872 // If the calling convention demands that the callee pops arguments from the
2873 // stack, we want to add it back if we have a reserved call frame.
2874 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2875 MachineInstr::NoFlags, Pred, PredReg);
2876 }
2877 return MBB.erase(I);
2878}
2879
2880/// Get the minimum constant for ARM that is greater than or equal to the
2881/// argument. In ARM, constants can have any value that can be produced by
2882/// rotating an 8-bit value to the right by an even number of bits within a
2883/// 32-bit word.
// NOTE(review): the signature (original line 2884) is elided from this view;
// per the index it is `static uint32_t alignToARMConstant(uint32_t Value)`.
2885 unsigned Shifted = 0;
2886
2887 if (Value == 0)
2888 return 0;
2889
// Normalize: shift left by 2 until one of the top two bits is set, so the
// significant byte sits in bits 31..24. Shifting by 2 keeps the final
// rotation amount even, as the ARM immediate encoding requires.
2890 while (!(Value & 0xC0000000)) {
2891 Value = Value << 2;
2892 Shifted += 2;
2893 }
2894
// Round up: if any bits below the top byte are set, the top byte alone
// underestimates the value, so bump it by one.
2895 bool Carry = (Value & 0x00FFFFFF);
2896 Value = ((Value & 0xFF000000) >> 24) + Carry;
2897
// If rounding overflowed 8 bits (0xFF + 1 == 0x100), keep a pattern that
// is still encodable with an even rotation (mask retains bits 2..8).
2898 if (Value & 0x0000100)
2899 Value = Value & 0x000001FC;
2900
// Undo the normalization: place the rounded byte back at its original
// magnitude (the byte currently represents bits 31..24, i.e. 24 positions
// above where the input's top byte ends up after Shifted left-shifts).
2901 if (Shifted > 24)
2902 Value = Value >> (Shifted - 24);
2903 else
2904 Value = Value << (24 - Shifted);
2905
2906 return Value;
2907}
2908
2909// The stack limit in the TCB is set to this many bytes above the actual
2910// stack limit.
2912
2913// Adjust the function prologue to enable split stacks. This currently only
2914// supports android and linux.
2915//
2916// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2917// must be well defined in order to allow for consistent implementations of the
2918// __morestack helper function. The ABI is also not a normal ABI in that it
2919// doesn't follow the normal calling conventions because this allows the
2920// prologue of each function to be optimized further.
2921//
2922// Currently, the ABI looks like (when calling __morestack)
2923//
2924// * r4 holds the minimum stack size requested for this function call
2925// * r5 holds the stack size of the arguments to the function
2926// * the beginning of the function is 3 instructions after the call to
2927// __morestack
2928//
2929// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2930// place the arguments on to the new stack, and the 3-instruction knowledge to
2931// jump directly to the body of the function when working on the new stack.
2932//
2933// An old (and possibly no longer compatible) implementation of __morestack for
2934// ARM can be found at [1].
2935//
2936// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
2938 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
// adjustForSegmentedStacks: prepend a split-stack check to the prologue.
// Builds five new blocks before PrologueMBB: save scratch regs, read the
// stack limit from TLS, compare it against SP minus the frame size, and on
// failure call __morestack to grow the stack. See the comment block above
// this function for the __morestack ABI.
// NOTE(review): the function signature line (original 2937) and several
// statements (e.g. creation of McrMBB/GetMBB/AllocMBB at 2974-2976 and the
// WalkList declaration at 2980) are elided from this view.
2939 unsigned Opcode;
2940 unsigned CFIIndex;
2941 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2942 bool Thumb = ST->isThumb();
2943 bool Thumb2 = ST->isThumb2();
2944
2945 // Sadly, this currently doesn't support varargs, platforms other than
2946 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
2947 if (MF.getFunction().isVarArg())
2948 report_fatal_error("Segmented stacks do not support vararg functions.");
2949 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2950 report_fatal_error("Segmented stacks not supported on this platform.");
2951
2952 MachineFrameInfo &MFI = MF.getFrameInfo();
2953 MachineModuleInfo &MMI = MF.getMMI();
2954 MCContext &Context = MMI.getContext();
2955 const MCRegisterInfo *MRI = Context.getRegisterInfo();
2956 const ARMBaseInstrInfo &TII =
2957 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2959 DebugLoc DL;
2960
2961 if (!MFI.needsSplitStackProlog())
2962 return;
2963
2964 uint64_t StackSize = MFI.getStackSize();
2965
2966 // Use R4 and R5 as scratch registers.
2967 // We save R4 and R5 before use and restore them before leaving the function.
2968 unsigned ScratchReg0 = ARM::R4;
2969 unsigned ScratchReg1 = ARM::R5;
2970 uint64_t AlignedStackSize;
2971
2972 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2973 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2977
2978 // Grab everything that reaches PrologueMBB to update their liveness as well.
2979 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2981 WalkList.push_back(&PrologueMBB);
2982
// Reverse CFG walk: collect every block from which PrologueMBB is reachable.
2983 do {
2984 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
2985 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
2986 if (BeforePrologueRegion.insert(PredBB).second)
2987 WalkList.push_back(PredBB);
2988 }
2989 } while (!WalkList.empty());
2990
2991 // The order in that list is important.
2992 // The blocks will all be inserted before PrologueMBB using that order.
2993 // Therefore the block that should appear first in the CFG should appear
2994 // first in the list.
2995 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
2996 PostStackMBB};
2997
2998 for (MachineBasicBlock *B : AddedBlocks)
2999 BeforePrologueRegion.insert(B);
3000
// Propagate PrologueMBB's live-ins to all blocks in the new prologue region.
3001 for (const auto &LI : PrologueMBB.liveins()) {
3002 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3003 PredBB->addLiveIn(LI);
3004 }
3005
3006 // Remove the newly added blocks from the list, since we know
3007 // we do not have to do the following updates for them.
3008 for (MachineBasicBlock *B : AddedBlocks) {
3009 BeforePrologueRegion.erase(B);
3010 MF.insert(PrologueMBB.getIterator(), B);
3011 }
3012
3013 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3014 // Make sure the LiveIns are still sorted and unique.
3016 // Replace the edges to PrologueMBB by edges to the sequences
3017 // we are about to add, but only update for immediate predecessors.
3018 if (MBB->isSuccessor(&PrologueMBB))
3019 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3020 }
3021
3022 // The required stack size that is aligned to ARM constant criterion.
3023 AlignedStackSize = alignToARMConstant(StackSize);
3024
3025 // When the frame size is less than 256 we just compare the stack
3026 // boundary directly to the value of the stack pointer, per gcc.
3027 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3028
3029 // We will use two of the callee save registers as scratch registers so we
3030 // need to save those registers onto the stack.
3031 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3032 // requested and arguments for __morestack().
3033 // SR0: Scratch Register #0
3034 // SR1: Scratch Register #1
3035 // push {SR0, SR1}
3036 if (Thumb) {
3037 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3039 .addReg(ScratchReg0)
3040 .addReg(ScratchReg1);
3041 } else {
3042 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3043 .addReg(ARM::SP, RegState::Define)
3044 .addReg(ARM::SP)
3046 .addReg(ScratchReg0)
3047 .addReg(ScratchReg1);
3048 }
3049
3050 // Emit the relevant DWARF information about the change in stack pointer as
3051 // well as where to find both r4 and r5 (the callee-save registers)
3052 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3053 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3054 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3055 .addCFIIndex(CFIIndex);
3057 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3058 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3059 .addCFIIndex(CFIIndex);
3061 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3062 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3063 .addCFIIndex(CFIIndex);
3064 }
3065
3066 // mov SR1, sp
3067 if (Thumb) {
3068 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3069 .addReg(ARM::SP)
3071 } else if (CompareStackPointer) {
3072 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3073 .addReg(ARM::SP)
3075 .add(condCodeOp());
3076 }
3077
// When the frame is large, SR1 = SP - AlignedStackSize so the comparison
// below accounts for the frame we are about to allocate.
3078 // sub SR1, sp, #StackSize
3079 if (!CompareStackPointer && Thumb) {
3080 if (AlignedStackSize < 256) {
3081 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3082 .add(condCodeOp())
3083 .addReg(ScratchReg1)
3084 .addImm(AlignedStackSize)
3086 } else {
3087 if (Thumb2) {
3088 BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
3089 .addImm(AlignedStackSize);
3090 } else {
// Thumb1 cannot materialize a wide immediate directly; go via the
// constant pool.
3091 auto MBBI = McrMBB->end();
3092 auto RegInfo = STI.getRegisterInfo();
3093 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3094 AlignedStackSize);
3095 }
3096 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3097 .add(condCodeOp())
3098 .addReg(ScratchReg1)
3099 .addReg(ScratchReg0)
3101 }
3102 } else if (!CompareStackPointer) {
3103 if (AlignedStackSize < 256) {
3104 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3105 .addReg(ARM::SP)
3106 .addImm(AlignedStackSize)
3108 .add(condCodeOp());
3109 } else {
3110 auto MBBI = McrMBB->end();
3111 auto RegInfo = STI.getRegisterInfo();
3112 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3113 AlignedStackSize);
3114 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3115 .addReg(ARM::SP)
3116 .addReg(ScratchReg0)
3118 .add(condCodeOp());
3119 }
3120 }
3121
// Thumb1 has no MRC access to the TLS register, so the stack limit is
// reached through the __STACK_LIMIT symbol via the constant pool instead.
3122 if (Thumb && ST->isThumb1Only()) {
3123 unsigned PCLabelId = ARMFI->createPICLabelUId();
3125 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3127 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3128
3129 // ldr SR0, [pc, offset(STACK_LIMIT)]
3130 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3133
3134 // ldr SR0, [SR0]
3135 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3136 .addReg(ScratchReg0)
3137 .addImm(0)
3139 } else {
3140 // Get TLS base address from the coprocessor
3141 // mrc p15, #0, SR0, c13, c0, #3
3142 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3143 ScratchReg0)
3144 .addImm(15)
3145 .addImm(0)
3146 .addImm(13)
3147 .addImm(0)
3148 .addImm(3)
3150
3151 // Use the last tls slot on android and a private field of the TCB on linux.
3152 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3153 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3154
3155 // Get the stack limit from the right offset
3156 // ldr SR0, [sr0, #4 * TlsOffset]
3157 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3158 ScratchReg0)
3159 .addReg(ScratchReg0)
3160 .addImm(4 * TlsOffset)
3162 }
3163
3164 // Compare stack limit with stack size requested.
3165 // cmp SR0, SR1
3166 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3167 BuildMI(GetMBB, DL, TII.get(Opcode))
3168 .addReg(ScratchReg0)
3169 .addReg(ScratchReg1)
3171
3172 // This jump is taken if StackLimit < SP - stack required.
3173 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3174 BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
3176 .addReg(ARM::CPSR);
3177
3178
3179 // Calling __morestack(StackSize, Size of stack arguments).
3180 // __morestack knows that the stack size requested is in SR0(r4)
3181 // and amount size of stack arguments is in SR1(r5).
3182
3183 // Pass first argument for the __morestack by Scratch Register #0.
3184 // The amount size of stack required
3185 if (Thumb) {
3186 if (AlignedStackSize < 256) {
3187 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3188 .add(condCodeOp())
3189 .addImm(AlignedStackSize)
3191 } else {
3192 if (Thumb2) {
3193 BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0)
3194 .addImm(AlignedStackSize);
3195 } else {
3196 auto MBBI = AllocMBB->end();
3197 auto RegInfo = STI.getRegisterInfo();
3198 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3199 AlignedStackSize);
3200 }
3201 }
3202 } else {
3203 if (AlignedStackSize < 256) {
3204 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3205 .addImm(AlignedStackSize)
3207 .add(condCodeOp());
3208 } else {
3209 auto MBBI = AllocMBB->end();
3210 auto RegInfo = STI.getRegisterInfo();
3211 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3212 AlignedStackSize);
3213 }
3214 }
3215
3216 // Pass second argument for the __morestack by Scratch Register #1.
3217 // The amount size of stack consumed to save function arguments.
3218 if (Thumb) {
3219 if (ARMFI->getArgumentStackSize() < 256) {
3220 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3221 .add(condCodeOp())
3224 } else {
3225 if (Thumb2) {
3226 BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1)
3228 } else {
3229 auto MBBI = AllocMBB->end();
3230 auto RegInfo = STI.getRegisterInfo();
3231 RegInfo->emitLoadConstPool(
3232 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3234 }
3235 }
3236 } else {
3237 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3238 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3241 .add(condCodeOp());
3242 } else {
3243 auto MBBI = AllocMBB->end();
3244 auto RegInfo = STI.getRegisterInfo();
3245 RegInfo->emitLoadConstPool(
3246 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3248 }
3249 }
3250
3251 // push {lr} - Save return address of this function.
3252 if (Thumb) {
3253 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3255 .addReg(ARM::LR);
3256 } else {
3257 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3258 .addReg(ARM::SP, RegState::Define)
3259 .addReg(ARM::SP)
3261 .addReg(ARM::LR);
3262 }
3263
3264 // Emit the DWARF info about the change in stack as well as where to find the
3265 // previous link register
3266 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3267 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3268 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3269 .addCFIIndex(CFIIndex);
3271 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3272 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3273 .addCFIIndex(CFIIndex);
3274 }
3275
3276 // Call __morestack().
3277 if (Thumb) {
3278 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3280 .addExternalSymbol("__morestack");
3281 } else {
3282 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3283 .addExternalSymbol("__morestack");
3284 }
3285
3286 // pop {lr} - Restore return address of this original function.
3287 if (Thumb) {
3288 if (ST->isThumb1Only()) {
// Thumb1 tPOP cannot write LR directly; pop into SR0 and move it over.
3289 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3291 .addReg(ScratchReg0);
3292 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3293 .addReg(ScratchReg0)
3295 } else {
3296 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3297 .addReg(ARM::LR, RegState::Define)
3298 .addReg(ARM::SP, RegState::Define)
3299 .addReg(ARM::SP)
3300 .addImm(4)
3302 }
3303 } else {
3304 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3305 .addReg(ARM::SP, RegState::Define)
3306 .addReg(ARM::SP)
3308 .addReg(ARM::LR);
3309 }
3310
3311 // Restore SR0 and SR1 in case __morestack() was called.
3312 // __morestack() will skip PostStackMBB block so we need to restore
3313 // scratch registers from here.
3314 // pop {SR0, SR1}
3315 if (Thumb) {
3316 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3318 .addReg(ScratchReg0)
3319 .addReg(ScratchReg1);
3320 } else {
3321 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3322 .addReg(ARM::SP, RegState::Define)
3323 .addReg(ARM::SP)
3325 .addReg(ScratchReg0)
3326 .addReg(ScratchReg1);
3327 }
3328
3329 // Update the CFA offset now that we've popped
3330 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3331 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3332 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3333 .addCFIIndex(CFIIndex);
3334 }
3335
3336 // Return from this function.
3337 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3338
3339 // Restore SR0 and SR1 in case __morestack() was not called.
3340 // pop {SR0, SR1}
3341 if (Thumb) {
3342 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3344 .addReg(ScratchReg0)
3345 .addReg(ScratchReg1);
3346 } else {
3347 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3348 .addReg(ARM::SP, RegState::Define)
3349 .addReg(ARM::SP)
3351 .addReg(ScratchReg0)
3352 .addReg(ScratchReg1);
3353 }
3354
3355 // Update the CFA offset now that we've popped
3356 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3357 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3358 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3359 .addCFIIndex(CFIIndex);
3360
3361 // Tell debuggers that r4 and r5 are now the same as they were in the
3362 // previous function, that they're the "Same Value".
3364 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3365 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3366 .addCFIIndex(CFIIndex);
3368 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3369 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3370 .addCFIIndex(CFIIndex);
3371 }
3372
// Wire up the CFG: PrevStack -> Mcr -> Get -> {Alloc, PostStack},
// Alloc -> PostStack, PostStack -> original prologue.
3373 // Organizing MBB lists
3374 PostStackMBB->addSuccessor(&PrologueMBB);
3375
3376 AllocMBB->addSuccessor(PostStackMBB);
3377
3378 GetMBB->addSuccessor(PostStackMBB);
3379 GetMBB->addSuccessor(AllocMBB);
3380
3381 McrMBB->addSuccessor(GetMBB);
3382
3383 PrevStackMBB->addSuccessor(McrMBB);
3384
3385#ifdef EXPENSIVE_CHECKS
3386 MF.verify();
3387#endif
3388}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool requiresAAPCSFrameRecord(const MachineFunction &MF)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
@ Flags
Definition: TextStubV5.cpp:93
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
bool isTargetWindows() const
Definition: ARMSubtarget.h:369
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:274
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:442
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:372
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:187
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:644
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:797
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:547
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register)
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:616
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register)
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:540
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:533
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:571
Context object for machine code objects.
Definition: MCContext.h:76
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
int getStackProtectorIndex() const
Return the index for the stack protector object.
int getOffsetAdjustment() const
Return the correction for frame offsets.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to the callee saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
typename SuperClass::iterator iterator
Definition: SmallVector.h:581
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:2011
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1846
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85