1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function such that a particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until the main
36// function body runs, after the prologue. However, it's depicted here for
37// completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access data in a frame, a constant offset from one of the pointers
79// (fp, bp, sp) must be computable at compile time. The sizes of the areas
80// with a dotted background cannot be computed at compile time if those
81// areas are present, so all three of fp, bp and sp must be set up in order
82// to be able to access all of the frame areas, assuming all of the frame
83// areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
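//
// As a purely illustrative sketch (hand-written, not taken from any compiler
// output), a Thumb2 function with an over-aligned local and a VLA might get a
// prologue roughly like:
//
//   push  {r4, r6, r7, lr}   ; callee-saves, incl. fp (r7) and bp (r6)
//   add   r7, sp, #8         ; fp -> frame record (saved r7/lr pair)
//   sub   sp, sp, #64        ; fixed-size locals and spill slots
//   mov   r4, sp             ; realign sp via a scratch register:
//   bfc   r4, #0, #5         ;   zero the low bits (32-byte alignment here)
//   mov   sp, r4
//   mov   r6, sp             ; bp -> bottom of the fixed-size area
//
// After this, fixed objects are addressed from fp, locals from bp (or from sp
// when there are no VLAs), and outgoing call arguments from sp.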
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCInstrDesc.h"
144#include "llvm/Support/CodeGen.h"
147#include "llvm/Support/Debug.h"
152#include <algorithm>
153#include <cassert>
154#include <cstddef>
155#include <cstdint>
156#include <iterator>
157#include <utility>
158#include <vector>
159
160#define DEBUG_TYPE "arm-frame-lowering"
161
162using namespace llvm;
163
164static cl::opt<bool>
165SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
166 cl::desc("Align ARM NEON spills in prolog and epilog"));
167
168static MachineBasicBlock::iterator
169skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
170                        unsigned NumAlignedDPRCS2Regs);
171
172enum class SpillArea {
173  FPCXT,
174  FPStatus,
175  GPRCS1,
176  GPRCS2,
177  GPRCS3,
178  DPRCS1,
179  DPRCS2,
180};
181
182/// Get the spill area that Reg should be saved into in the prologue.
183static SpillArea getSpillArea(Register Reg,
184                              ARMSubtarget::PushPopSplitVariation Variation,
185                              unsigned NumAlignedDPRCS2Regs,
186                              const TargetRegisterInfo *RegInfo) {
187 // NoSplit:
188 // push {r0-r12, lr} GPRCS1
189 // vpush {d8-d15} DPRCS1
190 //
191 // SplitR7:
192 // push {r0-r7, lr} GPRCS1
193 // push {r8-r12} GPRCS2
194 // vpush {d8-d15} DPRCS1
195 //
196 // SplitR11WindowsSEH:
197 // push {r0-r10, r12} GPRCS1
198 // vpush {d8-d15} DPRCS1
199 // push {r11, lr} GPRCS3
200 //
201 // SplitR11AAPCSSignRA:
202 // push {r0-r10, r12} GPRCS1
203 // push {r11, lr} GPRCS2
204 // vpush {d8-d15} DPRCS1
205
206 // If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
207 // the top of the stack frame.
208 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
209 // of SP. If used, it will be below the other save areas, after the stack has
210 // been re-aligned.
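 // As an illustrative example of the mapping implemented below (these are
 // made-up queries, not taken from a real call site):
 //   getSpillArea(ARM::R8,  ARMSubtarget::SplitR7,            0, RegInfo) -> GPRCS2
 //   getSpillArea(ARM::R11, ARMSubtarget::SplitR11WindowsSEH, 0, RegInfo) -> GPRCS3
 //   getSpillArea(ARM::D9,  ARMSubtarget::NoSplit,            4, RegInfo) -> DPRCS2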
211
212 switch (Reg) {
213 default:
214 dbgs() << "Don't know where to spill " << printReg(Reg, RegInfo) << "\n";
215 llvm_unreachable("Don't know where to spill this register");
216 break;
217
218 case ARM::FPCXTNS:
219 return SpillArea::FPCXT;
220
221 case ARM::FPSCR:
222 case ARM::FPEXC:
223 return SpillArea::FPStatus;
224
225 case ARM::R0:
226 case ARM::R1:
227 case ARM::R2:
228 case ARM::R3:
229 case ARM::R4:
230 case ARM::R5:
231 case ARM::R6:
232 case ARM::R7:
233 return SpillArea::GPRCS1;
234
235 case ARM::R8:
236 case ARM::R9:
237 case ARM::R10:
238 if (Variation == ARMSubtarget::SplitR7)
239 return SpillArea::GPRCS2;
240 else
241 return SpillArea::GPRCS1;
242
243 case ARM::R11:
244 if (Variation == ARMSubtarget::SplitR7 ||
245        Variation == ARMSubtarget::SplitR11AAPCSSignRA)
246      return SpillArea::GPRCS2;
247 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
248 return SpillArea::GPRCS3;
249
250 return SpillArea::GPRCS1;
251
252 case ARM::R12:
253 if (Variation == ARMSubtarget::SplitR7)
254 return SpillArea::GPRCS2;
255 else
256 return SpillArea::GPRCS1;
257
258 case ARM::LR:
259 if (Variation == ARMSubtarget::SplitR11AAPCSSignRA)
260 return SpillArea::GPRCS2;
261 if (Variation == ARMSubtarget::SplitR11WindowsSEH)
262 return SpillArea::GPRCS3;
263
264 return SpillArea::GPRCS1;
265
266 case ARM::D0:
267 case ARM::D1:
268 case ARM::D2:
269 case ARM::D3:
270 case ARM::D4:
271 case ARM::D5:
272 case ARM::D6:
273 case ARM::D7:
274 return SpillArea::DPRCS1;
275
276 case ARM::D8:
277 case ARM::D9:
278 case ARM::D10:
279 case ARM::D11:
280 case ARM::D12:
281 case ARM::D13:
282 case ARM::D14:
283 case ARM::D15:
284 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
285 return SpillArea::DPRCS2;
286 else
287 return SpillArea::DPRCS1;
288
289 case ARM::D16:
290 case ARM::D17:
291 case ARM::D18:
292 case ARM::D19:
293 case ARM::D20:
294 case ARM::D21:
295 case ARM::D22:
296 case ARM::D23:
297 case ARM::D24:
298 case ARM::D25:
299 case ARM::D26:
300 case ARM::D27:
301 case ARM::D28:
302 case ARM::D29:
303 case ARM::D30:
304 case ARM::D31:
305 return SpillArea::DPRCS1;
306 }
307}
308
312
313static bool keepFramePointer(const MachineFunction &MF) {
314 // iOS always has a FP for backtracking; force other targets to keep their FP
315 // when doing FastISel as well. The emitted code is currently superior, and in
316 // cases like test-suite's lencod, FastISel isn't quite correct when FP is eliminated.
317 return MF.getSubtarget<ARMSubtarget>().useFastISel();
318}
319
320/// Returns true if the target can safely skip saving callee-saved registers
321/// for noreturn nounwind functions.
322bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {
323  assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
324 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
325 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
326
327 // Frame pointer and link register are not treated as normal CSR, thus we
328 // can always skip CSR saves for nonreturning functions.
329 return true;
330}
331
332/// hasFPImpl - Return true if the specified function should have a dedicated
333/// frame pointer register. This is true if the function has variable sized
334/// allocas or if frame pointer elimination is disabled.
336 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
337 const MachineFrameInfo &MFI = MF.getFrameInfo();
338
339 // Check to see if the target wants to forcibly keep the frame pointer.
340 if (keepFramePointer(MF))
341 return true;
342
343 // ABI-required frame pointer.
344  if (MF.getTarget().Options.DisableFramePointerElim(MF))
345    return true;
346
347 // Frame pointer required for use within this function.
348 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
349 MFI.isFrameAddressTaken());
350}
351
352/// isFPReserved - Return true if the frame pointer register should be
353/// considered a reserved register in the scope of the specified function.
355 return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);
356}
357
358/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
359/// not required, we reserve argument space for call sites in the function
360/// immediately on entry to the current function. This eliminates the need for
361/// add/sub sp brackets around call sites. Returns true if the call frame is
362/// included as part of the stack frame.
364 const MachineFrameInfo &MFI = MF.getFrameInfo();
365 unsigned CFSize = MFI.getMaxCallFrameSize();
366 // It's not always a good idea to include the call frame as part of the
367 // stack frame. ARM (especially Thumb) has small immediate offsets for
368 // addressing the stack frame, so a large call frame can cause poor codegen
369 // and may even make it impossible to scavenge a register.
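 // As a worked example: with the (assumed default) 4095-byte imm12 range, a
 // reserved call frame of roughly 2 KiB or more would already consume half of
 // the addressable offset range, so such frames are not reserved and SP is
 // instead adjusted around each call.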
370 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
371 return false;
372
373 return !MFI.hasVarSizedObjects();
374}
375
376/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
377/// call frame pseudos can be simplified. Unlike most targets, having a FP
378/// is not sufficient here since we still may reference some objects via SP
379/// even when FP is available in Thumb2 mode.
380bool
381ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
382  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
383}
384
385// Returns how much of the incoming argument stack area we should clean up in an
386// epilogue. For the C calling convention this will be 0, for guaranteed tail
387// call conventions it can be positive (a normal return or a tail call to a
388// function that uses less stack space for arguments) or negative (for a tail
389// call to a function that needs more stack space than we do for arguments).
392 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
393 bool IsTailCallReturn = false;
394 if (MBB.end() != MBBI) {
395 unsigned RetOpcode = MBBI->getOpcode();
396 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
397 RetOpcode == ARM::TCRETURNri ||
398 RetOpcode == ARM::TCRETURNrinotr12;
399 }
401
402 int ArgumentPopSize = 0;
403 if (IsTailCallReturn) {
404 MachineOperand &StackAdjust = MBBI->getOperand(1);
405
406 // For a tail-call in a callee-pops-arguments environment, some or all of
407 // the stack may actually be in use for the call's arguments; this is
408 // calculated during LowerCall and consumed here...
409 ArgumentPopSize = StackAdjust.getImm();
410 } else {
411 // ... otherwise the amount to pop is *all* of the argument space,
412 // conveniently stored in the MachineFunctionInfo by
413 // LowerFormalArguments. This will, of course, be zero for the C calling
414 // convention.
415 ArgumentPopSize = AFI->getArgumentStackToRestore();
416 }
417
418 return ArgumentPopSize;
419}
420
421static bool needsWinCFI(const MachineFunction &MF) {
422 const Function &F = MF.getFunction();
423 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
424 F.needsUnwindTableEntry();
425}
426
427// Given a load or a store instruction, generate the matching SEH unwind
428// pseudo-instruction for Windows.
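// For example (illustrative, matching the t2STR_PRE case below): a frame-setup
// store of r11 with pre-decrement writeback to sp and offset -4 is mirrored by
// an SEH_SaveRegs opcode whose register mask is 1 << 11.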
430 const TargetInstrInfo &TII,
431 unsigned Flags) {
432 unsigned Opc = MBBI->getOpcode();
433 MachineBasicBlock *MBB = MBBI->getParent();
434 MachineFunction &MF = *MBB->getParent();
435 DebugLoc DL = MBBI->getDebugLoc();
437 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
438 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439
440 Flags |= MachineInstr::NoMerge;
441
442 switch (Opc) {
443 default:
444 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
445 break;
446 case ARM::t2ADDri: // add.w r11, sp, #xx
447 case ARM::t2ADDri12: // add.w r11, sp, #xx
448 case ARM::t2MOVTi16: // movt r4, #xx
449 case ARM::tBL: // bl __chkstk
450 // These are harmless if used for just setting up a frame pointer,
451 // but that frame pointer can't be relied upon for unwinding, unless
452 // set up with SEH_SaveSP.
453 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
454 .addImm(/*Wide=*/1)
455 .setMIFlags(Flags);
456 break;
457
458 case ARM::t2MOVi16: { // mov(w) r4, #xx
459 bool Wide = MBBI->getOperand(1).getImm() >= 256;
460 if (!Wide) {
461 MachineInstrBuilder NewInstr =
462 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
463 NewInstr.add(MBBI->getOperand(0));
464 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
465 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
466 NewInstr.add(MO);
467 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
468 MBB->erase(MBBI);
469 MBBI = NewMBBI;
470 }
471 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
472 break;
473 }
474
475 case ARM::tBLXr: // blx r12 (__chkstk)
476 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
477 .addImm(/*Wide=*/0)
478 .setMIFlags(Flags);
479 break;
480
481 case ARM::t2MOVi32imm: // movw+movt
482 // This pseudo instruction expands into two mov instructions. If the
483 // second operand is a symbol reference, this will stay as two wide
484 // instructions, movw+movt. If they're immediates, the first one can
485 // end up as a narrow mov though.
486 // As two SEH instructions are appended here, they won't get interleaved
487 // between the two final movw/movt instructions, but it doesn't make any
488 // practical difference.
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
490 .addImm(/*Wide=*/1)
491 .setMIFlags(Flags);
492 MBB->insertAfter(MBBI, MIB);
493 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
494 .addImm(/*Wide=*/1)
495 .setMIFlags(Flags);
496 break;
497
498 case ARM::t2STR_PRE:
499 if (MBBI->getOperand(0).getReg() == ARM::SP &&
500 MBBI->getOperand(2).getReg() == ARM::SP &&
501 MBBI->getOperand(3).getImm() == -4) {
502 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
503 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
504 .addImm(1ULL << Reg)
505 .addImm(/*Wide=*/1)
506 .setMIFlags(Flags);
507 } else {
508 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
509 }
510 break;
511
512 case ARM::t2LDR_POST:
513 if (MBBI->getOperand(1).getReg() == ARM::SP &&
514 MBBI->getOperand(2).getReg() == ARM::SP &&
515 MBBI->getOperand(3).getImm() == 4) {
516 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
517 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
518 .addImm(1ULL << Reg)
519 .addImm(/*Wide=*/1)
520 .setMIFlags(Flags);
521 } else {
522 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
523 }
524 break;
525
526 case ARM::t2LDMIA_RET:
527 case ARM::t2LDMIA_UPD:
528 case ARM::t2STMDB_UPD: {
529 unsigned Mask = 0;
530 bool Wide = false;
531 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
532 const MachineOperand &MO = MBBI->getOperand(i);
533 if (!MO.isReg() || MO.isImplicit())
534 continue;
535 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
536 if (Reg == 15)
537 Reg = 14;
538 if (Reg >= 8 && Reg <= 13)
539 Wide = true;
540 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
541 Wide = true;
542 Mask |= 1 << Reg;
543 }
544 if (!Wide) {
545 unsigned NewOpc;
546 switch (Opc) {
547 case ARM::t2LDMIA_RET:
548 NewOpc = ARM::tPOP_RET;
549 break;
550 case ARM::t2LDMIA_UPD:
551 NewOpc = ARM::tPOP;
552 break;
553 case ARM::t2STMDB_UPD:
554 NewOpc = ARM::tPUSH;
555 break;
556 default:
558 }
559 MachineInstrBuilder NewInstr =
560 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
561 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
562 NewInstr.add(MBBI->getOperand(i));
563 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
564 MBB->erase(MBBI);
565 MBBI = NewMBBI;
566 }
567 unsigned SEHOpc =
568 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
569 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
570 .addImm(Mask)
571 .addImm(Wide ? 1 : 0)
572 .setMIFlags(Flags);
573 break;
574 }
575 case ARM::VSTMDDB_UPD:
576 case ARM::VLDMDIA_UPD: {
577 int First = -1, Last = 0;
578 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
579 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
580 if (First == -1)
581 First = Reg;
582 Last = Reg;
583 }
584 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
585 .addImm(First)
586 .addImm(Last)
587 .setMIFlags(Flags);
588 break;
589 }
590 case ARM::tSUBspi:
591 case ARM::tADDspi:
592 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
593 .addImm(MBBI->getOperand(2).getImm() * 4)
594 .addImm(/*Wide=*/0)
595 .setMIFlags(Flags);
596 break;
597 case ARM::t2SUBspImm:
598 case ARM::t2SUBspImm12:
599 case ARM::t2ADDspImm:
600 case ARM::t2ADDspImm12:
601 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
602 .addImm(MBBI->getOperand(2).getImm())
603 .addImm(/*Wide=*/1)
604 .setMIFlags(Flags);
605 break;
606
607 case ARM::tMOVr:
608 if (MBBI->getOperand(1).getReg() == ARM::SP &&
609 (Flags & MachineInstr::FrameSetup)) {
610 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
611 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
612 .addImm(Reg)
613 .setMIFlags(Flags);
614 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
615 (Flags & MachineInstr::FrameDestroy)) {
616 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
617 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
618 .addImm(Reg)
619 .setMIFlags(Flags);
620 } else {
621 report_fatal_error("No SEH Opcode for MOV");
622 }
623 break;
624
625 case ARM::tBX_RET:
626 case ARM::TCRETURNri:
627 case ARM::TCRETURNrinotr12:
628 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
629 .addImm(/*Wide=*/0)
630 .setMIFlags(Flags);
631 break;
632
633 case ARM::TCRETURNdi:
634 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
635 .addImm(/*Wide=*/1)
636 .setMIFlags(Flags);
637 break;
638 }
639 return MBB->insertAfter(MBBI, MIB);
640}
641
644 if (MBBI == MBB.begin())
645    return MachineBasicBlock::iterator();
646  return std::prev(MBBI);
647}
648
649static void insertSEHRange(MachineBasicBlock &MBB,
650                           MachineBasicBlock::iterator Start,
651                           const MachineBasicBlock::iterator &End,
652                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
653 if (Start.isValid())
654 Start = std::next(Start);
655 else
656 Start = MBB.begin();
657
658 for (auto MI = Start; MI != End;) {
659 auto Next = std::next(MI);
660 // Check if this instruction already has got a SEH opcode added. In that
661 // case, don't do this generic mapping.
662 if (Next != End && isSEHInstruction(*Next)) {
663 MI = std::next(Next);
664 while (MI != End && isSEHInstruction(*MI))
665 ++MI;
666 continue;
667 }
668 insertSEH(MI, TII, MIFlags);
669 MI = Next;
670 }
671}
672
673static void emitRegPlusImmediate(
674    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
675    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
676 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
677 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
678 if (isARM)
679 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
680 Pred, PredReg, TII, MIFlags);
681 else
682 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
683 Pred, PredReg, TII, MIFlags);
684}
685
686static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
688 const ARMBaseInstrInfo &TII, int NumBytes,
689 unsigned MIFlags = MachineInstr::NoFlags,
690                         ARMCC::CondCodes Pred = ARMCC::AL,
691                         unsigned PredReg = 0) {
692 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
693 MIFlags, Pred, PredReg);
694}
695
696static int sizeOfSPAdjustment(const MachineInstr &MI) {
697  int RegSize;
698 switch (MI.getOpcode()) {
699 case ARM::VSTMDDB_UPD:
700 RegSize = 8;
701 break;
702 case ARM::STMDB_UPD:
703 case ARM::t2STMDB_UPD:
704 RegSize = 4;
705 break;
706 case ARM::t2STR_PRE:
707 case ARM::STR_PRE_IMM:
708 return 4;
709 default:
710 llvm_unreachable("Unknown push or pop like instruction");
711 }
712
713 int count = 0;
714 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
715 // pred) so the list starts at 4.
716 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
717 count += RegSize;
718 return count;
719}
720
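// As an illustrative example: with the default 4096-byte probe size and no
// stack protector, a function whose frame is 8192 bytes takes the Windows
// stack-probe path (a __chkstk call emitted in emitPrologue), while a
// 1024-byte frame does not; functions marked "no-stack-arg-probe" never do.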
721static bool WindowsRequiresStackProbe(const MachineFunction &MF,
722                                      size_t StackSizeInBytes) {
723 const MachineFrameInfo &MFI = MF.getFrameInfo();
724 const Function &F = MF.getFunction();
725 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
726
727 StackProbeSize =
728 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
729 return (StackSizeInBytes >= StackProbeSize) &&
730 !F.hasFnAttribute("no-stack-arg-probe");
731}
732
733namespace {
734
735struct StackAdjustingInsts {
736 struct InstInfo {
737    MachineBasicBlock::iterator I;
738    unsigned SPAdjust;
739 bool BeforeFPSet;
740
741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
742 void dump() {
743 dbgs() << " " << (BeforeFPSet ? "before-fp " : " ")
744 << "sp-adjust=" << SPAdjust;
745 I->dump();
746 }
747#endif
748 };
749
749
750  SmallVector<InstInfo, 4> Insts;
751
752 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
753 bool BeforeFPSet = false) {
754 InstInfo Info = {I, SPAdjust, BeforeFPSet};
755 Insts.push_back(Info);
756 }
757
758 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
759 auto Info =
760 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
761 assert(Info != Insts.end() && "invalid sp adjusting instruction");
762 Info->SPAdjust += ExtraBytes;
763 }
764
765 void emitDefCFAOffsets(MachineBasicBlock &MBB, bool HasFP) {
766 CFIInstBuilder CFIBuilder(MBB, MBB.end(), MachineInstr::FrameSetup);
767 unsigned CFAOffset = 0;
768 for (auto &Info : Insts) {
769 if (HasFP && !Info.BeforeFPSet)
770 return;
771
772 CFAOffset += Info.SPAdjust;
773 CFIBuilder.setInsertPoint(std::next(Info.I));
774 CFIBuilder.buildDefCFAOffset(CFAOffset);
775 }
776 }
777
778#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
779 void dump() {
780 dbgs() << "StackAdjustingInsts:\n";
781 for (auto &Info : Insts)
782 Info.dump();
783 }
784#endif
785};
786
787} // end anonymous namespace
788
789/// Emit an instruction sequence that will align the address in
790/// register Reg by zero-ing out the lower bits. For versions of the
791/// architecture that support Neon, this must be done in a single
792/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
793/// single instruction. That function only gets called when optimizing
794/// spilling of D registers on a core with the Neon instruction set
795/// present.
796static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
797                                     const TargetInstrInfo &TII,
798                                     MachineBasicBlock &MBB,
799                                     MachineBasicBlock::iterator MBBI,
800 const DebugLoc &DL, const unsigned Reg,
801 const Align Alignment,
802 const bool MustBeSingleInstruction) {
803 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
804 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
805 const unsigned AlignMask = Alignment.value() - 1U;
806 const unsigned NrBitsToZero = Log2(Alignment);
807 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
808 if (!AFI->isThumbFunction()) {
809 // if the BFC instruction is available, use that to zero the lower
810 // bits:
811 // bfc Reg, #0, log2(Alignment)
812 // otherwise use BIC, if the mask to zero the required number of bits
813 // can be encoded in the bic immediate field
814 // bic Reg, Reg, Alignment-1
815 // otherwise, emit
816 // lsr Reg, Reg, log2(Alignment)
817 // lsl Reg, Reg, log2(Alignment)
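    // For example, with Alignment == 16 (AlignMask == 15, NrBitsToZero == 4),
    // the three options above would be, respectively:
    //   bfc r4, #0, #4
    //   bic r4, r4, #15
    //   lsr r4, r4, #4 ; lsl r4, r4, #4
    // (r4 standing in for whichever register is passed in as Reg).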
818 if (CanUseBFC) {
819 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
821 .addImm(~AlignMask)
823 } else if (AlignMask <= 255) {
824 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
826 .addImm(AlignMask)
828 .add(condCodeOp());
829 } else {
830 assert(!MustBeSingleInstruction &&
831 "Shouldn't call emitAligningInstructions demanding a single "
832 "instruction to be emitted for large stack alignment for a target "
833 "without BFC.");
834 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
836 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
838 .add(condCodeOp());
839 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
841 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
843 .add(condCodeOp());
844 }
845 } else {
846 // Since this is only reached for Thumb-2 targets, the BFC instruction
847 // should always be available.
848 assert(CanUseBFC);
849 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
851 .addImm(~AlignMask)
853 }
854}
855
856/// We need the offset of the frame pointer relative to other MachineFrameInfo
857/// offsets which are encoded relative to SP at function begin.
858/// See also emitPrologue() for how the FP is set up.
859/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
860/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
861/// this to produce a conservative estimate that we check in an assert() later.
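/// As a worked example (illustrative numbers only): a Thumb2 function with no
/// FPCXT save and an argument-register save area of 8 bytes gets the bound
/// -(0 + 8 + 8*4) = -40 bytes here; the frame pointer's real offset from the
/// incoming SP, computed later, must never be more negative than that.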
862static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863 const MachineFunction &MF) {
864  ARMSubtarget::PushPopSplitVariation PushPopSplit =
865      STI.getPushPopSplitVariation(MF);
866  // For Thumb1, push.w isn't available, so the first push will always push
867  // r7 and lr onto the stack.
868 if (AFI.isThumb1OnlyFunction())
869 return -AFI.getArgRegsSaveSize() - (2 * 4);
870 // This is a conservative estimate: assume the frame pointer is r7, and that
871 // r8 up to pc ("r15") get spilled before it (= 8 registers).
872 int MaxRegBytes = 8 * 4;
873 if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
874 // Here, r11 can be stored below all of r4-r15.
875 MaxRegBytes = 11 * 4;
876 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
877 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
878 MaxRegBytes = 11 * 4 + 8 * 8;
879 }
880 int FPCXTSaveSize =
881 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
882 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
883}
884
885void ARMFrameLowering::emitPrologue(MachineFunction &MF,
886                                    MachineBasicBlock &MBB) const {
887  MachineBasicBlock::iterator MBBI = MBB.begin();
888  MachineFrameInfo &MFI = MF.getFrameInfo();
889  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
890  const TargetMachine &TM = MF.getTarget();
891 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
892 const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
894 "This emitPrologue does not support Thumb1!");
895 bool isARM = !AFI->isThumbFunction();
896 Align Alignment = STI.getFrameLowering()->getStackAlign();
897 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
898 unsigned NumBytes = MFI.getStackSize();
899 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
900 int FPCXTSaveSize = 0;
901 bool NeedsWinCFI = needsWinCFI(MF);
902  ARMSubtarget::PushPopSplitVariation PushPopSplit =
903      STI.getPushPopSplitVariation(MF);
904
905 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF.getName() << "\n");
906
907 // Debug location must be unknown since the first debug location is used
908 // to determine the end of the prologue.
909 DebugLoc dl;
910
911 Register FramePtr = RegInfo->getFrameRegister(MF);
912
913 // Determine the size of each callee-save spill area and record which frame
914 // index belongs to which callee-save spill area.
915 unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
916 DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
917 int FramePtrSpillFI = 0;
918 int D8SpillFI = 0;
919
920 // All calls are tail calls in GHC calling conv, and functions have no
921 // prologue/epilogue.
922  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
923    return;
924
925 StackAdjustingInsts DefCFAOffsetCandidates;
926 bool HasFP = hasFP(MF);
927
928 if (!AFI->hasStackFrame() &&
929 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
930 if (NumBytes != 0) {
931 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
933 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
934 }
935 if (!NeedsWinCFI)
936 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
937 if (NeedsWinCFI && MBBI != MBB.begin()) {
939 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
941 MF.setHasWinCFI(true);
942 }
943 return;
944 }
945
946 // Determine spill area sizes, and some important frame indices.
947 SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
948 bool BeforeFPPush = true;
949 for (const CalleeSavedInfo &I : CSI) {
950 MCRegister Reg = I.getReg();
951 int FI = I.getFrameIdx();
952
953 SpillArea Area = getSpillArea(Reg, PushPopSplit,
954 AFI->getNumAlignedDPRCS2Regs(), RegInfo);
955
956 if (Reg == FramePtr.asMCReg()) {
957 FramePtrSpillFI = FI;
958 FramePtrSpillArea = Area;
959 }
960 if (Reg == ARM::D8)
961 D8SpillFI = FI;
962
963 switch (Area) {
964 case SpillArea::FPCXT:
965 FPCXTSaveSize += 4;
966 break;
968 GPRCS1Size += 4;
969 break;
971 GPRCS2Size += 4;
972 break;
974 FPStatusSize += 4;
975 break;
977 DPRCS1Size += 8;
978 break;
980 GPRCS3Size += 4;
981 break;
983 DPRCS2Size += 8;
984 break;
985 }
986 }
987
988 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push,
989 DPRCS1Push, GPRCS3Push;
990
991 // Move past the PAC computation.
992 if (AFI->shouldSignReturnAddress())
993 LastPush = MBBI++;
994
995 // Move past FPCXT area.
996 if (FPCXTSaveSize > 0) {
997 LastPush = MBBI++;
998 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
999 }
1000
1001 // Allocate the vararg register save area.
1002 if (ArgRegsSaveSize) {
1003 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
1005 LastPush = std::prev(MBBI);
1006 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
1007 }
1008
1009 // Move past area 1.
1010 if (GPRCS1Size > 0) {
1011 GPRCS1Push = LastPush = MBBI++;
1012 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1013 if (FramePtrSpillArea == SpillArea::GPRCS1)
1014 BeforeFPPush = false;
1015 }
1016
1017 // Determine starting offsets of spill areas. These offsets are all positive
1018 // offsets from the bottom of the lowest-addressed callee-save area
1019 // (excluding DPRCS2, which is in the re-aligned stack region) to the bottom
1020 // of the spill area in question.
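 // As a worked example (illustrative sizes, assuming the usual 8-byte stack
 // alignment): with NumBytes = 64, no vararg or FPCXT saves, GPRCS1Size = 20,
 // no GPRCS2 or FP-status saves, and DPRCS1Size = 16, the code below computes
 // FPCXTOffset = 64, GPRCS1Offset = 44, a 4-byte DPR gap (20 % 8), and
 // DPRCS1Offset = 44 - 4 - 16 = 24.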
1021 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
1022 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
1023 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
1024 unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
1025
1026 Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
1027 unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1028 GPRCS2Size + FPStatusSize) %
1029 DPRAlign.value();
1030
1031 unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
1032
1033 if (HasFP) {
1034 // Offset from the CFA to the saved frame pointer, will be negative.
1035 [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
1036 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1037 << ", FPOffset: " << FPOffset << "\n");
1038 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
1039 "Max FP estimation is wrong");
1040 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
1041 NumBytes);
1042 }
1043 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
1044 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
1045 AFI->setDPRCalleeSavedArea1Offset(DPRCS1Offset);
1046
1047 // Move past area 2.
1048 if (GPRCS2Size > 0) {
1050 GPRCS2Push = LastPush = MBBI++;
1051 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1052 if (FramePtrSpillArea == SpillArea::GPRCS2)
1053 BeforeFPPush = false;
1054 }
1055
1056 // Move past FP status save area.
1057 if (FPStatusSize > 0) {
1058 while (MBBI != MBB.end()) {
1059 unsigned Opc = MBBI->getOpcode();
1060 if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
1061 MBBI++;
1062 else
1063 break;
1064 }
1065 LastPush = MBBI++;
1066 DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize);
1067 }
1068
1069 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1070 // .cfi_offset operations will reflect that.
1071 if (DPRGapSize) {
1072 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
1073 if (LastPush != MBB.end() &&
1074 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
1075 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
1076 else {
1077 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1079 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
1080 }
1081 }
1082
1083 // Move past DPRCS1Size.
1084 if (DPRCS1Size > 0) {
1085 // Since vpush register list cannot have gaps, there may be multiple vpush
1086 // instructions in the prologue.
1087 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1088 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1089 BeforeFPPush);
1090 DPRCS1Push = LastPush = MBBI++;
1091 }
1092 }
1093
1094 // Move past the aligned DPRCS2 area.
1095 if (DPRCS2Size > 0) {
1097 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1098 // leaves the stack pointer pointing to the DPRCS2 area.
1099 //
1100 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1101 NumBytes += MFI.getObjectOffset(D8SpillFI);
1102 } else
1103 NumBytes = DPRCS1Offset;
1104
1105 // Move past GPRCS3, if using SplitR11WindowsSEH.
1106 if (GPRCS3Size > 0) {
1108 GPRCS3Push = LastPush = MBBI++;
1109 DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size, BeforeFPPush);
1110 if (FramePtrSpillArea == SpillArea::GPRCS3)
1111 BeforeFPPush = false;
1112 }
1113
1114 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
1115 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH && HasFP)
1116 NeedsWinCFIStackAlloc = false;
1117
1118 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
1119 uint32_t NumWords = NumBytes >> 2;
1120
1121 if (NumWords < 65536) {
1122 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1123 .addImm(NumWords)
1126 } else {
1127 // Split into two instructions here, instead of using t2MOVi32imm,
1128 // to allow inserting accurate SEH instructions (including accurate
1129 // instruction size for each of them).
1130 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
1131 .addImm(NumWords & 0xffff)
1134 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
1135 .addReg(ARM::R4)
1136 .addImm(NumWords >> 16)
1139 }
1140
1141 const ARMTargetLowering *TLI = STI.getTargetLowering();
1142 RTLIB::LibcallImpl ChkStkLibcall = TLI->getLibcallImpl(RTLIB::STACK_PROBE);
1143 if (ChkStkLibcall == RTLIB::Unsupported)
1144 reportFatalUsageError("no available implementation of __chkstk");
1145 const char *ChkStk = TLI->getLibcallImplName(ChkStkLibcall).data();
1146
1147 switch (TM.getCodeModel()) {
1148 case CodeModel::Tiny:
1149 llvm_unreachable("Tiny code model not available on ARM.");
1150 case CodeModel::Small:
1151 case CodeModel::Medium:
1152 case CodeModel::Kernel:
1153 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1155 .addExternalSymbol(ChkStk)
1156 .addReg(ARM::R4, RegState::Implicit)
1158 break;
1159 case CodeModel::Large:
1160 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1161 .addExternalSymbol(ChkStk)
1163
1164 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1166 .addReg(ARM::R12, RegState::Kill)
1167 .addReg(ARM::R4, RegState::Implicit)
1169 break;
1170 }
1171
1172 MachineInstrBuilder Instr, SEH;
1173 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1174 .addReg(ARM::SP, RegState::Kill)
1175 .addReg(ARM::R4, RegState::Kill)
1178 .add(condCodeOp());
1179 if (NeedsWinCFIStackAlloc) {
1180 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1181 .addImm(NumBytes)
1182 .addImm(/*Wide=*/1)
1184 MBB.insertAfter(Instr, SEH);
1185 }
1186 NumBytes = 0;
1187 }
1188
1189 if (NumBytes) {
1190 // Adjust SP after all the callee-save spills.
1191 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1192 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1193 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1194 else {
1195 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1197 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1198 }
1199
1200 if (HasFP && isARM)
1201 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1202 // Note it's not safe to do this in Thumb2 mode because it would have
1203 // taken two instructions:
1204 // mov sp, r7
1205 // sub sp, #24
1206 // If an interrupt is taken between the two instructions, then sp is in
1207 // an inconsistent state (pointing to the middle of callee-saved area).
1208 // The interrupt handler can end up clobbering the registers.
1209 AFI->setShouldRestoreSPFromFP(true);
1210 }
1211
1212 // Set FP to point to the stack slot that contains the previous FP.
1213 // For iOS, FP is R7, which has now been stored in spill area 1.
1214 // Otherwise, if this is not iOS, all the callee-saved registers go
1215 // into spill area 1, including the FP in R11. In either case, it
1216 // is in area one and the adjustment needs to take place just after
1217 // that push.
1219 if (HasFP) {
1220 MachineBasicBlock::iterator FPPushInst;
1221 // Offset from SP immediately after the push which saved the FP to the FP
1222 // save slot.
1223 int64_t FPOffsetAfterPush;
1224 switch (FramePtrSpillArea) {
1225 case SpillArea::GPRCS1:
1226 FPPushInst = GPRCS1Push;
1227 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1228 ArgRegsSaveSize + FPCXTSaveSize +
1229 sizeOfSPAdjustment(*FPPushInst);
1230 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1231 << FPOffsetAfterPush << " after that push\n");
1232 break;
1233 case SpillArea::GPRCS2:
1234 FPPushInst = GPRCS2Push;
1235 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1236 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1237 sizeOfSPAdjustment(*FPPushInst);
1238 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1239 << FPOffsetAfterPush << " after that push\n");
1240 break;
1241 case SpillArea::GPRCS3:
1242 FPPushInst = GPRCS3Push;
1243 FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1244 ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1245 FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
1246 sizeOfSPAdjustment(*FPPushInst);
1247 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1248 << FPOffsetAfterPush << " after that push\n");
1249 break;
1250 default:
1251 llvm_unreachable("frame pointer in unknown spill area");
1252 break;
1253 }
1254 AfterPush = std::next(FPPushInst);
1255 if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1256 assert(FPOffsetAfterPush == 0);
1257
1258 // Emit the MOV or ADD to set up the frame pointer register.
1259 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1260 FramePtr, ARM::SP, FPOffsetAfterPush,
1262
1263 if (!NeedsWinCFI) {
1264 // Emit DWARF info to find the CFA using the frame pointer from this
1265 // point onward.
1266 CFIInstBuilder CFIBuilder(MBB, AfterPush, MachineInstr::FrameSetup);
1267 if (FPOffsetAfterPush != 0)
1268 CFIBuilder.buildDefCFA(FramePtr, -MFI.getObjectOffset(FramePtrSpillFI));
1269 else
1270 CFIBuilder.buildDefCFARegister(FramePtr);
1271 }
1272 }
1273
1274 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1275 // instructions below don't need to be replayed to unwind the stack.
1276 if (NeedsWinCFI && MBBI != MBB.begin()) {
1278 if (HasFP && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1279 End = AfterPush;
1281 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1283 MF.setHasWinCFI(true);
1284 }
1285
1286 // Now that the prologue's actual instructions are finalised, we can insert
1287 // the necessary DWARF cf instructions to describe the situation. Start by
1288 // recording where each register ended up:
1289 if (!NeedsWinCFI) {
1290 for (const auto &Entry : reverse(CSI)) {
1291 MCRegister Reg = Entry.getReg();
1292 int FI = Entry.getFrameIdx();
1294 switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
1295 RegInfo)) {
1296 case SpillArea::GPRCS1:
1297 CFIPos = std::next(GPRCS1Push);
1298 break;
1299 case SpillArea::GPRCS2:
1300 CFIPos = std::next(GPRCS2Push);
1301 break;
1302 case SpillArea::DPRCS1:
1303 CFIPos = std::next(DPRCS1Push);
1304 break;
1305 case SpillArea::GPRCS3:
1306 CFIPos = std::next(GPRCS3Push);
1307 break;
1309 case SpillArea::FPCXT:
1310 case SpillArea::DPRCS2:
1311 // FPCXT and DPRCS2 are not represented in the DWARF info.
1312 break;
1313 }
1314
1315 if (CFIPos.isValid()) {
1317 .buildOffset(Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg,
1318 MFI.getObjectOffset(FI));
1319 }
1320 }
1321 }
1322
1323 // Now we can emit descriptions of where the canonical frame address was
1324 // throughout the process. If we have a frame pointer, it takes over the job
1325 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1326 // actually get emitted.
1327 if (!NeedsWinCFI) {
1328 LLVM_DEBUG(DefCFAOffsetCandidates.dump());
1329 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, HasFP);
1330 }
1331
1332 if (STI.isTargetELF() && hasFP(MF))
1334 AFI->getFramePtrSpillOffset());
1335
1336 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1337 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1338 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1339 AFI->setFPStatusSavesSize(FPStatusSize);
1340 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1341 AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
1342 AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
1343
1344 // If we need dynamic stack realignment, do it here. Be paranoid and make
1345 // sure if we also have VLAs, we have a base pointer for frame access.
1346 // If aligned NEON registers were spilled, the stack has already been
1347 // realigned.
1348 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1349 Align MaxAlign = MFI.getMaxAlign();
1351 if (!AFI->isThumbFunction()) {
1352 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1353 false);
1354 } else {
1355 // We cannot use sp as source/dest register here, thus we're using r4 to
1356 // perform the calculations. We're emitting the following sequence:
1357 // mov r4, sp
1358 // -- use emitAligningInstructions to produce best sequence to zero
1359 // -- out lower bits in r4
1360 // mov sp, r4
1361 // FIXME: It will be better just to find spare register here.
1362 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1363 .addReg(ARM::SP, RegState::Kill)
1365 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1366 false);
1367 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1368 .addReg(ARM::R4, RegState::Kill)
1370 }
1371
1372 AFI->setShouldRestoreSPFromFP(true);
1373 }
1374
1375 // If we need a base pointer, set it up here. It's whatever the value
1376 // of the stack pointer is at this point. Any variable size objects
1377 // will be allocated after this, so we can still use the base pointer
1378 // to reference locals.
1379 // FIXME: Clarify FrameSetup flags here.
1380 if (RegInfo->hasBasePointer(MF)) {
1381 if (isARM)
1382 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1383 .addReg(ARM::SP)
1385 .add(condCodeOp());
1386 else
1387 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1388 .addReg(ARM::SP)
1390 }
1391
1392 // If the frame has variable sized objects then the epilogue must restore
1393 // the sp from fp. We can assume there's an FP here since hasFP already
1394 // checks for hasVarSizedObjects.
1395 if (MFI.hasVarSizedObjects())
1396 AFI->setShouldRestoreSPFromFP(true);
1397}
1398
1400 MachineBasicBlock &MBB) const {
1401 MachineFrameInfo &MFI = MF.getFrameInfo();
1403 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1404 const ARMBaseInstrInfo &TII =
1405 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1406 assert(!AFI->isThumb1OnlyFunction() &&
1407 "This emitEpilogue does not support Thumb1!");
1408 bool isARM = !AFI->isThumbFunction();
1410 STI.getPushPopSplitVariation(MF);
1411
1412 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF.getName() << "\n");
1413
1414 // Amount of stack space we reserved next to incoming args for either
1415 // varargs registers or stack arguments in tail calls made by this function.
1416 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1417
1418 // How much of the stack used by incoming arguments this function is expected
1419 // to restore in this particular epilogue.
1420 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1421 int NumBytes = (int)MFI.getStackSize();
1422 Register FramePtr = RegInfo->getFrameRegister(MF);
1423
1424 // All calls are tail calls in GHC calling conv, and functions have no
1425 // prologue/epilogue.
1426  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1427    return;
1428
1429 // First put ourselves on the first (from top) terminator instruction.
1430 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1431 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1432
1433 MachineBasicBlock::iterator RangeStart;
1434 if (!AFI->hasStackFrame()) {
1435 if (MF.hasWinCFI()) {
1436 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1438 RangeStart = initMBBRange(MBB, MBBI);
1439 }
1440
1441 if (NumBytes + IncomingArgStackToRestore != 0)
1442 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1443 NumBytes + IncomingArgStackToRestore,
1445 } else {
1446 // Unwind MBBI to point to first LDR / VLDRD.
1447 if (MBBI != MBB.begin()) {
1448 do {
1449 --MBBI;
1450      } while (MBBI != MBB.begin() &&
1451               MBBI->getFlag(MachineInstr::FrameDestroy));
1452 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1453 ++MBBI;
1454 }
1455
1456 if (MF.hasWinCFI()) {
1457 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1459 RangeStart = initMBBRange(MBB, MBBI);
1460 }
1461
1462 // Move SP to start of FP callee save spill area.
1463 NumBytes -=
1464 (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
1468
1469 // Reset SP based on frame pointer only if the stack frame extends beyond
1470 // frame pointer stack slot or target is ELF and the function has FP.
1471 if (AFI->shouldRestoreSPFromFP()) {
1472 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1473 if (NumBytes) {
1474 if (isARM)
1475 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1476 ARMCC::AL, 0, TII,
1478 else {
1479 // It's not possible to restore SP from FP in a single instruction.
1480 // For iOS, this looks like:
1481 // mov sp, r7
1482 // sub sp, #24
1483 // This is bad, if an interrupt is taken after the mov, sp is in an
1484 // inconsistent state.
1485 // Use the first callee-saved register as a scratch register.
1486 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1487 "No scratch register to restore SP from FP!");
1488 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1490 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1491 .addReg(ARM::R4)
1494 }
1495 } else {
1496 // Thumb2 or ARM.
1497 if (isARM)
1498 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1501 .add(condCodeOp())
1503 else
1504 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1508 }
1509 } else if (NumBytes &&
1510 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1511 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1513
1514 // Increment past our save areas.
1515 if (AFI->getGPRCalleeSavedArea3Size()) {
1517 (void)PushPopSplit;
1518 MBBI++;
1519 }
1520
1521 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) {
1522 MBBI++;
1523 // Since vpop register list cannot have gaps, there may be multiple vpop
1524 // instructions in the epilogue.
1525 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1526 MBBI++;
1527 }
1528 if (AFI->getDPRCalleeSavedGapSize()) {
1529 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1530 "unexpected DPR alignment gap");
1531 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1533 }
1534
1535 if (AFI->getGPRCalleeSavedArea2Size()) {
1537 (void)PushPopSplit;
1538 MBBI++;
1539 }
1540 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1541
1542 if (ReservedArgStack || IncomingArgStackToRestore) {
1543 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1544 "attempting to restore negative stack amount");
1545 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1546 ReservedArgStack + IncomingArgStackToRestore,
1548 }
1549
1550 // Validate PAC; it should already have been popped into R12. For CMSE entry
1551 // functions, the validation instruction is emitted during expansion of
1552 // tBXNS_RET, since the validation must use the value of SP at function
1553 // entry, before saving (resp. after restoring) FPCXTNS.
1554 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1555 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1556 }
1557
1558 if (MF.hasWinCFI()) {
1559 insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);
1560 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1562 }
1563}
1564
1565/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1566/// debug info. It's the same as what we use for resolving the code-gen
1567/// references for now. FIXME: This can go wrong when references are
1568/// SP-relative and simple call frames aren't used.
1570 int FI,
1571 Register &FrameReg) const {
1572 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1573}
1574
1576 int FI, Register &FrameReg,
1577 int SPAdj) const {
1578 const MachineFrameInfo &MFI = MF.getFrameInfo();
1579 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1580 MF.getSubtarget().getRegisterInfo());
1581 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1582 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1583 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1584 bool isFixed = MFI.isFixedObjectIndex(FI);
1585
1586 FrameReg = ARM::SP;
1587 Offset += SPAdj;
1588
1589 // SP can move around if there are allocas. We may also lose track of SP
1590 // when emergency spilling inside a non-reserved call frame setup.
1591 bool hasMovingSP = !hasReservedCallFrame(MF);
1592
1593 // When dynamically realigning the stack, use the frame pointer for
1594 // parameters, and the stack/base pointer for locals.
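 // For example (illustrative): a fixed object such as an incoming stack
 // argument is addressed relative to the frame pointer, while a local spill
 // slot is addressed off SP, or off the base register (r6) when VLAs make SP
 // unreliable.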
1595 if (RegInfo->hasStackRealignment(MF)) {
1596 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1597 if (isFixed) {
1598 FrameReg = RegInfo->getFrameRegister(MF);
1599 Offset = FPOffset;
1600 } else if (hasMovingSP) {
1601 assert(RegInfo->hasBasePointer(MF) &&
1602 "VLAs and dynamic stack alignment, but missing base pointer!");
1603 FrameReg = RegInfo->getBaseRegister();
1604 Offset -= SPAdj;
1605 }
1606 return Offset;
1607 }
1608
1609 // If there is a frame pointer, use it when we can.
1610 if (hasFP(MF) && AFI->hasStackFrame()) {
1611 // Use frame pointer to reference fixed objects. Use it for locals if
1612 // there are VLAs (and thus the SP isn't reliable as a base).
1613 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1614 FrameReg = RegInfo->getFrameRegister(MF);
1615 return FPOffset;
1616 } else if (hasMovingSP) {
1617 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1618 if (AFI->isThumb2Function()) {
1619 // Try to use the frame pointer if we can, else use the base pointer
1620 // since it's available. This is handy for the emergency spill slot, in
1621 // particular.
1622 if (FPOffset >= -255 && FPOffset < 0) {
1623 FrameReg = RegInfo->getFrameRegister(MF);
1624 return FPOffset;
1625 }
1626 }
1627 } else if (AFI->isThumbFunction()) {
1628 // Prefer SP to the base pointer if the offset is suitably aligned and in
1629 // range, as the effective range of the immediate offset is bigger when
1630 // basing off SP.
1631 // Use add <rd>, sp, #<imm8>
1632 // ldr <rd>, [sp, #<imm8>]
1633 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1634 return Offset;
1635 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1636 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1637 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1638 FrameReg = RegInfo->getFrameRegister(MF);
1639 return FPOffset;
1640 }
1641 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1642 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1643 FrameReg = RegInfo->getFrameRegister(MF);
1644 return FPOffset;
1645 }
1646 }
1647 // Use the base pointer if we have one.
1648 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1649 // That can happen if we forced a base pointer for a large call frame.
1650 if (RegInfo->hasBasePointer(MF)) {
1651 FrameReg = RegInfo->getBaseRegister();
1652 Offset -= SPAdj;
1653 }
1654 return Offset;
1655}
1656
1657void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1660 unsigned StmOpc, unsigned StrOpc,
1661 bool NoGap,
1662 function_ref<bool(unsigned)> Func) const {
1663 MachineFunction &MF = *MBB.getParent();
1666
1667 DebugLoc DL;
1668
1669 using RegAndKill = std::pair<unsigned, bool>;
1670
1672 unsigned i = CSI.size();
1673 while (i != 0) {
1674 unsigned LastReg = 0;
1675 for (; i != 0; --i) {
1676 MCRegister Reg = CSI[i-1].getReg();
1677 if (!Func(Reg))
1678 continue;
1679
1680 const MachineRegisterInfo &MRI = MF.getRegInfo();
1681 bool isLiveIn = MRI.isLiveIn(Reg);
1682 if (!isLiveIn && !MRI.isReserved(Reg))
1683 MBB.addLiveIn(Reg);
1684 // If NoGap is true, push consecutive registers and then leave the rest
1685 // for other instructions. e.g.
1686 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1687 if (NoGap && LastReg && LastReg != Reg-1)
1688 break;
1689 LastReg = Reg;
1690 // Do not set a kill flag on values that are also marked as live-in. This
1691 // happens with the @llvm.returnaddress intrinsic and with arguments
1692 // passed in callee saved registers.
1693 // Omitting the kill flags is conservatively correct even if the live-in
1694 // is not used after all.
1695 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1696 }
1697
1698 if (Regs.empty())
1699 continue;
1700
1701 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1702 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1703 });
1704
1705 if (Regs.size() > 1 || StrOpc== 0) {
1706 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1707 .addReg(ARM::SP)
1710 for (const auto &[Reg, Kill] : Regs)
1712 } else if (Regs.size() == 1) {
1713 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1714 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1715 .addReg(ARM::SP)
1717 .addImm(-4)
1719 }
1720 Regs.clear();
1721
1722 // Put any subsequent vpush instructions before this one: they will refer to
1723 // higher register numbers so need to be pushed first in order to preserve
1724 // monotonicity.
1725 if (MI != MBB.begin())
1726 --MI;
1727 }
1728}
1729
1730void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1733 unsigned LdmOpc, unsigned LdrOpc,
1734 bool isVarArg, bool NoGap,
1735 function_ref<bool(unsigned)> Func) const {
1736 MachineFunction &MF = *MBB.getParent();
1737 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1738 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
1739 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1740 bool hasPAC = AFI->shouldSignReturnAddress();
1741 DebugLoc DL;
1742 bool isTailCall = false;
1743 bool isInterrupt = false;
1744 bool isTrap = false;
1745 bool isCmseEntry = false;
1747 STI.getPushPopSplitVariation(MF);
1748 if (MBB.end() != MI) {
1749 DL = MI->getDebugLoc();
1750 unsigned RetOpcode = MI->getOpcode();
1751 isTailCall =
1752 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1753 RetOpcode == ARM::TCRETURNrinotr12);
1754 isInterrupt =
1755 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1756 isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::tTRAP;
1757 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1758 }
1759
1760 SmallVector<unsigned, 4> Regs;
1761 unsigned i = CSI.size();
1762 while (i != 0) {
1763 unsigned LastReg = 0;
1764 bool DeleteRet = false;
1765 for (; i != 0; --i) {
1766 CalleeSavedInfo &Info = CSI[i-1];
1767 MCRegister Reg = Info.getReg();
1768 if (!Func(Reg))
1769 continue;
1770
1771 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1772 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1773 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1774 (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1775 PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1776 Reg = ARM::PC;
1777 // Fold the return instruction into the LDM.
1778 DeleteRet = true;
1779 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1780 }
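// For example, an epilogue of the form
//   pop {r4, r5, r7}
//   bx  lr
// is folded into a single
//   pop {r4, r5, r7, pc}
// by rewriting LR to PC and switching to the _RET form of the load-multiple.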
1781
1782 // If NoGap is true, pop consecutive registers and then leave the rest
1783 // for other instructions. e.g.
1784 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1785 if (NoGap && LastReg && LastReg != Reg-1)
1786 break;
1787
1788 LastReg = Reg;
1789 Regs.push_back(Reg);
1790 }
1791
1792 if (Regs.empty())
1793 continue;
1794
1795 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1796 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1797 });
1798
1799 if (Regs.size() > 1 || LdrOpc == 0) {
1800 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1801 .addReg(ARM::SP)
1804 for (unsigned Reg : Regs)
1805 MIB.addReg(Reg, getDefRegState(true));
1806 if (DeleteRet) {
1807 if (MI != MBB.end()) {
1808 MIB.copyImplicitOps(*MI);
1809 MI->eraseFromParent();
1810 }
1811 }
1812 MI = MIB;
1813 } else if (Regs.size() == 1) {
1814 // If we adjusted the reg to PC from LR above, switch it back here. We
1815 // only do that for LDM.
1816 if (Regs[0] == ARM::PC)
1817 Regs[0] = ARM::LR;
1818 MachineInstrBuilder MIB =
1819 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1820 .addReg(ARM::SP, RegState::Define)
1821 .addReg(ARM::SP)
1823 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1824 // that refactoring is complete (eventually).
1825 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1826 MIB.addReg(0);
1828 } else
1829 MIB.addImm(4);
1830 MIB.add(predOps(ARMCC::AL));
1831 }
1832 Regs.clear();
1833
1834 // Put any subsequent vpop instructions after this one: they will refer to
1835 // higher register numbers so need to be popped afterwards.
1836 if (MI != MBB.end())
1837 ++MI;
1838 }
1839}
1840
1841 void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
1842 MachineBasicBlock::iterator MI,
1843 ArrayRef<CalleeSavedInfo> CSI,
1844 unsigned PushOpc) const {
1845 MachineFunction &MF = *MBB.getParent();
1846 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1847
1848 SmallVector<Register, 2> Regs;
1849 auto RegPresent = [&CSI](MCRegister Reg) {
1850 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1851 return C.getReg() == Reg;
1852 });
1853 };
1854
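// Taken together, a function that saves both registers gets a prologue
// sequence along the lines of (illustrative; the exact push opcode is PushOpc):
//   vmrs  r4, fpscr
//   vmrs  r5, fpexc
//   push  {r4, r5}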
1855 // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
1856 // instruction.
1857 if (RegPresent(ARM::FPSCR)) {
1858 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4)
1861
1862 Regs.push_back(ARM::R4);
1863 }
1864
1865 // If we need to save FPEXC, then we must move FPEXC into R5 with the
1866 // VMRS_FPEXC instruction.
1867 if (RegPresent(ARM::FPEXC)) {
1868 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5)
1871
1872 Regs.push_back(ARM::R5);
1873 }
1874
1875 // If neither FPSCR nor FPEXC is present, then do nothing.
1876 if (Regs.size() == 0)
1877 return;
1878
1879 // Push both R4 and R5 onto the stack, if present.
1880 MachineInstrBuilder MIB =
1881 BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP)
1882 .addReg(ARM::SP)
1885
1886 for (Register Reg : Regs) {
1887 MIB.addReg(Reg);
1888 }
1889}
1890
1891 void ARMFrameLowering::emitFPStatusRestores(
1892 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1893 MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
1894 MachineFunction &MF = *MBB.getParent();
1895 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1896
1897 auto RegPresent = [&CSI](MCRegister Reg) {
1898 return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
1899 return C.getReg() == Reg;
1900 });
1901 };
1902
1903 // Do nothing if we don't need to restore any FP status registers.
1904 if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
1905 return;
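// The restore sequence mirrors emitFPStatusSaves, roughly:
//   pop   {r4, r5}
//   vmsr  fpscr, r4
//   vmsr  fpexc, r5
// with R5/FPEXC omitted when only FPSCR was saved.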
1906
1907 // Pop registers off of the stack.
1908 MachineInstrBuilder MIB =
1909 BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP)
1910 .addReg(ARM::SP)
1913
1914 // If FPSCR was saved, it will be popped into R4.
1915 if (RegPresent(ARM::FPSCR)) {
1916 MIB.addReg(ARM::R4, RegState::Define);
1917 }
1918
1919 // If FPEXC was saved, it will be popped into R5.
1920 if (RegPresent(ARM::FPEXC)) {
1921 MIB.addReg(ARM::R5, RegState::Define);
1922 }
1923
1924 // Move the FPSCR value back into the register with the VMSR instruction.
1925 if (RegPresent(ARM::FPSCR)) {
1926 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR))
1927 .addReg(ARM::R4)
1930 }
1931
1932 // Move the FPEXC value back into the register with the VMSR_FPEXC
1933 // instruction.
1934 if (RegPresent(ARM::FPEXC)) {
1935 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC))
1936 .addReg(ARM::R5)
1939 }
1940}
1941
1942/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1943/// starting from d8. Also insert stack realignment code and leave the stack
1944 /// pointer pointing to the d8 spill slot.
1945 static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
1946 MachineBasicBlock::iterator MI,
1947 unsigned NumAlignedDPRCS2Regs,
1948 ArrayRef<CalleeSavedInfo> CSI,
1949 const TargetRegisterInfo *TRI) {
1950 MachineFunction &MF = *MBB.getParent();
1951 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1952 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1953 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1954 MachineFrameInfo &MFI = MF.getFrameInfo();
1955
1956 // Mark the D-register spill slots as properly aligned. Since MFI computes
1957 // stack slot layout backwards, this can actually mean that the d-reg stack
1958 // slot offsets can be wrong. The offset for d8 will always be correct.
1959 for (const CalleeSavedInfo &I : CSI) {
1960 unsigned DNum = I.getReg() - ARM::D8;
1961 if (DNum > NumAlignedDPRCS2Regs - 1)
1962 continue;
1963 int FI = I.getFrameIdx();
1964 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1965 // registers will be 8-byte aligned.
1966 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1967
1968 // The stack slot for D8 needs to be maximally aligned because this is
1969 // actually the point where we align the stack pointer. MachineFrameInfo
1970 // computes all offsets relative to the incoming stack pointer which is a
1971 // bit weird when realigning the stack. Any extra padding for this
1972 // over-alignment is not realized because the code inserted below adjusts
1973 // the stack pointer by numregs * 8 before aligning the stack pointer.
1974 if (DNum == 0)
1975 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1976 }
1977
1978 // Move the stack pointer to the d8 spill slot, and align it at the same
1979 // time. Leave the stack slot address in the scratch register r4.
1980 //
1981 // sub r4, sp, #numregs * 8
1982 // bic r4, r4, #align - 1
1983 // mov sp, r4
1984 //
1985 bool isThumb = AFI->isThumbFunction();
1986 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1987 AFI->setShouldRestoreSPFromFP(true);
1988
1989 // sub r4, sp, #numregs * 8
1990 // The immediate is <= 64, so it doesn't need any special encoding.
1991 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1992 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1993 .addReg(ARM::SP)
1994 .addImm(8 * NumAlignedDPRCS2Regs)
1996 .add(condCodeOp());
1997
1998 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1999 // We must set parameter MustBeSingleInstruction to true, since
2000 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
2001 // stack alignment. Luckily, this can always be done since all ARM
2002 // architecture versions that support Neon also support the BFC
2003 // instruction.
2004 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
2005
2006 // mov sp, r4
2007 // The stack pointer must be adjusted before spilling anything, otherwise
2008 // the stack slots could be clobbered by an interrupt handler.
2009 // Leave r4 live, it is used below.
2010 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
2011 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
2012 .addReg(ARM::R4)
2014 if (!isThumb)
2015 MIB.add(condCodeOp());
2016
2017 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
2018 // r4 holds the stack slot address.
2019 unsigned NextReg = ARM::D8;
2020
2021 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
2022 // The writeback is only needed when emitting two vst1.64 instructions.
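// e.g. for NumAlignedDPRCS2Regs == 8 this emits roughly
//   vst1.64 {d8, d9, d10, d11}, [r4:128]!
// leaving r4 pointing at the slot for d12.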
2023 if (NumAlignedDPRCS2Regs >= 6) {
2024 MCRegister SupReg =
2025 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2026 MBB.addLiveIn(SupReg);
2027 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
2028 .addReg(ARM::R4, RegState::Kill)
2029 .addImm(16)
2030 .addReg(NextReg)
2033 NextReg += 4;
2034 NumAlignedDPRCS2Regs -= 4;
2035 }
2036
2037 // We won't modify r4 beyond this point. It currently points to the next
2038 // register to be spilled.
2039 unsigned R4BaseReg = NextReg;
2040
2041 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
2042 if (NumAlignedDPRCS2Regs >= 4) {
2043 MCRegister SupReg =
2044 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2045 MBB.addLiveIn(SupReg);
2046 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
2047 .addReg(ARM::R4)
2048 .addImm(16)
2049 .addReg(NextReg)
2052 NextReg += 4;
2053 NumAlignedDPRCS2Regs -= 4;
2054 }
2055
2056 // 16-byte aligned vst1.64 with 2 d-regs.
2057 if (NumAlignedDPRCS2Regs >= 2) {
2058 MCRegister SupReg =
2059 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2060 MBB.addLiveIn(SupReg);
2061 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
2062 .addReg(ARM::R4)
2063 .addImm(16)
2064 .addReg(SupReg)
2066 NextReg += 2;
2067 NumAlignedDPRCS2Regs -= 2;
2068 }
2069
2070 // Finally, use a vanilla vstr.64 for the odd last register.
2071 if (NumAlignedDPRCS2Regs) {
2072 MBB.addLiveIn(NextReg);
2073 // vstr.64 uses addrmode5 which has an offset scale of 4.
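// e.g. with 7 aligned registers, d12/d13 were stored at [r4] above, so
// NextReg is d14 and the immediate (d14 - d12) * 2 == 4 encodes the byte
// offset 16: vstr d14, [r4, #16].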
2074 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
2075 .addReg(NextReg)
2076 .addReg(ARM::R4)
2077 .addImm((NextReg - R4BaseReg) * 2)
2079 }
2080
2081 // The last spill instruction inserted should kill the scratch register r4.
2082 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2083}
2084
2085/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
2086/// iterator to the following instruction.
2087 static MachineBasicBlock::iterator
2088 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
2089 unsigned NumAlignedDPRCS2Regs) {
2090 // sub r4, sp, #numregs * 8
2091 // bic r4, r4, #align - 1
2092 // mov sp, r4
2093 ++MI; ++MI; ++MI;
2094 assert(MI->mayStore() && "Expecting spill instruction");
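// e.g. for 7 aligned d-registers the spill code above is three stores
// (vst1.64 with writeback, vst1.64, vstr), so the switch advances past
// three instructions via the case 7 -> default -> case 1/2/4 fallthrough.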
2095
2096 // These switch cases all fall through.
2097 switch(NumAlignedDPRCS2Regs) {
2098 case 7:
2099 ++MI;
2100 assert(MI->mayStore() && "Expecting spill instruction");
2101 [[fallthrough]];
2102 default:
2103 ++MI;
2104 assert(MI->mayStore() && "Expecting spill instruction");
2105 [[fallthrough]];
2106 case 1:
2107 case 2:
2108 case 4:
2109 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
2110 ++MI;
2111 }
2112 return MI;
2113}
2114
2115/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
2116/// starting from d8. These instructions are assumed to execute while the
2117/// stack is still aligned, unlike the code inserted by emitPopInst.
2118 static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
2119 MachineBasicBlock::iterator MI,
2120 unsigned NumAlignedDPRCS2Regs,
2121 ArrayRef<CalleeSavedInfo> CSI,
2122 const TargetRegisterInfo *TRI) {
2123 MachineFunction &MF = *MBB.getParent();
2124 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2125 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
2126 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2127
2128 // Find the frame index assigned to d8.
2129 int D8SpillFI = 0;
2130 for (const CalleeSavedInfo &I : CSI)
2131 if (I.getReg() == ARM::D8) {
2132 D8SpillFI = I.getFrameIdx();
2133 break;
2134 }
2135
2136 // Materialize the address of the d8 spill slot into the scratch register r4.
2137 // This can be fairly complicated if the stack frame is large, so just use
2138 // the normal frame index elimination mechanism to do it. This code runs as
2139 // the initial part of the epilog where the stack and base pointers haven't
2140 // been changed yet.
2141 bool isThumb = AFI->isThumbFunction();
2142 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
2143
2144 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
2145 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
2146 .addFrameIndex(D8SpillFI)
2147 .addImm(0)
2149 .add(condCodeOp());
2150
2151 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
2152 unsigned NextReg = ARM::D8;
2153
2154 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
2155 if (NumAlignedDPRCS2Regs >= 6) {
2156 MCRegister SupReg =
2157 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2158 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
2159 .addReg(ARM::R4, RegState::Define)
2160 .addReg(ARM::R4, RegState::Kill)
2161 .addImm(16)
2164 NextReg += 4;
2165 NumAlignedDPRCS2Regs -= 4;
2166 }
2167
2168 // We won't modify r4 beyond this point. It currently points to the next
2169 // register to be reloaded.
2170 unsigned R4BaseReg = NextReg;
2171
2172 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
2173 if (NumAlignedDPRCS2Regs >= 4) {
2174 MCRegister SupReg =
2175 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
2176 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
2177 .addReg(ARM::R4)
2178 .addImm(16)
2181 NextReg += 4;
2182 NumAlignedDPRCS2Regs -= 4;
2183 }
2184
2185 // 16-byte aligned vld1.64 with 2 d-regs.
2186 if (NumAlignedDPRCS2Regs >= 2) {
2187 MCRegister SupReg =
2188 TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
2189 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
2190 .addReg(ARM::R4)
2191 .addImm(16)
2193 NextReg += 2;
2194 NumAlignedDPRCS2Regs -= 2;
2195 }
2196
2197 // Finally, use a vanilla vldr.64 for the remaining odd register.
2198 if (NumAlignedDPRCS2Regs)
2199 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
2200 .addReg(ARM::R4)
2201 .addImm(2 * (NextReg - R4BaseReg))
2203
2204 // The last reload instruction inserted kills the scratch register r4.
2205 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
2206}
2207
2208 bool ARMFrameLowering::spillCalleeSavedRegisters(
2209 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2210 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2211 if (CSI.empty())
2212 return false;
2213
2214 MachineFunction &MF = *MBB.getParent();
2215 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2216 ARMSubtarget::PushPopSplitVariation PushPopSplit =
2217 STI.getPushPopSplitVariation(MF);
2218 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2219
2220 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
2221 unsigned PushOneOpc = AFI->isThumbFunction() ?
2222 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
2223 unsigned FltOpc = ARM::VSTMDDB_UPD;
2224 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2225 // Compute PAC in R12.
2226 if (AFI->shouldSignReturnAddress()) {
2227 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
2229 }
2230 // Save the non-secure floating point context.
2231 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2232 return C.getReg() == ARM::FPCXTNS;
2233 })) {
2234 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2235 ARM::SP)
2236 .addReg(ARM::SP)
2237 .addImm(-4)
2239 }
2240
2241 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2242 RegInfo](unsigned Reg, SpillArea TestArea) {
2243 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2244 TestArea;
2245 };
2246 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2247 return CheckRegArea(Reg, SpillArea::GPRCS1);
2248 };
2249 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2250 return CheckRegArea(Reg, SpillArea::GPRCS2);
2251 };
2252 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2253 return CheckRegArea(Reg, SpillArea::DPRCS1);
2254 };
2255 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2256 return CheckRegArea(Reg, SpillArea::GPRCS3);
2257 };
2258
2259 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
2260 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
2261 emitFPStatusSaves(MBB, MI, CSI, PushOpc);
2262 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
2263 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
2264
2265 // The code above does not insert spill code for the aligned DPRCS2 registers.
2266 // The stack realignment code will be inserted between the push instructions
2267 // and these spills.
2268 if (NumAlignedDPRCS2Regs)
2269 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2270
2271 return true;
2272}
2273
2274 bool ARMFrameLowering::restoreCalleeSavedRegisters(
2275 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2276 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2277 if (CSI.empty())
2278 return false;
2279
2280 MachineFunction &MF = *MBB.getParent();
2281 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2282 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
2283
2284 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2285 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2286 ARMSubtarget::PushPopSplitVariation PushPopSplit =
2287 STI.getPushPopSplitVariation(MF);
2288
2289 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2290 // registers. Do that here instead.
2291 if (NumAlignedDPRCS2Regs)
2292 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2293
2294 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2295 unsigned LdrOpc =
2296 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2297 unsigned FltOpc = ARM::VLDMDIA_UPD;
2298
2299 auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
2300 RegInfo](unsigned Reg, SpillArea TestArea) {
2301 return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
2302 TestArea;
2303 };
2304 auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
2305 return CheckRegArea(Reg, SpillArea::GPRCS1);
2306 };
2307 auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
2308 return CheckRegArea(Reg, SpillArea::GPRCS2);
2309 };
2310 auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
2311 return CheckRegArea(Reg, SpillArea::DPRCS1);
2312 };
2313 auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
2314 return CheckRegArea(Reg, SpillArea::GPRCS3);
2315 };
2316
2317 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
2318 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
2319 emitFPStatusRestores(MBB, MI, CSI, PopOpc);
2320 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
2321 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
2322
2323 return true;
2324}
2325
2326 // FIXME: Make generic?
2327 static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
2328 const ARMBaseInstrInfo &TII) {
2329 unsigned FnSize = 0;
2330 for (auto &MBB : MF) {
2331 for (auto &MI : MBB)
2332 FnSize += TII.getInstSizeInBytes(MI);
2333 }
2334 if (MF.getJumpTableInfo())
2335 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2336 FnSize += Table.MBBs.size() * 4;
2337 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2338 LLVM_DEBUG(dbgs() << "Estimated function size for " << MF.getName() << " = "
2339 << FnSize << " bytes\n");
2340 return FnSize;
2341}
2342
2343/// estimateRSStackSizeLimit - Look at each instruction that references stack
2344/// frames and return the stack size limit beyond which some of these
2345/// instructions will require a scratch register during their expansion later.
2346 // FIXME: Move to TII?
2347 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
2348 const TargetFrameLowering *TFI,
2349 bool &HasNonSPFrameIndex) {
2350 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2351 const ARMBaseInstrInfo &TII =
2352 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2353 unsigned Limit = (1 << 12) - 1;
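// Start from the 4095-byte limit of a 12-bit immediate offset (e.g. the
// AddrMode2 case below); the cases in the loop only ever tighten this limit
// or force it to 0.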
2354 for (auto &MBB : MF) {
2355 for (auto &MI : MBB) {
2356 if (MI.isDebugInstr())
2357 continue;
2358 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2359 if (!MI.getOperand(i).isFI())
2360 continue;
2361
2362 // When using ADDri to get the address of a stack object, 255 is the
2363 // largest offset guaranteed to fit in the immediate offset.
2364 if (MI.getOpcode() == ARM::ADDri) {
2365 Limit = std::min(Limit, (1U << 8) - 1);
2366 break;
2367 }
2368 // t2ADDri will not require an extra register, it can reuse the
2369 // destination.
2370 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2371 break;
2372
2373 const MCInstrDesc &MCID = MI.getDesc();
2374 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i);
2375 if (RegClass && !RegClass->contains(ARM::SP))
2376 HasNonSPFrameIndex = true;
2377
2378 // Otherwise check the addressing mode.
2379 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2381 case ARMII::AddrMode2:
2382 // Default 12 bit limit.
2383 break;
2384 case ARMII::AddrMode3:
2386 Limit = std::min(Limit, (1U << 8) - 1);
2387 break;
2389 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2390 break;
2391 case ARMII::AddrMode5:
2394 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2395 break;
2397 // i12 supports only positive offset so these will be converted to
2398 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2399 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2400 Limit = std::min(Limit, (1U << 8) - 1);
2401 break;
2402 case ARMII::AddrMode4:
2403 case ARMII::AddrMode6:
2404 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2405 // immediate offset for stack references.
2406 return 0;
2408 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2409 break;
2411 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2412 break;
2414 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2415 break;
2416 default:
2417 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2418 }
2419 break; // At most one FI per instruction
2420 }
2421 }
2422 }
2423
2424 return Limit;
2425}
2426
2427// In functions that realign the stack, it can be an advantage to spill the
2428// callee-saved vector registers after realigning the stack. The vst1 and vld1
2429// instructions take alignment hints that can improve performance.
2430 static void
2431 checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
2432 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2434 return;
2435
2436 // Naked functions don't spill callee-saved registers.
2437 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2438 return;
2439
2440 // We are planning to use NEON instructions vst1 / vld1.
2441 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2442 return;
2443
2444 // Don't bother if the default stack alignment is sufficiently high.
2446 return;
2447
2448 // Aligned spills require stack realignment.
2449 if (!static_cast<const ARMBaseRegisterInfo *>(
2451 return;
2452
2453 // We always spill contiguous d-registers starting from d8. Count how many
2454 // need spilling. The register allocator will almost always use the
2455 // callee-saved registers in order, but it can happen that there are holes in
2456 // the range. Registers above the hole will be spilled to the standard DPRCS
2457 // area.
2458 unsigned NumSpills = 0;
2459 for (; NumSpills < 8; ++NumSpills)
2460 if (!SavedRegs.test(ARM::D8 + NumSpills))
2461 break;
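// e.g. if d8, d9, d10 and d12 are callee-saved, NumSpills is 3 and d12 is
// left to the normal DPRCS spill area.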
2462
2463 // Don't do this for just one d-register. It's not worth it.
2464 if (NumSpills < 2)
2465 return;
2466
2467 // Spill the first NumSpills D-registers after realigning the stack.
2468 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2469
2470 // A scratch register is required for the vst1 / vld1 instructions.
2471 SavedRegs.set(ARM::R4);
2472}
2473
2474 bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2475 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2476 // upon function entry (resp. restore it immediately before return)
2477 if (STI.hasV8_1MMainlineOps() &&
2478 MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
2479 return false;
2480
2481 // We are disabling shrinkwrapping for now when PAC is enabled, as
2482 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2483 // generated. A follow-up patch will fix this in a more performant manner.
2484 if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
2485 true /* SpillsLR */))
2486 return false;
2487
2488 return true;
2489}
2490
2492 const MachineFunction &MF) const {
2493 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2494 return Subtarget.createAAPCSFrameChain() && hasFP(MF);
2495}
2496
2497// Thumb1 may require a spill when storing to a frame index through FP (or any
2498// access with execute-only), for cases where FP is a high register (R11). This
2499// scans the function for cases where this may happen.
2501 const TargetFrameLowering &TFI) {
2502 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2503 if (!AFI->isThumb1OnlyFunction())
2504 return false;
2505
2506 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2507 for (const auto &MBB : MF)
2508 for (const auto &MI : MBB)
2509 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2510 STI.genExecuteOnly())
2511 for (const auto &Op : MI.operands())
2512 if (Op.isFI()) {
2513 Register Reg;
2514 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2515 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2516 return true;
2517 }
2518 return false;
2519}
2520
2521 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
2522 BitVector &SavedRegs,
2523 RegScavenger *RS) const {
2524 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2525 // This tells PEI to spill the FP as if it is any other callee-save register
2526 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2527 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2528 // to combine multiple loads / stores.
2529 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
2531 bool CS1Spilled = false;
2532 bool LRSpilled = false;
2533 unsigned NumGPRSpills = 0;
2534 unsigned NumFPRSpills = 0;
2535 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2536 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2537 const Function &F = MF.getFunction();
2538 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2539 MF.getSubtarget().getRegisterInfo());
2540 const ARMBaseInstrInfo &TII =
2541 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2543 MachineFrameInfo &MFI = MF.getFrameInfo();
2546 (void)TRI; // Silence unused warning in non-assert builds.
2547 Register FramePtr = STI.getFramePointerReg();
2549 STI.getPushPopSplitVariation(MF);
2550
2551 // For a floating point interrupt, save these registers always, since LLVM
2552 // currently doesn't model reads/writes to these registers.
2553 if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) {
2554 SavedRegs.set(ARM::FPSCR);
2555 SavedRegs.set(ARM::R4);
2556
2557 // The FPEXC register is only present on non-M-Class cores.
2558 if (STI.isMClass()) {
2559 SavedRegs.reset(ARM::FPEXC);
2560 } else {
2561 SavedRegs.set(ARM::FPEXC);
2562 SavedRegs.set(ARM::R5);
2563 }
2564 }
2565
2566 // Spill R4 if a Thumb2 function requires stack realignment - it will be used
2567 // as a scratch register. Also spill R4 if the function has variable-sized objects,
2568 // since it's not always possible to restore sp from fp in a single
2569 // instruction.
2570 // FIXME: It will be better just to find spare register here.
2571 if (AFI->isThumb2Function() &&
2572 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2573 SavedRegs.set(ARM::R4);
2574
2575 // If a stack probe will be emitted, spill R4 and LR, since they are
2576 // clobbered by the stack probe call.
2577 // This estimate should be a safe, conservative estimate. The actual
2578 // stack probe is enabled based on the size of the local objects;
2579 // this estimate also includes the varargs store size.
2580 if (STI.isTargetWindows() &&
2581 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2582 SavedRegs.set(ARM::R4);
2583 SavedRegs.set(ARM::LR);
2584 }
2585
2586 if (AFI->isThumb1OnlyFunction()) {
2587 // Spill LR if Thumb1 function uses variable length argument lists.
2588 if (AFI->getArgRegsSaveSize() > 0)
2589 SavedRegs.set(ARM::LR);
2590
2591 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2592 // requires stack alignment. We don't know for sure what the stack size
2593 // will be, but for this an estimate is good enough. If anything
2594 // changes it, it'll be a spill, which implies we've used all the registers
2595 // and so R4 is already used, so not marking it here will be OK.
2596 // FIXME: It will be better just to find spare register here.
2597 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2598 MFI.estimateStackSize(MF) > 508)
2599 SavedRegs.set(ARM::R4);
2600 }
2601
2602 // See if we can spill vector registers to aligned stack.
2603 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2604
2605 // Spill the BasePtr if it's used.
2606 if (RegInfo->hasBasePointer(MF))
2607 SavedRegs.set(RegInfo->getBaseRegister());
2608
2609 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2610 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2611 CanEliminateFrame = false;
2612
2613 // When return address signing is enabled R12 is treated as callee-saved.
2614 if (AFI->shouldSignReturnAddress())
2615 CanEliminateFrame = false;
2616
2617 // Don't spill FP if the frame can be eliminated. This is determined
2618 // by scanning the callee-save registers to see if any is modified.
2619 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2620 for (unsigned i = 0; CSRegs[i]; ++i) {
2621 unsigned Reg = CSRegs[i];
2622 bool Spilled = false;
2623 if (SavedRegs.test(Reg)) {
2624 Spilled = true;
2625 CanEliminateFrame = false;
2626 }
2627
2628 if (!ARM::GPRRegClass.contains(Reg)) {
2629 if (Spilled) {
2630 if (ARM::SPRRegClass.contains(Reg))
2631 NumFPRSpills++;
2632 else if (ARM::DPRRegClass.contains(Reg))
2633 NumFPRSpills += 2;
2634 else if (ARM::QPRRegClass.contains(Reg))
2635 NumFPRSpills += 4;
2636 }
2637 continue;
2638 }
2639
2640 if (Spilled) {
2641 NumGPRSpills++;
2642
2643 if (PushPopSplit != ARMSubtarget::SplitR7) {
2644 if (Reg == ARM::LR)
2645 LRSpilled = true;
2646 CS1Spilled = true;
2647 continue;
2648 }
2649
2650 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2651 switch (Reg) {
2652 case ARM::LR:
2653 LRSpilled = true;
2654 [[fallthrough]];
2655 case ARM::R0: case ARM::R1:
2656 case ARM::R2: case ARM::R3:
2657 case ARM::R4: case ARM::R5:
2658 case ARM::R6: case ARM::R7:
2659 CS1Spilled = true;
2660 break;
2661 default:
2662 break;
2663 }
2664 } else {
2665 if (PushPopSplit != ARMSubtarget::SplitR7) {
2666 UnspilledCS1GPRs.push_back(Reg);
2667 continue;
2668 }
2669
2670 switch (Reg) {
2671 case ARM::R0: case ARM::R1:
2672 case ARM::R2: case ARM::R3:
2673 case ARM::R4: case ARM::R5:
2674 case ARM::R6: case ARM::R7:
2675 case ARM::LR:
2676 UnspilledCS1GPRs.push_back(Reg);
2677 break;
2678 default:
2679 UnspilledCS2GPRs.push_back(Reg);
2680 break;
2681 }
2682 }
2683 }
2684
2685 bool ForceLRSpill = false;
2686 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2687 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2688 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2689 // use of BL to implement far jump.
2690 if (FnSize >= (1 << 11)) {
2691 CanEliminateFrame = false;
2692 ForceLRSpill = true;
2693 }
2694 }
2695
2696 // If any of the stack slot references may be out of range of an immediate
2697 // offset, make sure a register (or a spill slot) is available for the
2698 // register scavenger. Note that if we're indexing off the frame pointer, the
2699 // effective stack size is 4 bytes larger since the FP points to the stack
2700 // slot of the previous FP. Also, if we have variable sized objects in the
2701 // function, stack slot references will often be negative, and some of
2702 // our instructions are positive-offset only, so conservatively consider
2703 // that case to want a spill slot (or register) as well. Similarly, if
2704 // the function adjusts the stack pointer during execution and the
2705 // adjustments aren't already part of our stack size estimate, our offset
2706 // calculations may be off, so be conservative.
2707 // FIXME: We could add logic to be more precise about negative offsets
2708 // and which instructions will need a scratch register for them. Is it
2709 // worth the effort and added fragility?
2710 unsigned EstimatedStackSize =
2711 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2712
2713 // Determine biggest (positive) SP offset in MachineFrameInfo.
2714 int MaxFixedOffset = 0;
2715 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2716 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2717 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2718 }
2719
2720 bool HasFP = hasFP(MF);
2721 if (HasFP) {
2722 if (AFI->hasStackFrame())
2723 EstimatedStackSize += 4;
2724 } else {
2725 // If FP is not used, SP will be used to access arguments, so count the
2726 // size of arguments into the estimation.
2727 EstimatedStackSize += MaxFixedOffset;
2728 }
2729 EstimatedStackSize += 16; // For possible paddings.
2730
2731 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2732 bool HasNonSPFrameIndex = false;
2733 if (AFI->isThumb1OnlyFunction()) {
2734 // For Thumb1, don't bother to iterate over the function. The only
2735 // instruction that requires an emergency spill slot is a store to a
2736 // frame index.
2737 //
2738 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2739 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2740 // a 5-bit unsigned immediate.
2741 //
2742 // We could try to check if the function actually contains a tSTRspi
2743 // that might need the spill slot, but it's not really important.
2744 // Functions with VLAs or extremely large call frames are rare, and
2745 // if a function is allocating more than 1KB of stack, an extra 4-byte
2746 // slot probably isn't relevant.
2747 //
2748 // A special case is the scenario where r11 is used as FP, where accesses
2749 // to a frame index will require its value to be moved into a low reg.
2750 // This is handled later on, once we are able to determine if we have any
2751 // fp-relative accesses.
2752 if (RegInfo->hasBasePointer(MF))
2753 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2754 else
2755 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2756 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2757 } else {
2758 EstimatedRSStackSizeLimit =
2759 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2760 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2761 }
2762 // Final estimate of whether sp or bp-relative accesses might require
2763 // scavenging.
2764 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2765
2766 // If the stack pointer moves and we don't have a base pointer, the
2767 // estimate logic doesn't work. The actual offsets might be larger when
2768 // we're constructing a call frame, or we might need to use negative
2769 // offsets from fp.
2770 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2771 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2772 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2773
2774 // If we have a frame pointer, we assume arguments will be accessed
2775 // relative to the frame pointer. Check whether fp-relative accesses to
2776 // arguments require scavenging.
2777 //
2778 // We could do slightly better on Thumb1; in some cases, an sp-relative
2779 // offset would be legal even though an fp-relative offset is not.
2780 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2781 bool HasLargeArgumentList =
2782 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2783
2784 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2785 HasLargeArgumentList || HasNonSPFrameIndex;
2786 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2787 << "; EstimatedStack: " << EstimatedStackSize
2788 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2789 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2790 if (BigFrameOffsets ||
2791 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2792 AFI->setHasStackFrame(true);
2793
2794 // Save the FP if:
2795 // 1. We currently need it (HasFP), OR
2796 // 2. We might need it later due to stack realignment from aligned DPRCS2
2797 // saves (which will make hasFP() become true in emitPrologue).
2798 if (HasFP || (isFPReserved(MF) && AFI->getNumAlignedDPRCS2Regs() > 0)) {
2799 SavedRegs.set(FramePtr);
2800 // If the frame pointer is required by the ABI, also spill LR so that we
2801 // emit a complete frame record.
2802 if ((requiresAAPCSFrameRecord(MF) ||
2804 !LRSpilled) {
2805 SavedRegs.set(ARM::LR);
2806 LRSpilled = true;
2807 NumGPRSpills++;
2808 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2809 if (LRPos != UnspilledCS1GPRs.end())
2810 UnspilledCS1GPRs.erase(LRPos);
2811 }
2812 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2813 if (FPPos != UnspilledCS1GPRs.end())
2814 UnspilledCS1GPRs.erase(FPPos);
2815 NumGPRSpills++;
2816 if (FramePtr == ARM::R7)
2817 CS1Spilled = true;
2818 }
2819
2820 // This is the number of extra spills inserted for callee-save GPRs which
2821 // would not otherwise be used by the function. When greater than zero it
2822 // guarantees that it is possible to scavenge a register to hold the
2823 // address of a stack slot. On Thumb1, the register must be a valid operand
2824 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2825 // or lr.
2826 //
2827 // If we don't insert a spill, we instead allocate an emergency spill
2828 // slot, which can be used by scavenging to spill an arbitrary register.
2829 //
2830 // We currently don't try to figure out whether any specific instruction
2831 // requires scavenging an additional register.
2832 unsigned NumExtraCSSpill = 0;
2833
2834 if (AFI->isThumb1OnlyFunction()) {
2835 // For Thumb1-only targets, we need some low registers when we save and
2836 // restore the high registers (which aren't allocatable, but could be
2837 // used by inline assembly) because the push/pop instructions can not
2838 // access high registers. If necessary, we might need to push more low
2839 // registers to ensure that there is at least one free that can be used
2840 // for the saving & restoring, and preferably we should ensure that as
2841 // many as are needed are available so that fewer push/pop instructions
2842 // are required.
2843
2844 // Low registers which are not currently pushed, but could be (r4-r7).
2845 SmallVector<unsigned, 4> AvailableRegs;
2846
2847 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2848 // free.
2849 int EntryRegDeficit = 0;
2850 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2851 if (!MF.getRegInfo().isLiveIn(Reg)) {
2852 --EntryRegDeficit;
2854 << printReg(Reg, TRI)
2855 << " is unused argument register, EntryRegDeficit = "
2856 << EntryRegDeficit << "\n");
2857 }
2858 }
2859
2860 // Unused return registers can be clobbered in the epilogue for free.
2861 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2863 << " return regs used, ExitRegDeficit = "
2864 << ExitRegDeficit << "\n");
2865
2866 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2867 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2868
2869 // r4-r6 can be used in the prologue if they are pushed by the first push
2870 // instruction.
2871 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2872 if (SavedRegs.test(Reg)) {
2873 --RegDeficit;
2874 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2875 << " is saved low register, RegDeficit = "
2876 << RegDeficit << "\n");
2877 } else {
2878 AvailableRegs.push_back(Reg);
2879 LLVM_DEBUG(
2880 dbgs()
2881 << printReg(Reg, TRI)
2882 << " is non-saved low register, adding to AvailableRegs\n");
2883 }
2884 }
2885
2886 // r7 can be used if it is not being used as the frame pointer.
2887 if (!HasFP || FramePtr != ARM::R7) {
2888 if (SavedRegs.test(ARM::R7)) {
2889 --RegDeficit;
2890 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2891 << RegDeficit << "\n");
2892 } else {
2893 AvailableRegs.push_back(ARM::R7);
2894 LLVM_DEBUG(
2895 dbgs()
2896 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2897 }
2898 }
2899
2900 // Each of r8-r11 needs to be copied to a low register, then pushed.
2901 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2902 if (SavedRegs.test(Reg)) {
2903 ++RegDeficit;
2904 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2905 << " is saved high register, RegDeficit = "
2906 << RegDeficit << "\n");
2907 }
2908 }
2909
2910 // LR can only be used by PUSH, not POP, and can't be used at all if the
2911 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2912 // are more limited at function entry than exit.
2913 if ((EntryRegDeficit > ExitRegDeficit) &&
2914 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2916 if (SavedRegs.test(ARM::LR)) {
2917 --RegDeficit;
2918 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2919 << RegDeficit << "\n");
2920 } else {
2921 AvailableRegs.push_back(ARM::LR);
2922 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2923 }
2924 }
2925
2926 // If there are more high registers that need pushing than low registers
2927 // available, push some more low registers so that we can use fewer push
2928 // instructions. This might not reduce RegDeficit all the way to zero,
2929 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2930 // need saving.
2931 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2932 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2933 unsigned Reg = AvailableRegs.pop_back_val();
2934 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2935 << " to make up reg deficit\n");
2936 SavedRegs.set(Reg);
2937 NumGPRSpills++;
2938 CS1Spilled = true;
2939 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2940 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2941 NumExtraCSSpill++;
2942 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2943 if (Reg == ARM::LR)
2944 LRSpilled = true;
2945 }
2946 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2947 << "\n");
2948 }
2949
2950 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2951 // restore LR in that case.
2952 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2953
2954 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2955 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2956 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2957 SavedRegs.set(ARM::LR);
2958 NumGPRSpills++;
2960 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2961 if (LRPos != UnspilledCS1GPRs.end())
2962 UnspilledCS1GPRs.erase(LRPos);
2963
2964 ForceLRSpill = false;
2965 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2966 !AFI->isThumb1OnlyFunction())
2967 NumExtraCSSpill++;
2968 }
2969
2970 // If stack and double are 8-byte aligned and we are spilling an odd number
2971 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2972 // the integer and double callee save areas.
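// e.g. spilling r4-r7 and lr (five GPRs, 20 bytes) would leave the following
// d-register save area misaligned; spilling one more GPR rounds the GPR area
// up to 24 bytes and avoids a 4-byte gap.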
2973 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2974 const Align TargetAlign = getStackAlign();
2975 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2976 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2977 for (unsigned Reg : UnspilledCS1GPRs) {
2978 // Don't spill a high register if the function is Thumb. In the case of
2979 // Windows on ARM, accept R11 (the frame pointer).
2980 if (!AFI->isThumbFunction() ||
2981 (STI.isTargetWindows() && Reg == ARM::R11) ||
2982 isARMLowRegister(Reg) ||
2983 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2984 SavedRegs.set(Reg);
2985 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2986 << " to make up alignment\n");
2987 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2988 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2989 NumExtraCSSpill++;
2990 break;
2991 }
2992 }
2993 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2994 unsigned Reg = UnspilledCS2GPRs.front();
2995 SavedRegs.set(Reg);
2996 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2997 << " to make up alignment\n");
2998 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2999 NumExtraCSSpill++;
3000 }
3001 }
3002
3003 // Estimate if we might need to scavenge registers at some point in order
3004 // to materialize a stack offset. If so, either spill one additional
3005 // callee-saved register or reserve a special spill slot to facilitate
3006 // register scavenging. Thumb1 needs a spill slot for stack pointer
3007 // adjustments and for frame index accesses when FP is a high register,
3008 // even when the frame itself is small.
3009 unsigned RegsNeeded = 0;
3010 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
3011 RegsNeeded++;
3012 // With thumb1 execute-only we may need an additional register for saving
3013 // and restoring the CPSR.
3014 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
3015 RegsNeeded++;
3016 }
3017
3018 if (RegsNeeded > NumExtraCSSpill) {
3019 // If any non-reserved CS register isn't spilled, just spill one or two
3020 // extra. That should take care of it!
3021 unsigned NumExtras = TargetAlign.value() / 4;
3023 while (NumExtras && !UnspilledCS1GPRs.empty()) {
3024 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
3025 if (!MRI.isReserved(Reg) &&
3026 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
3027 Extras.push_back(Reg);
3028 NumExtras--;
3029 }
3030 }
3031 // For non-Thumb1 functions, also check for hi-reg CS registers
3032 if (!AFI->isThumb1OnlyFunction()) {
3033 while (NumExtras && !UnspilledCS2GPRs.empty()) {
3034 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
3035 if (!MRI.isReserved(Reg)) {
3036 Extras.push_back(Reg);
3037 NumExtras--;
3038 }
3039 }
3040 }
3041 if (NumExtras == 0) {
3042 for (unsigned Reg : Extras) {
3043 SavedRegs.set(Reg);
3044 if (!MRI.isPhysRegUsed(Reg))
3045 NumExtraCSSpill++;
3046 }
3047 }
3048 while ((RegsNeeded > NumExtraCSSpill) && RS) {
3049 // Reserve a slot closest to SP or frame pointer.
3050 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
3051 const TargetRegisterClass &RC = ARM::GPRRegClass;
3052 unsigned Size = TRI->getSpillSize(RC);
3053 Align Alignment = TRI->getSpillAlign(RC);
3054 RS->addScavengingFrameIndex(
3055 MFI.CreateSpillStackObject(Size, Alignment));
3056 --RegsNeeded;
3057 }
3058 }
3059 }
3060
3061 if (ForceLRSpill)
3062 SavedRegs.set(ARM::LR);
3063 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
3064}
3065
3067 MachineFrameInfo &MFI = MF.getFrameInfo();
3068 if (!MFI.isCalleeSavedInfoValid())
3069 return;
3070
3071 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
3072 // into PC so it is not live out of the return block: Clear the Restored bit
3073 // in that case.
3074 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
3075 if (Info.getReg() != ARM::LR)
3076 continue;
3077 if (all_of(MF, [](const MachineBasicBlock &MBB) {
3078 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
3079 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
3080 Term.getOpcode() == ARM::t2LDMIA_RET ||
3081 Term.getOpcode() == ARM::tPOP_RET;
3082 });
3083 })) {
3084 Info.setRestored(false);
3085 break;
3086 }
3087 }
3088}
3089
3095
3097 BitVector &SavedRegs) const {
3099
3100 // If we have the "returned" parameter attribute which guarantees that we
3101 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
3102 // record that fact for IPRA.
3103 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3104 if (AFI->getPreservesR0())
3105 SavedRegs.set(ARM::R0);
3106}
3107
3108 bool ARMFrameLowering::assignCalleeSavedSpillSlots(
3109 MachineFunction &MF, const TargetRegisterInfo *TRI,
3110 std::vector<CalleeSavedInfo> &CSI) const {
3111 // For CMSE entry functions, handle floating-point context as if it was a
3112 // callee-saved register.
3113 if (STI.hasV8_1MMainlineOps() &&
3115 CSI.emplace_back(ARM::FPCXTNS);
3116 CSI.back().setRestored(false);
3117 }
3118
3119 // For functions which sign their return address, the return address PAC is
3120 // computed in R12 upon function entry. Treat R12 as a callee-saved register
3121 // in this case.
3122 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
3123 if (AFI.shouldSignReturnAddress()) {
3124 // The order of registers must match the order we push them, because the
3125 // PEI assigns frame indices in that order. That order depends on the
3126 // PushPopSplitVariation, there are only two cases which we use with return
3127 // address signing:
3128 switch (STI.getPushPopSplitVariation(MF)) {
3130 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
3131 CSI.insert(find_if(CSI,
3132 [=](const auto &CS) {
3133 MCRegister Reg = CS.getReg();
3134 return Reg == ARM::R10 || Reg == ARM::R11 ||
3135 Reg == ARM::R8 || Reg == ARM::R9 ||
3136 ARM::DPRRegClass.contains(Reg);
3137 }),
3138 CalleeSavedInfo(ARM::R12));
3139 break;
3141 // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3142 // on the stack.
3143 CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3144 break;
3147 "ABI-required frame pointers need a CSR split when signing return "
3148 "address.");
3149 CSI.insert(find_if(CSI,
3150 [=](const auto &CS) {
3151 MCRegister Reg = CS.getReg();
3152 return Reg != ARM::LR;
3153 }),
3154 CalleeSavedInfo(ARM::R12));
3155 break;
3156 default:
3157 llvm_unreachable("Unexpected CSR split with return address signing");
3158 }
3159 }
3160
3161 return false;
3162}
3163
3166 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
3167 NumEntries = std::size(FixedSpillOffsets);
3168 return FixedSpillOffsets;
3169}
3170
3171 MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
3172 MachineFunction &MF, MachineBasicBlock &MBB,
3173 MachineBasicBlock::iterator I) const {
3174 const ARMBaseInstrInfo &TII =
3175 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3176 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3177 bool isARM = !AFI->isThumbFunction();
3178 DebugLoc dl = I->getDebugLoc();
3179 unsigned Opc = I->getOpcode();
3180 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
3181 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
3182
3183 assert(!AFI->isThumb1OnlyFunction() &&
3184 "This eliminateCallFramePseudoInstr does not support Thumb1!");
3185
3186 int PIdx = I->findFirstPredOperandIdx();
3187 ARMCC::CondCodes Pred = (PIdx == -1)
3188 ? ARMCC::AL
3189 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
3190 unsigned PredReg = TII.getFramePred(*I);
3191
3192 if (!hasReservedCallFrame(MF)) {
3193 // Bail early if the callee is expected to do the adjustment.
3194 if (IsDestroy && CalleePopAmount != -1U)
3195 return MBB.erase(I);
3196
3197 // If we have alloca, convert as follows:
3198 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
3199 // ADJCALLSTACKUP -> add, sp, sp, amount
3200 unsigned Amount = TII.getFrameSize(*I);
3201 if (Amount != 0) {
3202 // We need to keep the stack aligned properly. To do this, we round the
3203 // amount of space needed for the outgoing arguments up to the next
3204 // alignment boundary.
3205 Amount = alignSPAdjust(Amount);
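// e.g. with an 8-byte stack alignment an outgoing-argument area of 20 bytes
// is rounded up to a 24-byte SP adjustment.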
3206
3207 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
3208 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
3209 Pred, PredReg);
3210 } else {
3211 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
3212 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
3213 Pred, PredReg);
3214 }
3215 }
3216 } else if (CalleePopAmount != -1U) {
3217 // If the calling convention demands that the callee pops arguments from the
3218 // stack, we want to add it back if we have a reserved call frame.
3219 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
3220 MachineInstr::NoFlags, Pred, PredReg);
3221 }
3222 return MBB.erase(I);
3223}
3224
3225/// Get the minimum constant for ARM that is greater than or equal to the
3226/// argument. In ARM, constants can have any value that can be produced by
3227/// rotating an 8-bit value to the right by an even number of bits within a
3228/// 32-bit word.
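/// For example, alignToARMConstant(257) returns 260 (0x41 rotated right by
/// 30 bits), the smallest encodable constant that is >= 257.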
3229 static uint32_t alignToARMConstant(uint32_t Value) {
3230 unsigned Shifted = 0;
3231
3232 if (Value == 0)
3233 return 0;
3234
3235 while (!(Value & 0xC0000000)) {
3236 Value = Value << 2;
3237 Shifted += 2;
3238 }
3239
3240 bool Carry = (Value & 0x00FFFFFF);
3241 Value = ((Value & 0xFF000000) >> 24) + Carry;
3242
3243 if (Value & 0x0000100)
3244 Value = Value & 0x000001FC;
3245
3246 if (Shifted > 24)
3247 Value = Value >> (Shifted - 24);
3248 else
3249 Value = Value << (24 - Shifted);
3250
3251 return Value;
3252}
3253
3254// The stack limit in the TCB is set to this many bytes above the actual
3255// stack limit.
3256 static const uint64_t kSplitStackAvailable = 256;
3257
3258// Adjust the function prologue to enable split stacks. This currently only
3259// supports android and linux.
3260//
3261// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
3262// must be well defined in order to allow for consistent implementations of the
3263// __morestack helper function. The ABI is also not a normal ABI in that it
3264// doesn't follow the normal calling conventions because this allows the
3265// prologue of each function to be optimized further.
3266//
3267// Currently, the ABI looks like (when calling __morestack)
3268//
3269// * r4 holds the minimum stack size requested for this function call
3270// * r5 holds the stack size of the arguments to the function
3271// * the beginning of the function is 3 instructions after the call to
3272// __morestack
3273//
3274// Implementations of __morestack should use r4 to allocate a new stack, r5 to
3275// place the arguments on to the new stack, and the 3-instruction knowledge to
3276// jump directly to the body of the function when working on the new stack.
3277//
3278// An old (and possibly no longer compatible) implementation of __morestack for
3279// ARM can be found at [1].
3280//
3281// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
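// Put differently, right before the call the prologue materializes roughly:
//   mov r4, #AlignedStackSize   ; minimum stack size requested
//   mov r5, #ArgumentStackSize  ; size of the stack-passed arguments
//   bl  __morestack
// (illustrative only; the code below builds this per subtarget, and only on
// the slow path where the stack-limit check fails).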
3283 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3284 unsigned Opcode;
3285 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
3286 bool Thumb = ST->isThumb();
3287 bool Thumb2 = ST->isThumb2();
3288
3289 // Sadly, this currently doesn't support varargs or platforms other than
3290 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
3291 if (MF.getFunction().isVarArg())
3292 report_fatal_error("Segmented stacks do not support vararg functions.");
3293 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
3294 report_fatal_error("Segmented stacks not supported on this platform.");
3295
3296 MachineFrameInfo &MFI = MF.getFrameInfo();
3297 const ARMBaseInstrInfo &TII =
3298 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3300 DebugLoc DL;
3301
3302 if (!MFI.needsSplitStackProlog())
3303 return;
3304
3305 uint64_t StackSize = MFI.getStackSize();
3306
3307 // Use R4 and R5 as scratch registers.
3308 // We save R4 and R5 before use and restore them before leaving the function.
3309 unsigned ScratchReg0 = ARM::R4;
3310 unsigned ScratchReg1 = ARM::R5;
3311 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3312 uint64_t AlignedStackSize;
3313
3314 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3315 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3319
3320 // Grab everything that reaches PrologueMBB to update their liveness as well.
3321 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3323 WalkList.push_back(&PrologueMBB);
3324
3325 do {
3326 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3327 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3328 if (BeforePrologueRegion.insert(PredBB).second)
3329 WalkList.push_back(PredBB);
3330 }
3331 } while (!WalkList.empty());
3332
3333 // The order in that list is important.
3334 // The blocks will all be inserted before PrologueMBB using that order.
3335 // Therefore the block that should appear first in the CFG should appear
3336 // first in the list.
3337 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3338 PostStackMBB};
3339
3340 BeforePrologueRegion.insert_range(AddedBlocks);
3341
3342 for (const auto &LI : PrologueMBB.liveins()) {
3343 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3344 PredBB->addLiveIn(LI);
3345 }
3346
3347 // Remove the newly added blocks from the list, since we know
3348 // we do not have to do the following updates for them.
3349 for (MachineBasicBlock *B : AddedBlocks) {
3350 BeforePrologueRegion.erase(B);
3351 MF.insert(PrologueMBB.getIterator(), B);
3352 }
3353
3354 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3355 // Make sure the LiveIns are still sorted and unique.
3356 MBB->sortUniqueLiveIns();
3357 // Replace the edges to PrologueMBB by edges to the sequences
3358 // we are about to add, but only update for immediate predecessors.
3359 if (MBB->isSuccessor(&PrologueMBB))
3360 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3361 }
3362
3363 // The required stack size, rounded up to a constant that ARM can encode.
3364 AlignedStackSize = alignToARMConstant(StackSize);
3365
3366 // When the frame size is less than 256 we just compare the stack
3367 // boundary directly to the value of the stack pointer, per gcc.
3368 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
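  // If CompareStackPointer is true, the stack limit is compared against sp
  // itself; otherwise SR1 is first materialized as sp - AlignedStackSize below.
  // kSplitStackAvailable is the 256-byte threshold mentioned above.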
3369
3370 // We will use two of the callee save registers as scratch registers so we
3371 // need to save those registers onto the stack.
3372 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3373 // requested and arguments for __morestack().
3374 // SR0: Scratch Register #0
3375 // SR1: Scratch Register #1
3376 // push {SR0, SR1}
3377 if (Thumb) {
3378 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3379 .add(predOps(ARMCC::AL))
3380 .addReg(ScratchReg0)
3381 .addReg(ScratchReg1);
3382 } else {
3383 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3384 .addReg(ARM::SP, RegState::Define)
3385 .addReg(ARM::SP)
3386 .add(predOps(ARMCC::AL))
3387 .addReg(ScratchReg0)
3388 .addReg(ScratchReg1);
3389 }
3390
3391 // Emit the relevant DWARF information about the change in stack pointer as
3392 // well as where to find both r4 and r5 (the callee-save registers)
3393 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3394 CFIInstBuilder CFIBuilder(PrevStackMBB, MachineInstr::NoFlags);
3395 CFIBuilder.buildDefCFAOffset(8);
3396 CFIBuilder.buildOffset(ScratchReg1, -4);
3397 CFIBuilder.buildOffset(ScratchReg0, -8);
3398 }
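  // After the push, sp has dropped by 8 bytes, so the CFA is now sp + 8 and
  // the saved copies of r5 and r4 live at CFA - 4 and CFA - 8 respectively.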
3399
3400 // mov SR1, sp
3401 if (Thumb) {
3402 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3403 .addReg(ARM::SP)
3404 .add(predOps(ARMCC::AL));
3405 } else if (CompareStackPointer) {
3406 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3407 .addReg(ARM::SP)
3408 .add(predOps(ARMCC::AL))
3409 .add(condCodeOp());
3410 }
3411
3412 // sub SR1, sp, #StackSize
3413 if (!CompareStackPointer && Thumb) {
3414 if (AlignedStackSize < 256) {
3415 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3416 .add(condCodeOp())
3417 .addReg(ScratchReg1)
3418 .addImm(AlignedStackSize)
3419 .add(predOps(ARMCC::AL));
3420 } else {
3421 if (Thumb2 || ST->genExecuteOnly()) {
3422 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3423 .addImm(AlignedStackSize);
3424 } else {
3425 auto MBBI = McrMBB->end();
3426 auto RegInfo = STI.getRegisterInfo();
3427 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3428 AlignedStackSize);
3429 }
3430 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3431 .add(condCodeOp())
3432 .addReg(ScratchReg1)
3433 .addReg(ScratchReg0)
3434 .add(predOps(ARMCC::AL));
3435 }
3436 } else if (!CompareStackPointer) {
3437 if (AlignedStackSize < 256) {
3438 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3439 .addReg(ARM::SP)
3440 .addImm(AlignedStackSize)
3441 .add(predOps(ARMCC::AL))
3442 .add(condCodeOp());
3443 } else {
3444 auto MBBI = McrMBB->end();
3445 auto RegInfo = STI.getRegisterInfo();
3446 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3447 AlignedStackSize);
3448 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3449 .addReg(ARM::SP)
3450 .addReg(ScratchReg0)
3451 .add(predOps(ARMCC::AL))
3452 .add(condCodeOp());
3453 }
3454 }
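  // At this point SR1 holds the prospective stack pointer
  // (sp - AlignedStackSize), or sp itself when CompareStackPointer is set.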
3455
3456 if (Thumb && ST->isThumb1Only()) {
3457 if (ST->genExecuteOnly()) {
3458 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3459 .addExternalSymbol("__STACK_LIMIT");
3460 } else {
3461 unsigned PCLabelId = ARMFI->createPICLabelUId();
3462 ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
3463 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3464 MachineConstantPool *MCP = MF.getConstantPool();
3465 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3466
3467 // ldr SR0, [pc, offset(STACK_LIMIT)]
3468 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3469 .addConstantPoolIndex(CPI)
3470 .add(predOps(ARMCC::AL));
3471 }
3472
3473 // ldr SR0, [SR0]
3474 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3475 .addReg(ScratchReg0)
3476 .addImm(0)
3477 .add(predOps(ARMCC::AL));
3478 } else {
3479 // Get TLS base address from the coprocessor
3480 // mrc p15, #0, SR0, c13, c0, #3
3481 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3482 ScratchReg0)
3483 .addImm(15)
3484 .addImm(0)
3485 .addImm(13)
3486 .addImm(0)
3487 .addImm(3)
3488 .add(predOps(ARMCC::AL));
3489
3490 // Use the last tls slot on android and a private field of the TCB on linux.
3491 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3492 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3493
3494 // Get the stack limit from the right offset
3495 // ldr SR0, [sr0, #4 * TlsOffset]
3496 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3497 ScratchReg0)
3498 .addReg(ScratchReg0)
3499 .addImm(4 * TlsOffset)
3500 .add(predOps(ARMCC::AL));
3501 }
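  // Either way, SR0 now holds the current thread's cached stack limit, loaded
  // from __STACK_LIMIT on Thumb1 or from the TLS area located via the thread
  // pointer read above.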
3502
3503 // Compare stack limit with stack size requested.
3504 // cmp SR0, SR1
3505 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3506 BuildMI(GetMBB, DL, TII.get(Opcode))
3507 .addReg(ScratchReg0)
3508 .addReg(ScratchReg1)
3509 .add(predOps(ARMCC::AL));
3510
3511 // This jump is taken if StackLimit <= SP - stack required.
3512 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3513 BuildMI(GetMBB, DL, TII.get(Opcode))
3514 .addMBB(PostStackMBB)
3515 .addImm(ARMCC::LS)
3516 .addReg(ARM::CPSR);
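  // The branch uses the LS (unsigned lower-or-same) condition, so AllocMBB and
  // the __morestack call are skipped whenever the stack limit is at or below
  // the prospective stack pointer held in SR1.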
3517
3518 // Calling __morestack(StackSize, Size of stack arguments).
3519 // __morestack knows that the stack size requested is in SR0(r4)
3520 // and the size of the stack arguments is in SR1 (r5).
3521
3522 // Pass the first argument to __morestack in Scratch Register #0:
3523 // the amount of stack required.
3524 if (Thumb) {
3525 if (AlignedStackSize < 256) {
3526 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3527 .add(condCodeOp())
3528 .addImm(AlignedStackSize)
3529 .add(predOps(ARMCC::AL));
3530 } else {
3531 if (Thumb2 || ST->genExecuteOnly()) {
3532 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3533 .addImm(AlignedStackSize);
3534 } else {
3535 auto MBBI = AllocMBB->end();
3536 auto RegInfo = STI.getRegisterInfo();
3537 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3538 AlignedStackSize);
3539 }
3540 }
3541 } else {
3542 if (AlignedStackSize < 256) {
3543 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3544 .addImm(AlignedStackSize)
3545 .add(predOps(ARMCC::AL))
3546 .add(condCodeOp());
3547 } else {
3548 auto MBBI = AllocMBB->end();
3549 auto RegInfo = STI.getRegisterInfo();
3550 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3551 AlignedStackSize);
3552 }
3553 }
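  // SR0 (r4) now holds AlignedStackSize, the first __morestack argument.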
3554
3555 // Pass the second argument to __morestack in Scratch Register #1:
3556 // the amount of stack consumed to save the function arguments.
3557 if (Thumb) {
3558 if (ARMFI->getArgumentStackSize() < 256) {
3559 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3560 .add(condCodeOp())
3561 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3562 .add(predOps(ARMCC::AL));
3563 } else {
3564 if (Thumb2 || ST->genExecuteOnly()) {
3565 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3566 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
3567 } else {
3568 auto MBBI = AllocMBB->end();
3569 auto RegInfo = STI.getRegisterInfo();
3570 RegInfo->emitLoadConstPool(
3571 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3572 alignToARMConstant(ARMFI->getArgumentStackSize()));
3573 }
3574 }
3575 } else {
3576 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3577 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3578 .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
3579 .add(predOps(ARMCC::AL))
3580 .add(condCodeOp());
3581 } else {
3582 auto MBBI = AllocMBB->end();
3583 auto RegInfo = STI.getRegisterInfo();
3584 RegInfo->emitLoadConstPool(
3585 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3586 alignToARMConstant(ARMFI->getArgumentStackSize()));
3587 }
3588 }
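  // SR1 (r5) now holds the size of the incoming-argument area, rounded with
  // alignToARMConstant(), i.e. the second __morestack argument.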
3589
3590 // push {lr} - Save return address of this function.
3591 if (Thumb) {
3592 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3593 .add(predOps(ARMCC::AL))
3594 .addReg(ARM::LR);
3595 } else {
3596 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3597 .addReg(ARM::SP, RegState::Define)
3598 .addReg(ARM::SP)
3599 .add(predOps(ARMCC::AL))
3600 .addReg(ARM::LR);
3601 }
3602
3603 // Emit the DWARF info about the change in stack as well as where to find the
3604 // previous link register
3605 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3606 CFIInstBuilder CFIBuilder(AllocMBB, MachineInstr::NoFlags);
3607 CFIBuilder.buildDefCFAOffset(12);
3608 CFIBuilder.buildOffset(ARM::LR, -12);
3609 }
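  // With SR0, SR1 and LR pushed, the CFA is sp + 12 and the saved LR sits at
  // CFA - 12 (r5 and r4 keep their earlier CFA - 4 / CFA - 8 slots).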
3610
3611 // Call __morestack().
3612 if (Thumb) {
3613 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3614 .add(predOps(ARMCC::AL))
3615 .addExternalSymbol("__morestack");
3616 } else {
3617 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3618 .addExternalSymbol("__morestack");
3619 }
3620
3621 // pop {lr} - Restore return address of this original function.
3622 if (Thumb) {
3623 if (ST->isThumb1Only()) {
3624 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3625 .add(predOps(ARMCC::AL))
3626 .addReg(ScratchReg0);
3627 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3628 .addReg(ScratchReg0)
3629 .add(predOps(ARMCC::AL));
3630 } else {
3631 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3632 .addReg(ARM::LR, RegState::Define)
3633 .addReg(ARM::SP, RegState::Define)
3634 .addReg(ARM::SP)
3635 .addImm(4)
3636 .add(predOps(ARMCC::AL));
3637 }
3638 } else {
3639 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3640 .addReg(ARM::SP, RegState::Define)
3641 .addReg(ARM::SP)
3642 .add(predOps(ARMCC::AL))
3643 .addReg(ARM::LR);
3644 }
3645
3646 // Restore SR0 and SR1 in case __morestack() was called.
3647 // __morestack() will skip PostStackMBB block so we need to restore
3648 // scratch registers from here.
3649 // pop {SR0, SR1}
3650 if (Thumb) {
3651 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3652 .add(predOps(ARMCC::AL))
3653 .addReg(ScratchReg0)
3654 .addReg(ScratchReg1);
3655 } else {
3656 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3657 .addReg(ARM::SP, RegState::Define)
3658 .addReg(ARM::SP)
3659 .add(predOps(ARMCC::AL))
3660 .addReg(ScratchReg0)
3661 .addReg(ScratchReg1);
3662 }
3663
3664 // Update the CFA offset now that we've popped
3665 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
3666 CFIInstBuilder(AllocMBB, MachineInstr::NoFlags).buildDefCFAOffset(0);
3667
3668 // Return from this function.
3669 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3670
3671 // Restore SR0 and SR1 in case __morestack() was not called.
3672 // pop {SR0, SR1}
3673 if (Thumb) {
3674 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3675 .add(predOps(ARMCC::AL))
3676 .addReg(ScratchReg0)
3677 .addReg(ScratchReg1);
3678 } else {
3679 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3680 .addReg(ARM::SP, RegState::Define)
3681 .addReg(ARM::SP)
3682 .add(predOps(ARMCC::AL))
3683 .addReg(ScratchReg0)
3684 .addReg(ScratchReg1);
3685 }
3686
3687 // Update the CFA offset now that we've popped
3688 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3689 CFIInstBuilder CFIBuilder(PostStackMBB, MachineInstr::NoFlags);
3690 CFIBuilder.buildDefCFAOffset(0);
3691
3692 // Tell debuggers that r4 and r5 are now the same as they were in the
3693 // previous function, that they're the "Same Value".
3694 CFIBuilder.buildSameValue(ScratchReg0);
3695 CFIBuilder.buildSameValue(ScratchReg1);
3696 }
3697
3698 // Organizing MBB lists
3699 PostStackMBB->addSuccessor(&PrologueMBB);
3700
3701 AllocMBB->addSuccessor(PostStackMBB);
3702
3703 GetMBB->addSuccessor(PostStackMBB);
3704 GetMBB->addSuccessor(AllocMBB);
3705
3706 McrMBB->addSuccessor(GetMBB);
3707
3708 PrevStackMBB->addSuccessor(McrMBB);
3709
3710#ifdef EXPENSIVE_CHECKS
3711 MF.verify();
3712#endif
3713}