// Doxygen export header: LLVM 18.0.0git — ARMFrameLowering.cpp.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// | | Higher address
26// |-----------------------------------|
27// | |
28// | arguments passed on the stack |
29// | |
30// |-----------------------------------| <- sp
31// | | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until in the
36// main function body, after the prologue is run. However, it's depicted here
37// for completeness.
38//
39// | | Higher address
40// |-----------------------------------|
41// | |
42// | arguments passed on the stack |
43// | |
44// |-----------------------------------| <- (sp at function entry)
45// | |
46// | varargs from registers |
47// | |
48// |-----------------------------------|
49// | |
50// | prev_lr |
51// | prev_fp |
52// | (a.k.a. "frame record") |
53// | |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// | |
56// | callee-saved gpr registers |
57// | |
58// |-----------------------------------|
59// | |
60// | callee-saved fp/simd regs |
61// | |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....| compile time; if present)
66// |-----------------------------------|
67// | |
68// | local variables of fixed size |
69// | including spill slots |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....| LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................| compile time)
74// |-----------------------------------| <- sp
75// | | Lower address
76//
77//
78// To access the data in a frame, at-compile time, a constant offset must be
79// computable from one of the pointers (fp, bp, sp) to access it. The size
80// of the areas with a dotted background cannot be computed at compile-time
81// if they are present, making it required to have all three of fp, bp and
82// sp to be set up to be able to access all contents in the frame areas,
83// assuming all of the frame areas are non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
168static cl::opt<bool>
169SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
174 unsigned NumAlignedDPRCS2Regs);
175
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185}
186
187/// Returns true if the target can safely skip saving callee-saved registers
188/// for noreturn nounwind functions.
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197}
198
199/// hasFP - Return true if the specified function should have a dedicated frame
200/// pointer register. This is true if the function has variable sized allocas
201/// or if frame pointer elimination is disabled.
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213}
214
215/// isFPReserved - Return true if the frame pointer register should be
216/// considered a reserved register on the scope of the specified function.
218 return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
219}
220
221/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222/// not required, we reserve argument space for call sites in the function
223/// immediately on entry to the current function. This eliminates the need for
224/// add/sub sp brackets around call sites. Returns true if the call frame is
225/// included as part of the stack frame.
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offset to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even makes it impossible to scavenge a register.
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
236 return !MFI.hasVarSizedObjects();
237}
238
239/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240/// call frame pseudos can be simplified. Unlike most targets, having a FP
241/// is not sufficient here since we still may reference some objects via SP
242/// even when FP is available in Thumb2 mode.
243bool
246}
247
248// Returns how much of the incoming argument stack area we should clean up in an
249// epilogue. For the C calling convention this will be 0, for guaranteed tail
250// call conventions it can be positive (a normal return or a tail call to a
251// function that uses less stack space for arguments) or negative (for a tail
252// call to a function that needs more stack space than us for arguments).
256 bool IsTailCallReturn = false;
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri;
261 }
263
264 int ArgumentPopSize = 0;
265 if (IsTailCallReturn) {
266 MachineOperand &StackAdjust = MBBI->getOperand(1);
267
268 // For a tail-call in a callee-pops-arguments environment, some or all of
269 // the stack may actually be in use for the call's arguments, this is
270 // calculated during LowerCall and consumed here...
271 ArgumentPopSize = StackAdjust.getImm();
272 } else {
273 // ... otherwise the amount to pop is *all* of the argument space,
274 // conveniently stored in the MachineFunctionInfo by
275 // LowerFormalArguments. This will, of course, be zero for the C calling
276 // convention.
277 ArgumentPopSize = AFI->getArgumentStackToRestore();
278 }
279
280 return ArgumentPopSize;
281}
282
283static bool needsWinCFI(const MachineFunction &MF) {
284 const Function &F = MF.getFunction();
285 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
286 F.needsUnwindTableEntry();
287}
288
289// Given a load or a store instruction, generate an appropriate unwinding SEH
290// code on Windows.
292 const TargetInstrInfo &TII,
293 unsigned Flags) {
294 unsigned Opc = MBBI->getOpcode();
296 MachineFunction &MF = *MBB->getParent();
297 DebugLoc DL = MBBI->getDebugLoc();
299 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
300 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
301
302 Flags |= MachineInstr::NoMerge;
303
304 switch (Opc) {
305 default:
306 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
307 break;
308 case ARM::t2ADDri: // add.w r11, sp, #xx
309 case ARM::t2ADDri12: // add.w r11, sp, #xx
310 case ARM::t2MOVTi16: // movt r4, #xx
311 case ARM::tBL: // bl __chkstk
312 // These are harmless if used for just setting up a frame pointer,
313 // but that frame pointer can't be relied upon for unwinding, unless
314 // set up with SEH_SaveSP.
315 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
316 .addImm(/*Wide=*/1)
317 .setMIFlags(Flags);
318 break;
319
320 case ARM::t2MOVi16: { // mov(w) r4, #xx
321 bool Wide = MBBI->getOperand(1).getImm() >= 256;
322 if (!Wide) {
323 MachineInstrBuilder NewInstr =
324 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
325 NewInstr.add(MBBI->getOperand(0));
326 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
327 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
328 NewInstr.add(MO);
329 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
330 MBB->erase(MBBI);
331 MBBI = NewMBBI;
332 }
333 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
334 break;
335 }
336
337 case ARM::tBLXr: // blx r12 (__chkstk)
338 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
339 .addImm(/*Wide=*/0)
340 .setMIFlags(Flags);
341 break;
342
343 case ARM::t2MOVi32imm: // movw+movt
344 // This pseudo instruction expands into two mov instructions. If the
345 // second operand is a symbol reference, this will stay as two wide
346 // instructions, movw+movt. If they're immediates, the first one can
347 // end up as a narrow mov though.
348 // As two SEH instructions are appended here, they won't get interleaved
349 // between the two final movw/movt instructions, but it doesn't make any
350 // practical difference.
351 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
352 .addImm(/*Wide=*/1)
353 .setMIFlags(Flags);
354 MBB->insertAfter(MBBI, MIB);
355 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
356 .addImm(/*Wide=*/1)
357 .setMIFlags(Flags);
358 break;
359
360 case ARM::t2STR_PRE:
361 if (MBBI->getOperand(0).getReg() == ARM::SP &&
362 MBBI->getOperand(2).getReg() == ARM::SP &&
363 MBBI->getOperand(3).getImm() == -4) {
364 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
365 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
366 .addImm(1ULL << Reg)
367 .addImm(/*Wide=*/1)
368 .setMIFlags(Flags);
369 } else {
370 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
371 }
372 break;
373
374 case ARM::t2LDR_POST:
375 if (MBBI->getOperand(1).getReg() == ARM::SP &&
376 MBBI->getOperand(2).getReg() == ARM::SP &&
377 MBBI->getOperand(3).getImm() == 4) {
378 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
379 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
380 .addImm(1ULL << Reg)
381 .addImm(/*Wide=*/1)
382 .setMIFlags(Flags);
383 } else {
384 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
385 }
386 break;
387
388 case ARM::t2LDMIA_RET:
389 case ARM::t2LDMIA_UPD:
390 case ARM::t2STMDB_UPD: {
391 unsigned Mask = 0;
392 bool Wide = false;
393 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
394 const MachineOperand &MO = MBBI->getOperand(i);
395 if (!MO.isReg() || MO.isImplicit())
396 continue;
397 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
398 if (Reg == 15)
399 Reg = 14;
400 if (Reg >= 8 && Reg <= 13)
401 Wide = true;
402 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
403 Wide = true;
404 Mask |= 1 << Reg;
405 }
406 if (!Wide) {
407 unsigned NewOpc;
408 switch (Opc) {
409 case ARM::t2LDMIA_RET:
410 NewOpc = ARM::tPOP_RET;
411 break;
412 case ARM::t2LDMIA_UPD:
413 NewOpc = ARM::tPOP;
414 break;
415 case ARM::t2STMDB_UPD:
416 NewOpc = ARM::tPUSH;
417 break;
418 default:
420 }
421 MachineInstrBuilder NewInstr =
422 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
423 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
424 NewInstr.add(MBBI->getOperand(i));
425 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
426 MBB->erase(MBBI);
427 MBBI = NewMBBI;
428 }
429 unsigned SEHOpc =
430 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
431 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
432 .addImm(Mask)
433 .addImm(Wide ? 1 : 0)
434 .setMIFlags(Flags);
435 break;
436 }
437 case ARM::VSTMDDB_UPD:
438 case ARM::VLDMDIA_UPD: {
439 int First = -1, Last = 0;
440 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
441 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
442 if (First == -1)
443 First = Reg;
444 Last = Reg;
445 }
446 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
447 .addImm(First)
448 .addImm(Last)
449 .setMIFlags(Flags);
450 break;
451 }
452 case ARM::tSUBspi:
453 case ARM::tADDspi:
454 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
455 .addImm(MBBI->getOperand(2).getImm() * 4)
456 .addImm(/*Wide=*/0)
457 .setMIFlags(Flags);
458 break;
459 case ARM::t2SUBspImm:
460 case ARM::t2SUBspImm12:
461 case ARM::t2ADDspImm:
462 case ARM::t2ADDspImm12:
463 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
464 .addImm(MBBI->getOperand(2).getImm())
465 .addImm(/*Wide=*/1)
466 .setMIFlags(Flags);
467 break;
468
469 case ARM::tMOVr:
470 if (MBBI->getOperand(1).getReg() == ARM::SP &&
471 (Flags & MachineInstr::FrameSetup)) {
472 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
473 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
474 .addImm(Reg)
475 .setMIFlags(Flags);
476 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
477 (Flags & MachineInstr::FrameDestroy)) {
478 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
479 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
480 .addImm(Reg)
481 .setMIFlags(Flags);
482 } else {
483 report_fatal_error("No SEH Opcode for MOV");
484 }
485 break;
486
487 case ARM::tBX_RET:
488 case ARM::TCRETURNri:
489 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
490 .addImm(/*Wide=*/0)
491 .setMIFlags(Flags);
492 break;
493
494 case ARM::TCRETURNdi:
495 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
496 .addImm(/*Wide=*/1)
497 .setMIFlags(Flags);
498 break;
499 }
500 return MBB->insertAfter(MBBI, MIB);
501}
502
505 if (MBBI == MBB.begin())
507 return std::prev(MBBI);
508}
509
513 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
514 if (Start.isValid())
515 Start = std::next(Start);
516 else
517 Start = MBB.begin();
518
519 for (auto MI = Start; MI != End;) {
520 auto Next = std::next(MI);
521 // Check if this instruction already has got a SEH opcode added. In that
522 // case, don't do this generic mapping.
523 if (Next != End && isSEHInstruction(*Next)) {
524 MI = std::next(Next);
525 while (MI != End && isSEHInstruction(*MI))
526 ++MI;
527 continue;
528 }
529 insertSEH(MI, TII, MIFlags);
530 MI = Next;
531 }
532}
533
536 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
537 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
538 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
539 if (isARM)
540 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
541 Pred, PredReg, TII, MIFlags);
542 else
543 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
544 Pred, PredReg, TII, MIFlags);
545}
546
547static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
549 const ARMBaseInstrInfo &TII, int NumBytes,
550 unsigned MIFlags = MachineInstr::NoFlags,
552 unsigned PredReg = 0) {
553 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
554 MIFlags, Pred, PredReg);
555}
556
558 int RegSize;
559 switch (MI.getOpcode()) {
560 case ARM::VSTMDDB_UPD:
561 RegSize = 8;
562 break;
563 case ARM::STMDB_UPD:
564 case ARM::t2STMDB_UPD:
565 RegSize = 4;
566 break;
567 case ARM::t2STR_PRE:
568 case ARM::STR_PRE_IMM:
569 return 4;
570 default:
571 llvm_unreachable("Unknown push or pop like instruction");
572 }
573
574 int count = 0;
575 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
576 // pred) so the list starts at 4.
577 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
578 count += RegSize;
579 return count;
580}
581
583 size_t StackSizeInBytes) {
584 const MachineFrameInfo &MFI = MF.getFrameInfo();
585 const Function &F = MF.getFunction();
586 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
587
588 StackProbeSize =
589 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
590 return (StackSizeInBytes >= StackProbeSize) &&
591 !F.hasFnAttribute("no-stack-arg-probe");
592}
593
594namespace {
595
596struct StackAdjustingInsts {
597 struct InstInfo {
599 unsigned SPAdjust;
600 bool BeforeFPSet;
601 };
602
604
605 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
606 bool BeforeFPSet = false) {
607 InstInfo Info = {I, SPAdjust, BeforeFPSet};
608 Insts.push_back(Info);
609 }
610
611 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
612 auto Info =
613 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
614 assert(Info != Insts.end() && "invalid sp adjusting instruction");
615 Info->SPAdjust += ExtraBytes;
616 }
617
618 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
619 const ARMBaseInstrInfo &TII, bool HasFP) {
621 unsigned CFAOffset = 0;
622 for (auto &Info : Insts) {
623 if (HasFP && !Info.BeforeFPSet)
624 return;
625
626 CFAOffset += Info.SPAdjust;
627 unsigned CFIIndex = MF.addFrameInst(
628 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
629 BuildMI(MBB, std::next(Info.I), dl,
630 TII.get(TargetOpcode::CFI_INSTRUCTION))
631 .addCFIIndex(CFIIndex)
633 }
634 }
635};
636
637} // end anonymous namespace
638
639/// Emit an instruction sequence that will align the address in
640/// register Reg by zero-ing out the lower bits. For versions of the
641/// architecture that support Neon, this must be done in a single
642/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
643/// single instruction. That function only gets called when optimizing
644/// spilling of D registers on a core with the Neon instruction set
645/// present.
647 const TargetInstrInfo &TII,
650 const DebugLoc &DL, const unsigned Reg,
651 const Align Alignment,
652 const bool MustBeSingleInstruction) {
653 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
654 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
655 const unsigned AlignMask = Alignment.value() - 1U;
656 const unsigned NrBitsToZero = Log2(Alignment);
657 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
658 if (!AFI->isThumbFunction()) {
659 // if the BFC instruction is available, use that to zero the lower
660 // bits:
661 // bfc Reg, #0, log2(Alignment)
662 // otherwise use BIC, if the mask to zero the required number of bits
663 // can be encoded in the bic immediate field
664 // bic Reg, Reg, Alignment-1
665 // otherwise, emit
666 // lsr Reg, Reg, log2(Alignment)
667 // lsl Reg, Reg, log2(Alignment)
668 if (CanUseBFC) {
669 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
671 .addImm(~AlignMask)
673 } else if (AlignMask <= 255) {
674 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
676 .addImm(AlignMask)
678 .add(condCodeOp());
679 } else {
680 assert(!MustBeSingleInstruction &&
681 "Shouldn't call emitAligningInstructions demanding a single "
682 "instruction to be emitted for large stack alignment for a target "
683 "without BFC.");
684 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
686 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
688 .add(condCodeOp());
689 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
691 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
693 .add(condCodeOp());
694 }
695 } else {
696 // Since this is only reached for Thumb-2 targets, the BFC instruction
697 // should always be available.
698 assert(CanUseBFC);
699 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
701 .addImm(~AlignMask)
703 }
704}
705
706/// We need the offset of the frame pointer relative to other MachineFrameInfo
707/// offsets which are encoded relative to SP at function begin.
708/// See also emitPrologue() for how the FP is set up.
709/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
710/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
711/// this to produce a conservative estimate that we check in an assert() later.
712static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
713 const MachineFunction &MF) {
714 // For Thumb1, push.w isn't available, so the first push will always push
715 // r7 and lr onto the stack first.
716 if (AFI.isThumb1OnlyFunction())
717 return -AFI.getArgRegsSaveSize() - (2 * 4);
718 // This is a conservative estimation: Assume the frame pointer being r7 and
719 // pc("r15") up to r8 getting spilled before (= 8 registers).
720 int MaxRegBytes = 8 * 4;
721 if (STI.splitFramePointerPush(MF)) {
722 // Here, r11 can be stored below all of r4-r15 (3 registers more than
723 // above), plus d8-d15.
724 MaxRegBytes = 11 * 4 + 8 * 8;
725 }
726 int FPCXTSaveSize =
727 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
728 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
729}
730
732 MachineBasicBlock &MBB) const {
734 MachineFrameInfo &MFI = MF.getFrameInfo();
736 MachineModuleInfo &MMI = MF.getMMI();
737 MCContext &Context = MMI.getContext();
738 const TargetMachine &TM = MF.getTarget();
739 const MCRegisterInfo *MRI = Context.getRegisterInfo();
740 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
743 "This emitPrologue does not support Thumb1!");
744 bool isARM = !AFI->isThumbFunction();
745 Align Alignment = STI.getFrameLowering()->getStackAlign();
746 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
747 unsigned NumBytes = MFI.getStackSize();
748 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
749 int FPCXTSaveSize = 0;
750 bool NeedsWinCFI = needsWinCFI(MF);
751
752 // Debug location must be unknown since the first debug location is used
753 // to determine the end of the prologue.
754 DebugLoc dl;
755
756 Register FramePtr = RegInfo->getFrameRegister(MF);
757
758 // Determine the sizes of each callee-save spill areas and record which frame
759 // belongs to which callee-save spill areas.
760 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
761 int FramePtrSpillFI = 0;
762 int D8SpillFI = 0;
763
764 // All calls are tail calls in GHC calling conv, and functions have no
765 // prologue/epilogue.
767 return;
768
769 StackAdjustingInsts DefCFAOffsetCandidates;
770 bool HasFP = hasFP(MF);
771
772 if (!AFI->hasStackFrame() &&
773 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
774 if (NumBytes != 0) {
775 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
777 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
778 }
779 if (!NeedsWinCFI)
780 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
781 if (NeedsWinCFI && MBBI != MBB.begin()) {
783 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
785 MF.setHasWinCFI(true);
786 }
787 return;
788 }
789
790 // Determine spill area sizes.
791 if (STI.splitFramePointerPush(MF)) {
792 for (const CalleeSavedInfo &I : CSI) {
793 Register Reg = I.getReg();
794 int FI = I.getFrameIdx();
795 switch (Reg) {
796 case ARM::R11:
797 case ARM::LR:
798 if (Reg == FramePtr)
799 FramePtrSpillFI = FI;
800 GPRCS2Size += 4;
801 break;
802 case ARM::R0:
803 case ARM::R1:
804 case ARM::R2:
805 case ARM::R3:
806 case ARM::R4:
807 case ARM::R5:
808 case ARM::R6:
809 case ARM::R7:
810 case ARM::R8:
811 case ARM::R9:
812 case ARM::R10:
813 case ARM::R12:
814 GPRCS1Size += 4;
815 break;
816 case ARM::FPCXTNS:
817 FPCXTSaveSize = 4;
818 break;
819 default:
820 // This is a DPR. Exclude the aligned DPRCS2 spills.
821 if (Reg == ARM::D8)
822 D8SpillFI = FI;
823 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
824 DPRCSSize += 8;
825 }
826 }
827 } else {
828 for (const CalleeSavedInfo &I : CSI) {
829 Register Reg = I.getReg();
830 int FI = I.getFrameIdx();
831 switch (Reg) {
832 case ARM::R8:
833 case ARM::R9:
834 case ARM::R10:
835 case ARM::R11:
836 case ARM::R12:
837 if (STI.splitFramePushPop(MF)) {
838 GPRCS2Size += 4;
839 break;
840 }
841 [[fallthrough]];
842 case ARM::R0:
843 case ARM::R1:
844 case ARM::R2:
845 case ARM::R3:
846 case ARM::R4:
847 case ARM::R5:
848 case ARM::R6:
849 case ARM::R7:
850 case ARM::LR:
851 if (Reg == FramePtr)
852 FramePtrSpillFI = FI;
853 GPRCS1Size += 4;
854 break;
855 case ARM::FPCXTNS:
856 FPCXTSaveSize = 4;
857 break;
858 default:
859 // This is a DPR. Exclude the aligned DPRCS2 spills.
860 if (Reg == ARM::D8)
861 D8SpillFI = FI;
862 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
863 DPRCSSize += 8;
864 }
865 }
866 }
867
868 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
869
870 // Move past the PAC computation.
871 if (AFI->shouldSignReturnAddress())
872 LastPush = MBBI++;
873
874 // Move past FPCXT area.
875 if (FPCXTSaveSize > 0) {
876 LastPush = MBBI++;
877 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
878 }
879
880 // Allocate the vararg register save area.
881 if (ArgRegsSaveSize) {
882 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
884 LastPush = std::prev(MBBI);
885 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
886 }
887
888 // Move past area 1.
889 if (GPRCS1Size > 0) {
890 GPRCS1Push = LastPush = MBBI++;
891 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
892 }
893
894 // Determine starting offsets of spill areas.
895 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
896 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
897 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
898 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
899 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
900 if (!STI.splitFramePointerPush(MF)) {
901 DPRGapSize += GPRCS2Size;
902 }
903 DPRGapSize %= DPRAlign.value();
904
905 unsigned DPRCSOffset;
906 if (STI.splitFramePointerPush(MF)) {
907 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
908 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
909 } else {
910 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
911 }
912 int FramePtrOffsetInPush = 0;
913 if (HasFP) {
914 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
915 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
916 "Max FP estimation is wrong");
917 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
918 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
919 NumBytes);
920 }
921 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
922 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
923 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
924
925 // Move past area 2.
926 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
927 GPRCS2Push = LastPush = MBBI++;
928 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
929 }
930
931 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
932 // .cfi_offset operations will reflect that.
933 if (DPRGapSize) {
934 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
935 if (LastPush != MBB.end() &&
936 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
937 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
938 else {
939 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
941 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
942 }
943 }
944
945 // Move past area 3.
946 if (DPRCSSize > 0) {
947 // Since vpush register list cannot have gaps, there may be multiple vpush
948 // instructions in the prologue.
949 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
950 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
951 LastPush = MBBI++;
952 }
953 }
954
955 // Move past the aligned DPRCS2 area.
956 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
958 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
959 // leaves the stack pointer pointing to the DPRCS2 area.
960 //
961 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
962 NumBytes += MFI.getObjectOffset(D8SpillFI);
963 } else
964 NumBytes = DPRCSOffset;
965
966 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
967 GPRCS2Push = LastPush = MBBI++;
968 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
969 }
970
971 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
972 if (STI.splitFramePointerPush(MF) && HasFP)
973 NeedsWinCFIStackAlloc = false;
974
975 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
976 uint32_t NumWords = NumBytes >> 2;
977
978 if (NumWords < 65536) {
979 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
980 .addImm(NumWords)
983 } else {
984 // Split into two instructions here, instead of using t2MOVi32imm,
985 // to allow inserting accurate SEH instructions (including accurate
986 // instruction size for each of them).
987 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
988 .addImm(NumWords & 0xffff)
991 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
992 .addReg(ARM::R4)
993 .addImm(NumWords >> 16)
996 }
997
998 switch (TM.getCodeModel()) {
999 case CodeModel::Tiny:
1000 llvm_unreachable("Tiny code model not available on ARM.");
1001 case CodeModel::Small:
1002 case CodeModel::Medium:
1003 case CodeModel::Kernel:
1004 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1006 .addExternalSymbol("__chkstk")
1007 .addReg(ARM::R4, RegState::Implicit)
1009 break;
1010 case CodeModel::Large:
1011 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1012 .addExternalSymbol("__chkstk")
1014
1015 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1017 .addReg(ARM::R12, RegState::Kill)
1018 .addReg(ARM::R4, RegState::Implicit)
1020 break;
1021 }
1022
1023 MachineInstrBuilder Instr, SEH;
1024 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1025 .addReg(ARM::SP, RegState::Kill)
1026 .addReg(ARM::R4, RegState::Kill)
1029 .add(condCodeOp());
1030 if (NeedsWinCFIStackAlloc) {
1031 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1032 .addImm(NumBytes)
1033 .addImm(/*Wide=*/1)
1035 MBB.insertAfter(Instr, SEH);
1036 }
1037 NumBytes = 0;
1038 }
1039
1040 if (NumBytes) {
1041 // Adjust SP after all the callee-save spills.
1042 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1043 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1044 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1045 else {
1046 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1048 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1049 }
1050
1051 if (HasFP && isARM)
1052 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1053 // Note it's not safe to do this in Thumb2 mode because it would have
1054 // taken two instructions:
1055 // mov sp, r7
1056 // sub sp, #24
1057 // If an interrupt is taken between the two instructions, then sp is in
1058 // an inconsistent state (pointing to the middle of callee-saved area).
1059 // The interrupt handler can end up clobbering the registers.
1060 AFI->setShouldRestoreSPFromFP(true);
1061 }
1062
1063 // Set FP to point to the stack slot that contains the previous FP.
1064 // For iOS, FP is R7, which has now been stored in spill area 1.
1065 // Otherwise, if this is not iOS, all the callee-saved registers go
1066 // into spill area 1, including the FP in R11. In either case, it
1067 // is in area one and the adjustment needs to take place just after
1068 // that push.
1069 // FIXME: The above is not necessary true when PACBTI is enabled.
1070 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1071 // so FP ends up on area two.
1073 if (HasFP) {
1074 AfterPush = std::next(GPRCS1Push);
1075 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1076 int FPOffset = PushSize + FramePtrOffsetInPush;
1077 if (STI.splitFramePointerPush(MF)) {
1078 AfterPush = std::next(GPRCS2Push);
1079 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1080 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1081 } else {
1082 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1083 FramePtr, ARM::SP, FPOffset,
1085 }
1086 if (!NeedsWinCFI) {
1087 if (FramePtrOffsetInPush + PushSize != 0) {
1088 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1089 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1090 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1091 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1092 .addCFIIndex(CFIIndex)
1094 } else {
1095 unsigned CFIIndex =
1097 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1098 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1099 .addCFIIndex(CFIIndex)
1101 }
1102 }
1103 }
1104
1105 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1106 // instructions below don't need to be replayed to unwind the stack.
1107 if (NeedsWinCFI && MBBI != MBB.begin()) {
1109 if (HasFP && STI.splitFramePointerPush(MF))
1110 End = AfterPush;
1112 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1114 MF.setHasWinCFI(true);
1115 }
1116
1117 // Now that the prologue's actual instructions are finalised, we can insert
1118 // the necessary DWARF cf instructions to describe the situation. Start by
1119 // recording where each register ended up:
1120 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1121 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1122 int CFIIndex;
1123 for (const auto &Entry : CSI) {
1124 Register Reg = Entry.getReg();
1125 int FI = Entry.getFrameIdx();
1126 switch (Reg) {
1127 case ARM::R8:
1128 case ARM::R9:
1129 case ARM::R10:
1130 case ARM::R11:
1131 case ARM::R12:
1132 if (STI.splitFramePushPop(MF))
1133 break;
1134 [[fallthrough]];
1135 case ARM::R0:
1136 case ARM::R1:
1137 case ARM::R2:
1138 case ARM::R3:
1139 case ARM::R4:
1140 case ARM::R5:
1141 case ARM::R6:
1142 case ARM::R7:
1143 case ARM::LR:
1145 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1146 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1147 .addCFIIndex(CFIIndex)
1149 break;
1150 }
1151 }
1152 }
1153
1154 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1155 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1156 for (const auto &Entry : CSI) {
1157 Register Reg = Entry.getReg();
1158 int FI = Entry.getFrameIdx();
1159 switch (Reg) {
1160 case ARM::R8:
1161 case ARM::R9:
1162 case ARM::R10:
1163 case ARM::R11:
1164 case ARM::R12:
1165 if (STI.splitFramePushPop(MF)) {
1166 unsigned DwarfReg = MRI->getDwarfRegNum(
1167 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1168 unsigned Offset = MFI.getObjectOffset(FI);
1169 unsigned CFIIndex = MF.addFrameInst(
1170 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1171 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1172 .addCFIIndex(CFIIndex)
1174 }
1175 break;
1176 }
1177 }
1178 }
1179
1180 if (DPRCSSize > 0 && !NeedsWinCFI) {
1181 // Since vpush register list cannot have gaps, there may be multiple vpush
1182 // instructions in the prologue.
1183 MachineBasicBlock::iterator Pos = std::next(LastPush);
1184 for (const auto &Entry : CSI) {
1185 Register Reg = Entry.getReg();
1186 int FI = Entry.getFrameIdx();
1187 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1188 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1189 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1190 unsigned Offset = MFI.getObjectOffset(FI);
1191 unsigned CFIIndex = MF.addFrameInst(
1192 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1193 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1194 .addCFIIndex(CFIIndex)
1196 }
1197 }
1198 }
1199
1200 // Now we can emit descriptions of where the canonical frame address was
1201 // throughout the process. If we have a frame pointer, it takes over the job
1202 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1203 // actually get emitted.
1204 if (!NeedsWinCFI)
1205 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1206
1207 if (STI.isTargetELF() && hasFP(MF))
1209 AFI->getFramePtrSpillOffset());
1210
1211 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1212 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1213 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1214 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1215 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1216
1217 // If we need dynamic stack realignment, do it here. Be paranoid and make
1218 // sure if we also have VLAs, we have a base pointer for frame access.
1219 // If aligned NEON registers were spilled, the stack has already been
1220 // realigned.
1221 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1222 Align MaxAlign = MFI.getMaxAlign();
1224 if (!AFI->isThumbFunction()) {
1225 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1226 false);
1227 } else {
1228 // We cannot use sp as source/dest register here, thus we're using r4 to
1229 // perform the calculations. We're emitting the following sequence:
1230 // mov r4, sp
1231 // -- use emitAligningInstructions to produce best sequence to zero
1232 // -- out lower bits in r4
1233 // mov sp, r4
1234 // FIXME: It will be better just to find spare register here.
1235 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1236 .addReg(ARM::SP, RegState::Kill)
1238 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1239 false);
1240 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1241 .addReg(ARM::R4, RegState::Kill)
1243 }
1244
1245 AFI->setShouldRestoreSPFromFP(true);
1246 }
1247
1248 // If we need a base pointer, set it up here. It's whatever the value
1249 // of the stack pointer is at this point. Any variable size objects
1250 // will be allocated after this, so we can still use the base pointer
1251 // to reference locals.
1252 // FIXME: Clarify FrameSetup flags here.
1253 if (RegInfo->hasBasePointer(MF)) {
1254 if (isARM)
1255 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1256 .addReg(ARM::SP)
1258 .add(condCodeOp());
1259 else
1260 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1261 .addReg(ARM::SP)
1263 }
1264
1265 // If the frame has variable sized objects then the epilogue must restore
1266 // the sp from fp. We can assume there's an FP here since hasFP already
1267 // checks for hasVarSizedObjects.
1268 if (MFI.hasVarSizedObjects())
1269 AFI->setShouldRestoreSPFromFP(true);
1270}
1271
// ARMFrameLowering::emitEpilogue — tears down the frame built by emitPrologue:
// walks MBBI back over the FrameDestroy restore instructions, moves SP past
// the callee-saved areas (recomputing SP from the frame pointer when
// shouldRestoreSPFromFP() is set), restores the argument-register save area,
// authenticates the return address (t2AUT) when return-address signing is on,
// and brackets everything with Windows SEH epilogue markers when WinCFI is
// required.
// NOTE(review): this is a rendered extract — each line begins with its
// original source line number, and the numbering is non-contiguous (e.g.
// 1294 -> 1296, 1305 -> 1307), so statements (mostly predOps/flag builder
// continuations and the signature's first line) are missing here. Consult the
// upstream ARMFrameLowering.cpp before relying on the exact statement
// sequence.
1273 MachineBasicBlock &MBB) const {
1274 MachineFrameInfo &MFI = MF.getFrameInfo();
1276 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1277 const ARMBaseInstrInfo &TII =
1278 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Thumb1 epilogues are handled by a separate lowering (Thumb1FrameLowering).
1279 assert(!AFI->isThumb1OnlyFunction() &&
1280 "This emitEpilogue does not support Thumb1!");
1281 bool isARM = !AFI->isThumbFunction();
1282
1283 // Amount of stack space we reserved next to incoming args for either
1284 // varargs registers or stack arguments in tail calls made by this function.
1285 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1286
1287 // How much of the stack used by incoming arguments this function is expected
1288 // to restore in this particular epilogue.
1289 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1290 int NumBytes = (int)MFI.getStackSize();
1291 Register FramePtr = RegInfo->getFrameRegister(MF);
1292
1293 // All calls are tail calls in GHC calling conv, and functions have no
1294 // prologue/epilogue.
1296 return;
1297
1298 // First put ourselves on the first (from top) terminator instructions.
1300 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1301
1302 MachineBasicBlock::iterator RangeStart;
// No stack frame: the only work is a single SP adjustment (plus optional SEH
// epilogue-start marker).
1303 if (!AFI->hasStackFrame()) {
1304 if (MF.hasWinCFI()) {
1305 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1307 RangeStart = initMBBRange(MBB, MBBI);
1308 }
1309
1310 if (NumBytes + IncomingArgStackToRestore != 0)
1311 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1312 NumBytes + IncomingArgStackToRestore,
1314 } else {
1315 // Unwind MBBI to point to first LDR / VLDRD.
1316 if (MBBI != MBB.begin()) {
1317 do {
1318 --MBBI;
1319 } while (MBBI != MBB.begin() &&
// We stepped one instruction too far back; move forward again unless the
// instruction is part of the frame-destroy sequence.
1321 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1322 ++MBBI;
1323 }
1324
1325 if (MF.hasWinCFI()) {
1326 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1328 RangeStart = initMBBRange(MBB, MBBI);
1329 }
1330
1331 // Move SP to start of FP callee save spill area.
1332 NumBytes -= (ReservedArgStack +
1333 AFI->getFPCXTSaveAreaSize() +
1338
1339 // Reset SP based on frame pointer only if the stack frame extends beyond
1340 // frame pointer stack slot or target is ELF and the function has FP.
1341 if (AFI->shouldRestoreSPFromFP()) {
1342 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
1343 if (NumBytes) {
1344 if (isARM)
// ARM mode: single sub/add from FP is interrupt-safe.
1345 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1346 ARMCC::AL, 0, TII,
1348 else {
1349 // It's not possible to restore SP from FP in a single instruction.
1350 // For iOS, this looks like:
1351 // mov sp, r7
1352 // sub sp, #24
1353 // This is bad, if an interrupt is taken after the mov, sp is in an
1354 // inconsistent state.
1355 // Use the first callee-saved register as a scratch register.
1356 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1357 "No scratch register to restore SP from FP!");
// Compute the target SP into r4 first, then move it into SP in one step.
1358 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1360 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1361 .addReg(ARM::R4)
1364 }
1365 } else {
1366 // Thumb2 or ARM.
1367 if (isARM)
1368 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1371 .add(condCodeOp())
1373 else
1374 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1378 }
// No FP restore needed: try to fold the deallocation into the pop, else emit
// an explicit SP update.
1379 } else if (NumBytes &&
1380 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1381 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1383
1384 // Increment past our save areas.
1386 MBBI++;
1387
1388 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1389 MBBI++;
1390 // Since vpop register list cannot have gaps, there may be multiple vpop
1391 // instructions in the epilogue.
1392 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1393 MBBI++;
1394 }
// Undo the 4-byte alignment gap inserted between the GPR and DPR spill areas.
1395 if (AFI->getDPRCalleeSavedGapSize()) {
1396 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1397 "unexpected DPR alignment gap");
1398 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1400 }
1401
1403 MBBI++;
1404 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1405
1406 if (ReservedArgStack || IncomingArgStackToRestore) {
1407 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1408 "attempting to restore negative stack amount");
1409 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1410 ReservedArgStack + IncomingArgStackToRestore,
1412 }
1413
1414 // Validate PAC, It should have been already popped into R12. For CMSE entry
1415 // function, the validation instruction is emitted during expansion of the
1416 // tBXNS_RET, since the validation must use the value of SP at function
1417 // entry, before saving, resp. after restoring, FPCXTNS.
1418 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1419 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1420 }
1421
// Close the SEH epilogue range for Windows unwind info.
1422 if (MF.hasWinCFI()) {
1424 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1426 }
1427}
1428
1429/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1430/// debug info. It's the same as what we use for resolving the code-gen
1431/// references for now. FIXME: This can go wrong when references are
1432/// SP-relative and simple call frames aren't used.
// ARMFrameLowering::getFrameIndexReference (signature's first line is not
// visible in this extract): thin wrapper used for debug-info queries. It
// delegates to ResolveFrameIndexReference with SPAdj = 0 — i.e. assuming no
// extra pending SP adjustment — and wraps the resulting byte offset in a
// fixed StackOffset. FrameReg is set by the callee to the register the
// offset is relative to (SP, FP, or the base pointer).
1434 int FI,
1435 Register &FrameReg) const {
1436 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1437}
1438
// ARMFrameLowering::ResolveFrameIndexReference — picks the best base register
// (SP, FP, or the base register) for addressing frame index FI and returns
// the byte offset from that register. The selection balances:
//   * stack realignment (SP offset of fixed objects is unreliable),
//   * VLAs / non-reserved call frames (SP moves, so prefer FP/base pointer),
//   * Thumb/Thumb2 immediate-offset encoding ranges.
// NOTE(review): this extract drops at least one source line (the
// static_cast's argument on original line 1444); code below is otherwise
// reproduced verbatim.
1440 int FI, Register &FrameReg,
1441 int SPAdj) const {
1442 const MachineFrameInfo &MFI = MF.getFrameInfo();
1443 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1445 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Offset of the object from the current SP; FPOffset is the same slot seen
// from the frame pointer's spill position.
1446 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1447 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1448 bool isFixed = MFI.isFixedObjectIndex(FI);
1449
// Default answer: SP-relative, adjusted for any pending SP modification.
1450 FrameReg = ARM::SP;
1451 Offset += SPAdj;
1452
1453 // SP can move around if there are allocas. We may also lose track of SP
1454 // when emergency spilling inside a non-reserved call frame setup.
1455 bool hasMovingSP = !hasReservedCallFrame(MF);
1456
1457 // When dynamically realigning the stack, use the frame pointer for
1458 // parameters, and the stack/base pointer for locals.
1459 if (RegInfo->hasStackRealignment(MF)) {
1460 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1461 if (isFixed) {
1462 FrameReg = RegInfo->getFrameRegister(MF);
1463 Offset = FPOffset;
1464 } else if (hasMovingSP) {
1465 assert(RegInfo->hasBasePointer(MF) &&
1466 "VLAs and dynamic stack alignment, but missing base pointer!");
1467 FrameReg = RegInfo->getBaseRegister();
// Base-pointer addressing is unaffected by pending SP adjustments.
1468 Offset -= SPAdj;
1469 }
1470 return Offset;
1471 }
1472
1473 // If there is a frame pointer, use it when we can.
1474 if (hasFP(MF) && AFI->hasStackFrame()) {
1475 // Use frame pointer to reference fixed objects. Use it for locals if
1476 // there are VLAs (and thus the SP isn't reliable as a base).
1477 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1478 FrameReg = RegInfo->getFrameRegister(MF);
1479 return FPOffset;
1480 } else if (hasMovingSP) {
1481 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1482 if (AFI->isThumb2Function()) {
1483 // Try to use the frame pointer if we can, else use the base pointer
1484 // since it's available. This is handy for the emergency spill slot, in
1485 // particular.
// -255..-1 is the reachable negative range for Thumb2 [rn, #-imm8] forms.
1486 if (FPOffset >= -255 && FPOffset < 0) {
1487 FrameReg = RegInfo->getFrameRegister(MF);
1488 return FPOffset;
1489 }
1490 }
1491 } else if (AFI->isThumbFunction()) {
1492 // Prefer SP to base pointer, if the offset is suitably aligned and in
1493 // range as the effective range of the immediate offset is bigger when
1494 // basing off SP.
1495 // Use add <rd>, sp, #<imm8>
1496 // ldr <rd>, [sp, #<imm8>]
1497 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1498 return Offset;
1499 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1500 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1501 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1502 FrameReg = RegInfo->getFrameRegister(MF);
1503 return FPOffset;
1504 }
1505 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1506 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1507 FrameReg = RegInfo->getFrameRegister(MF);
1508 return FPOffset;
1509 }
1510 }
1511 // Use the base pointer if we have one.
1512 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1513 // That can happen if we forced a base pointer for a large call frame.
1514 if (RegInfo->hasBasePointer(MF)) {
1515 FrameReg = RegInfo->getBaseRegister();
1516 Offset -= SPAdj;
1517 }
1518 return Offset;
1519}
1520
// ARMFrameLowering::emitPushInst — emit one or more push/store instructions
// that spill the callee-saved registers selected by the Func predicate.
// Registers are collected in descending CSI order, sorted by encoding, and
// emitted either as a single multi-register store (StmOpc) or, for a lone
// register when StrOpc != 0, a single pre-decrement store. With NoGap set,
// each instruction only covers a consecutive register run (required for
// vpush register lists). Registers in the aligned DPRCS2 area are skipped —
// they are handled by emitAlignedDPRCS2Spills.
// NOTE(review): the extract drops several lines here (e.g. the iterator/CSI
// parameters on original lines 1522-1523 and the declaration of Regs around
// line 1536); the visible tokens are reproduced verbatim.
1521 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1524 unsigned StmOpc, unsigned StrOpc,
1525 bool NoGap, bool (*Func)(unsigned, bool),
1526 unsigned NumAlignedDPRCS2Regs,
1527 unsigned MIFlags) const {
1528 MachineFunction &MF = *MBB.getParent();
1531
1532 DebugLoc DL;
1533
1534 using RegAndKill = std::pair<unsigned, bool>;
1535
// Outer loop: each iteration emits one push instruction covering the next
// batch of selected registers (walking CSI from the end).
1537 unsigned i = CSI.size();
1538 while (i != 0) {
1539 unsigned LastReg = 0;
1540 for (; i != 0; --i) {
1541 Register Reg = CSI[i-1].getReg();
1542 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1543
1544 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1545 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1546 continue;
1547
1548 const MachineRegisterInfo &MRI = MF.getRegInfo();
1549 bool isLiveIn = MRI.isLiveIn(Reg);
1550 if (!isLiveIn && !MRI.isReserved(Reg))
1551 MBB.addLiveIn(Reg);
1552 // If NoGap is true, push consecutive registers and then leave the rest
1553 // for other instructions. e.g.
1554 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1555 if (NoGap && LastReg && LastReg != Reg-1)
1556 break;
1557 LastReg = Reg;
1558 // Do not set a kill flag on values that are also marked as live-in. This
1559 // happens with the @llvm-returnaddress intrinsic and with arguments
1560 // passed in callee saved registers.
1561 // Omitting the kill flags is conservatively correct even if the live-in
1562 // is not used after all.
1563 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1564 }
1565
1566 if (Regs.empty())
1567 continue;
1568
// Register lists must be in ascending encoding order for LDM/STM/VPUSH.
1569 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1570 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1571 });
1572
1573 if (Regs.size() > 1 || StrOpc== 0) {
1574 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1575 .addReg(ARM::SP)
1576 .setMIFlags(MIFlags)
1578 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1579 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1580 } else if (Regs.size() == 1) {
// Single register: use the pre-indexed store (SP decremented by 4).
1581 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1582 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1583 .addReg(ARM::SP)
1584 .setMIFlags(MIFlags)
1585 .addImm(-4)
1587 }
1588 Regs.clear();
1589
1590 // Put any subsequent vpush instructions before this one: they will refer to
1591 // higher register numbers so need to be pushed first in order to preserve
1592 // monotonicity.
1593 if (MI != MBB.begin())
1594 --MI;
1595 }
1596}
1597
// ARMFrameLowering::emitPopInst — mirror of emitPushInst for the epilogue:
// emits one or more pop/load instructions (LdmOpc for register lists, LdrOpc
// for a lone register) restoring the callee-saved registers selected by
// Func. When it is safe (plain return, no tail call / interrupt / trap /
// CMSE entry / PAC, ARMv5T+), LR is restored directly into PC and the return
// instruction is folded into the LDM (LDMIA_RET / t2LDMIA_RET). DPRCS2
// registers are skipped — see emitAlignedDPRCS2Restores.
// NOTE(review): the extract drops lines here (e.g. original lines 1599-1600
// of the signature, 1643, 1673-1674, 1689, 1693, 1698); visible tokens are
// reproduced verbatim.
1598 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1601 unsigned LdmOpc, unsigned LdrOpc,
1602 bool isVarArg, bool NoGap,
1603 bool (*Func)(unsigned, bool),
1604 unsigned NumAlignedDPRCS2Regs) const {
1605 MachineFunction &MF = *MBB.getParent();
1609 bool hasPAC = AFI->shouldSignReturnAddress();
1610 DebugLoc DL;
// Classify the terminator so we know whether the LR->PC fold is legal.
1611 bool isTailCall = false;
1612 bool isInterrupt = false;
1613 bool isTrap = false;
1614 bool isCmseEntry = false;
1615 if (MBB.end() != MI) {
1616 DL = MI->getDebugLoc();
1617 unsigned RetOpcode = MI->getOpcode();
1618 isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
1619 isInterrupt =
1620 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1621 isTrap =
1622 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1623 RetOpcode == ARM::tTRAP;
1624 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1625 }
1626
// Outer loop: each iteration emits one pop instruction for the next batch,
// walking CSI from the end.
1628 unsigned i = CSI.size();
1629 while (i != 0) {
1630 unsigned LastReg = 0;
1631 bool DeleteRet = false;
1632 for (; i != 0; --i) {
1633 CalleeSavedInfo &Info = CSI[i-1];
1634 Register Reg = Info.getReg();
1635 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1636
1637 // The aligned reloads from area DPRCS2 are not inserted here.
1638 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1639 continue;
1640 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1641 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1642 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1643 Reg = ARM::PC;
1645 // Fold the return instruction into the LDM.
1646 DeleteRet = true;
1647 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1648 // We 'restore' LR into PC so it is not live out of the return block:
1649 // Clear Restored bit.
1650 Info.setRestored(false);
1651 }
1652
1653 // If NoGap is true, pop consecutive registers and then leave the rest
1654 // for other instructions. e.g.
1655 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1656 if (NoGap && LastReg && LastReg != Reg-1)
1657 break;
1658
1659 LastReg = Reg;
1660 Regs.push_back(Reg);
1661 }
1662
1663 if (Regs.empty())
1664 continue;
1665
// Register lists must be in ascending encoding order for LDM/VPOP.
1666 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1667 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1668 });
1669
1670 if (Regs.size() > 1 || LdrOpc == 0) {
1671 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1672 .addReg(ARM::SP)
1675 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1676 MIB.addReg(Regs[i], getDefRegState(true));
// The return was folded into the LDM above: carry over its implicit
// operands, then delete it.
1677 if (DeleteRet) {
1678 if (MI != MBB.end()) {
1679 MIB.copyImplicitOps(*MI);
1680 MI->eraseFromParent();
1681 }
1682 }
1683 MI = MIB;
1684 } else if (Regs.size() == 1) {
1685 // If we adjusted the reg to PC from LR above, switch it back here. We
1686 // only do that for LDM.
1687 if (Regs[0] == ARM::PC)
1688 Regs[0] = ARM::LR;
1690 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1691 .addReg(ARM::SP, RegState::Define)
1692 .addReg(ARM::SP)
1694 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1695 // that refactoring is complete (eventually).
1696 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1697 MIB.addReg(0);
1699 } else
1700 MIB.addImm(4);
1701 MIB.add(predOps(ARMCC::AL));
1702 }
1703 Regs.clear();
1704
1705 // Put any subsequent vpop instructions after this one: they will refer to
1706 // higher register numbers so need to be popped afterwards.
1707 if (MI != MBB.end())
1708 ++MI;
1709 }
1710}
1711
1712/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1713/// starting from d8. Also insert stack realignment code and leave the stack
1714/// pointer pointing to the d8 spill slot.
// emitAlignedDPRCS2Spills (static; the signature's opening lines are not
// visible in this extract) — spills NumAlignedDPRCS2Regs D-registers
// starting at d8 to 16-byte-aligned stack slots. It realigns SP via scratch
// register r4 (sub/bic/mov — exactly three instructions, which
// skipAlignedDPRCS2Spills relies on), then emits the widest available
// aligned NEON stores (vst1.64 with 4 regs, then 4, then 2, then a plain
// vstr for an odd leftover), leaving SP pointing at the d8 slot.
// NOTE(review): the extract drops builder continuation lines (e.g. original
// 1765, 1783, 1801-1802); visible tokens are reproduced verbatim.
1717 unsigned NumAlignedDPRCS2Regs,
1719 const TargetRegisterInfo *TRI) {
1720 MachineFunction &MF = *MBB.getParent();
1722 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1724 MachineFrameInfo &MFI = MF.getFrameInfo();
1725
1726 // Mark the D-register spill slots as properly aligned. Since MFI computes
1727 // stack slot layout backwards, this can actually mean that the d-reg stack
1728 // slot offsets can be wrong. The offset for d8 will always be correct.
1729 for (const CalleeSavedInfo &I : CSI) {
1730 unsigned DNum = I.getReg() - ARM::D8;
1731 if (DNum > NumAlignedDPRCS2Regs - 1)
1732 continue;
1733 int FI = I.getFrameIdx();
1734 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1735 // registers will be 8-byte aligned.
1736 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1737
1738 // The stack slot for D8 needs to be maximally aligned because this is
1739 // actually the point where we align the stack pointer. MachineFrameInfo
1740 // computes all offsets relative to the incoming stack pointer which is a
1741 // bit weird when realigning the stack. Any extra padding for this
1742 // over-alignment is not realized because the code inserted below adjusts
1743 // the stack pointer by numregs * 8 before aligning the stack pointer.
1744 if (DNum == 0)
1745 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1746 }
1747
1748 // Move the stack pointer to the d8 spill slot, and align it at the same
1749 // time. Leave the stack slot address in the scratch register r4.
1750 //
1751 // sub r4, sp, #numregs * 8
1752 // bic r4, r4, #align - 1
1753 // mov sp, r4
1754 //
1755 bool isThumb = AFI->isThumbFunction();
1756 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
// The epilogue cannot undo the realignment arithmetically; it must restore
// SP from the frame pointer.
1757 AFI->setShouldRestoreSPFromFP(true);
1758
1759 // sub r4, sp, #numregs * 8
1760 // The immediate is <= 64, so it doesn't need any special encoding.
1761 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1762 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1763 .addReg(ARM::SP)
1764 .addImm(8 * NumAlignedDPRCS2Regs)
1766 .add(condCodeOp());
1767
1768 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1769 // We must set parameter MustBeSingleInstruction to true, since
1770 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1771 // stack alignment. Luckily, this can always be done since all ARM
1772 // architecture versions that support Neon also support the BFC
1773 // instruction.
1774 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1775
1776 // mov sp, r4
1777 // The stack pointer must be adjusted before spilling anything, otherwise
1778 // the stack slots could be clobbered by an interrupt handler.
1779 // Leave r4 live, it is used below.
1780 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1781 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1782 .addReg(ARM::R4)
1784 if (!isThumb)
1785 MIB.add(condCodeOp());
1786
1787 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1788 // r4 holds the stack slot address.
1789 unsigned NextReg = ARM::D8;
1790
1791 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1792 // The writeback is only needed when emitting two vst1.64 instructions.
1793 if (NumAlignedDPRCS2Regs >= 6) {
1794 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1795 &ARM::QQPRRegClass);
1796 MBB.addLiveIn(SupReg);
1797 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1798 .addReg(ARM::R4, RegState::Kill)
1799 .addImm(16)
1800 .addReg(NextReg)
1803 NextReg += 4;
1804 NumAlignedDPRCS2Regs -= 4;
1805 }
1806
1807 // We won't modify r4 beyond this point. It currently points to the next
1808 // register to be spilled.
1809 unsigned R4BaseReg = NextReg;
1810
1811 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1812 if (NumAlignedDPRCS2Regs >= 4) {
1813 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1814 &ARM::QQPRRegClass);
1815 MBB.addLiveIn(SupReg);
1816 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1817 .addReg(ARM::R4)
1818 .addImm(16)
1819 .addReg(NextReg)
1822 NextReg += 4;
1823 NumAlignedDPRCS2Regs -= 4;
1824 }
1825
1826 // 16-byte aligned vst1.64 with 2 d-regs.
1827 if (NumAlignedDPRCS2Regs >= 2) {
1828 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1829 &ARM::QPRRegClass);
1830 MBB.addLiveIn(SupReg);
1831 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1832 .addReg(ARM::R4)
1833 .addImm(16)
1834 .addReg(SupReg)
1836 NextReg += 2;
1837 NumAlignedDPRCS2Regs -= 2;
1838 }
1839
1840 // Finally, use a vanilla vstr.64 for the odd last register.
1841 if (NumAlignedDPRCS2Regs) {
1842 MBB.addLiveIn(NextReg);
1843 // vstr.64 uses addrmode5 which has an offset scale of 4.
1844 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1845 .addReg(NextReg)
1846 .addReg(ARM::R4)
1847 .addImm((NextReg - R4BaseReg) * 2)
1849 }
1850
1851 // The last spill instruction inserted should kill the scratch register r4.
1852 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1853}
1854
1855/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1856/// iterator to the following instruction.
// skipAlignedDPRCS2Spills (static; the signature's opening lines are not
// visible in this extract) — advances MI past everything that
// emitAlignedDPRCS2Spills inserted: exactly three realignment instructions
// (sub/bic/mov) plus a number of spill instructions that depends on
// NumAlignedDPRCS2Regs, and returns the iterator to the instruction after
// the last spill. The switch fall-through encodes the spill count: 1/2/4
// regs -> one store; 3/5/6 regs (default) -> two stores; 7 regs -> three.
1859 unsigned NumAlignedDPRCS2Regs) {
1860 // sub r4, sp, #numregs * 8
1861 // bic r4, r4, #align - 1
1862 // mov sp, r4
1863 ++MI; ++MI; ++MI;
1864 assert(MI->mayStore() && "Expecting spill instruction");
1865
1866 // These switches all fall through.
1867 switch(NumAlignedDPRCS2Regs) {
1868 case 7:
1869 ++MI;
1870 assert(MI->mayStore() && "Expecting spill instruction");
1871 [[fallthrough]];
1872 default:
1873 ++MI;
1874 assert(MI->mayStore() && "Expecting spill instruction");
1875 [[fallthrough]];
1876 case 1:
1877 case 2:
1878 case 4:
// The final spill must carry the kill of scratch register r4 that
// emitAlignedDPRCS2Spills added.
1879 assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
1880 ++MI;
1881 }
1882 return MI;
1883}
1884
1885/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1886/// starting from d8. These instructions are assumed to execute while the
1887/// stack is still aligned, unlike the code inserted by emitPopInst.
// emitAlignedDPRCS2Restores (static; the signature's opening lines are not
// visible in this extract) — epilogue counterpart of
// emitAlignedDPRCS2Spills: materializes the d8 spill-slot address into
// scratch register r4 via normal frame-index elimination, then reloads
// NumAlignedDPRCS2Regs D-registers starting at d8 with the widest available
// aligned NEON loads (vld1.64 x4 with writeback, x4, x2, then a plain vldr
// for an odd leftover).
// NOTE(review): the extract drops builder continuation lines (e.g. original
// 1932-1933, 1949-1950, 1962, 1972); visible tokens are reproduced verbatim.
1890 unsigned NumAlignedDPRCS2Regs,
1892 const TargetRegisterInfo *TRI) {
1893 MachineFunction &MF = *MBB.getParent();
1895 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1897
1898 // Find the frame index assigned to d8.
1899 int D8SpillFI = 0;
1900 for (const CalleeSavedInfo &I : CSI)
1901 if (I.getReg() == ARM::D8) {
1902 D8SpillFI = I.getFrameIdx();
1903 break;
1904 }
1905
1906 // Materialize the address of the d8 spill slot into the scratch register r4.
1907 // This can be fairly complicated if the stack frame is large, so just use
1908 // the normal frame index elimination mechanism to do it. This code runs as
1909 // the initial part of the epilog where the stack and base pointers haven't
1910 // been changed yet.
1911 bool isThumb = AFI->isThumbFunction();
1912 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1913
1914 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1915 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1916 .addFrameIndex(D8SpillFI)
1917 .addImm(0)
1919 .add(condCodeOp());
1920
1921 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
1922 unsigned NextReg = ARM::D8;
1923
1924 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
// Writeback advances r4 past the first four registers so the following
// loads can address the remainder at fixed offsets.
1925 if (NumAlignedDPRCS2Regs >= 6) {
1926 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1927 &ARM::QQPRRegClass);
1928 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1929 .addReg(ARM::R4, RegState::Define)
1930 .addReg(ARM::R4, RegState::Kill)
1931 .addImm(16)
1934 NextReg += 4;
1935 NumAlignedDPRCS2Regs -= 4;
1936 }
1937
1938 // We won't modify r4 beyond this point. It currently points to the next
1939 // register to be spilled.
1940 unsigned R4BaseReg = NextReg;
1941
1942 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1943 if (NumAlignedDPRCS2Regs >= 4) {
1944 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1945 &ARM::QQPRRegClass);
1946 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1947 .addReg(ARM::R4)
1948 .addImm(16)
1951 NextReg += 4;
1952 NumAlignedDPRCS2Regs -= 4;
1953 }
1954
1955 // 16-byte aligned vld1.64 with 2 d-regs.
1956 if (NumAlignedDPRCS2Regs >= 2) {
1957 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1958 &ARM::QPRRegClass);
1959 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1960 .addReg(ARM::R4)
1961 .addImm(16)
1963 NextReg += 2;
1964 NumAlignedDPRCS2Regs -= 2;
1965 }
1966
1967 // Finally, use a vanilla vldr.64 for the remaining odd register.
1968 if (NumAlignedDPRCS2Regs)
1969 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1970 .addReg(ARM::R4)
1971 .addImm(2 * (NextReg - R4BaseReg))
1973
1974 // Last store kills r4.
// NOTE(review): the comment above says "store" but the instructions emitted
// here are reloads (vld1/vldr); the kill of r4 is attached to the last load.
1975 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1976}
1977
// NOTE(review): the function header is missing from this excerpt. Given the
// emitPushInst calls and the FrameSetup flags, this is presumably
// ARMFrameLowering::spillCalleeSavedRegisters -- confirm against the full file.
// Returns true to indicate the target handled the callee-save spills itself.
1981 if (CSI.empty())
1982 return false;
1983
1984 MachineFunction &MF = *MBB.getParent();
1986
// Select push opcodes appropriate for the Thumb/ARM encoding of this function.
1987 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1988 unsigned PushOneOpc = AFI->isThumbFunction() ?
1989 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1990 unsigned FltOpc = ARM::VSTMDDB_UPD;
1991 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1992 // Compute PAC in R12 (return-address signing).
1993 if (AFI->shouldSignReturnAddress()) {
1994 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1996 }
1997 // Save the non-secure floating point context (CMSE entry functions).
1998 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
1999 return C.getReg() == ARM::FPCXTNS;
2000 })) {
2001 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2002 ARM::SP)
2003 .addReg(ARM::SP)
2004 .addImm(-4)
2006 }
// With a split frame-pointer push the GPR area is pushed in two pieces with
// the D-register save area in between; otherwise areas 1, 2 and 3 are pushed
// in order. (Some argument lines of these calls are elided in this excerpt.)
2007 if (STI.splitFramePointerPush(MF)) {
2008 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2010 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2011 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2012 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2014 } else {
2015 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2017 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2019 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2020 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2021 }
2022
2023 // The code above does not insert spill code for the aligned DPRCS2 registers.
2024 // The stack realignment code will be inserted between the push instructions
2025 // and these spills.
2026 if (NumAlignedDPRCS2Regs)
2027 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2028
2029 return true;
2030}
2031
// NOTE(review): the function header is missing from this excerpt. Given the
// emitPopInst calls mirroring the spill code above, this is presumably
// ARMFrameLowering::restoreCalleeSavedRegisters -- confirm against the full
// file. Returns true to indicate the target handled the reloads itself.
2035 if (CSI.empty())
2036 return false;
2037
2038 MachineFunction &MF = *MBB.getParent();
2040 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2041 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2042
2043 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2044 // registers. Do that here instead.
2045 if (NumAlignedDPRCS2Regs)
2046 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2047
// Select pop opcodes matching the push opcodes used in the prologue.
2048 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2049 unsigned LdrOpc =
2050 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2051 unsigned FltOpc = ARM::VLDMDIA_UPD;
// Pop order is the reverse of the push order used in
// spillCalleeSavedRegisters. (Some argument lines are elided in this excerpt.)
2052 if (STI.splitFramePointerPush(MF)) {
2053 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2055 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2056 NumAlignedDPRCS2Regs);
2057 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2059 } else {
2060 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2061 NumAlignedDPRCS2Regs);
2062 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2063 &isARMArea2Register, 0);
2064 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2065 &isARMArea1Register, 0);
2066 }
2067
2068 return true;
2069}
2070
// Estimate the total code size of the function in bytes: the encoded size of
// every instruction plus 4 bytes per jump-table entry and 4 bytes per
// constant-pool entry.
// NOTE(review): the line naming this function is missing from this excerpt;
// it is presumably the static helper EstimateFunctionSizeInBytes (see the
// caller in determineCalleeSaves below) -- confirm against the full file.
2071// FIXME: Make generic?
2073 const ARMBaseInstrInfo &TII) {
2074 unsigned FnSize = 0;
2075 for (auto &MBB : MF) {
2076 for (auto &MI : MBB)
2077 FnSize += TII.getInstSizeInBytes(MI);
2078 }
// Jump tables and constant pools are emitted as data; assume 4 bytes/entry.
2079 if (MF.getJumpTableInfo())
2080 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2081 FnSize += Table.MBBs.size() * 4;
2082 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2083 return FnSize;
2084}
2085
2086/// estimateRSStackSizeLimit - Look at each instruction that references stack
2087/// frames and return the stack size limit beyond which some of these
2088/// instructions will require a scratch register during their expansion later.
2089// FIXME: Move to TII?
// NOTE(review): the line naming this function, and several of the case labels
// in the switch below (e.g. the addressing modes that precede lines 2130,
// 2133, 2141, 2152, 2155 and 2158), are missing from this excerpt -- confirm
// the exact case labels against the full file.
// Also sets HasNonSPFrameIndex when a frame index appears in an operand whose
// register class cannot hold SP.
2091 const TargetFrameLowering *TFI,
2092 bool &HasNonSPFrameIndex) {
2093 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2094 const ARMBaseInstrInfo &TII =
2095 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
// Start from the widest immediate (12 bits) and shrink it as more restrictive
// instructions are found.
2097 unsigned Limit = (1 << 12) - 1;
2098 for (auto &MBB : MF) {
2099 for (auto &MI : MBB) {
2100 if (MI.isDebugInstr())
2101 continue;
2102 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2103 if (!MI.getOperand(i).isFI())
2104 continue;
2105
2106 // When using ADDri to get the address of a stack object, 255 is the
2107 // largest offset guaranteed to fit in the immediate offset.
2108 if (MI.getOpcode() == ARM::ADDri) {
2109 Limit = std::min(Limit, (1U << 8) - 1);
2110 break;
2111 }
2112 // t2ADDri will not require an extra register, it can reuse the
2113 // destination.
2114 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2115 break;
2116
2117 const MCInstrDesc &MCID = MI.getDesc();
2118 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2119 if (RegClass && !RegClass->contains(ARM::SP))
2120 HasNonSPFrameIndex = true;
2121
2122 // Otherwise check the addressing mode.
2123 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2125 case ARMII::AddrMode2:
2126 // Default 12 bit limit.
2127 break;
2128 case ARMII::AddrMode3:
2130 Limit = std::min(Limit, (1U << 8) - 1);
2131 break;
2133 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2134 break;
2135 case ARMII::AddrMode5:
2138 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2139 break;
2141 // i12 supports only positive offset so these will be converted to
2142 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2143 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2144 Limit = std::min(Limit, (1U << 8) - 1);
2145 break;
2146 case ARMII::AddrMode4:
2147 case ARMII::AddrMode6:
2148 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2149 // immediate offset for stack references.
2150 return 0;
2152 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2153 break;
2155 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2156 break;
2158 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2159 break;
2160 default:
2161 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2162 }
2163 break; // At most one FI per instruction
2164 }
2165 }
2166 }
2167
2168 return Limit;
2169}
2170
2171// In functions that realign the stack, it can be an advantage to spill the
2172// callee-saved vector registers after realigning the stack. The vst1 and vld1
2173// instructions take alignment hints that can improve performance.
// NOTE(review): the signature line and several condition lines of this
// function are missing from this excerpt (it is presumably the static helper
// checkNumAlignedDPRCS2Regs, called from determineCalleeSaves below). Each
// early "return;" below pairs with an elided condition -- confirm against the
// full file. On success it records the spill count in ARMFunctionInfo and
// reserves r4 as a scratch register.
2174static void
2176 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2178 return;
2179
2180 // Naked functions don't spill callee-saved registers.
2181 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2182 return;
2183
2184 // We are planning to use NEON instructions vst1 / vld1.
2185 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2186 return;
2187
2188 // Don't bother if the default stack alignment is sufficiently high.
2190 return;
2191
2192 // Aligned spills require stack realignment.
2193 if (!static_cast<const ARMBaseRegisterInfo *>(
2195 return;
2196
2197 // We always spill contiguous d-registers starting from d8. Count how many
2198 // needs spilling. The register allocator will almost always use the
2199 // callee-saved registers in order, but it can happen that there are holes in
2200 // the range. Registers above the hole will be spilled to the standard DPRCS
2201 // area.
2202 unsigned NumSpills = 0;
2203 for (; NumSpills < 8; ++NumSpills)
2204 if (!SavedRegs.test(ARM::D8 + NumSpills))
2205 break;
2206
2207 // Don't do this for just one d-register. It's not worth it.
2208 if (NumSpills < 2)
2209 return;
2210
2211 // Spill the first NumSpills D-registers after realigning the stack.
2212 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2213
2214 // A scratch register is required for the vst1 / vld1 instructions.
2215 SavedRegs.set(ARM::R4);
2216}
2217
// NOTE(review): the function header is missing from this excerpt; from the
// shrink-wrapping comments this is presumably
// ARMFrameLowering::enableShrinkWrapping -- confirm against the full file.
// Returns false when shrink wrapping must be disabled for this function.
2219 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2220 // upon function entry (resp. restore it immediately before return),
2221 // so shrink wrapping is not allowed. (Condition partly elided here.)
2222 if (STI.hasV8_1MMainlineOps() &&
2223 return false;
2224
2225 // We are disabling shrinkwrapping for now when PAC is enabled, as
2226 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2227 // generated. A follow-up patch will fix this in a more performant manner.
2229 true /* SpillsLR */))
2230 return false;
2231
2232 return true;
2233}
2234
// NOTE(review): the signature line is missing from this excerpt; this is
// presumably the helper requiresAAPCSFrameRecord(const MachineFunction &MF),
// used by determineCalleeSaves below -- confirm against the full file.
// True when the subtarget mandates an AAPCS frame record: always for
// "leaf" frame-chain mode, or in frame-chain mode when the function makes
// calls.
2236 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2237 return Subtarget.createAAPCSFrameChainLeaf() ||
2238 (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2239}
2240
2241// Thumb1 may require a spill when storing to a frame index through FP (or any
2242// access with execute-only), for cases where FP is a high register (R11). This
2243// scans the function for cases where this may happen.
// NOTE(review): the first signature line is missing from this excerpt; this is
// presumably the static helper canSpillOnFrameIndexAccess(const
// MachineFunction &MF, ...) used by determineCalleeSaves -- confirm against
// the full file. Returns true if any frame-index access would resolve to a
// high base register (other than SP), which Thumb1 stores cannot address.
2245 const TargetFrameLowering &TFI) {
2246 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Only Thumb1-only functions are affected; everything else can address
// high registers directly.
2247 if (!AFI->isThumb1OnlyFunction())
2248 return false;
2249
2250 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
2251 for (const auto &MBB : MF)
2252 for (const auto &MI : MBB)
2253 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2254 STI.genExecuteOnly())
2255 for (const auto &Op : MI.operands())
2256 if (Op.isFI()) {
2257 Register Reg;
// getFrameIndexReference reports which base register the index
// will be resolved against.
2258 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2259 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2260 return true;
2261 }
2262 return false;
2263}
2264
// NOTE(review): the function header is missing from this excerpt. From the
// (MF, SavedRegs, RS) parameters and the logic this is presumably
// ARMFrameLowering::determineCalleeSaves -- confirm against the full file.
// Decides which callee-saved registers must be spilled, forces extra spills
// where scratch registers or alignment padding are needed, and reserves an
// emergency spill slot for the register scavenger when offsets may overflow.
2266 BitVector &SavedRegs,
2267 RegScavenger *RS) const {
2269 // This tells PEI to spill the FP as if it is any other callee-save register
2270 // to take advantage of the eliminateFrameIndex machinery. This also ensures
2271 // it is spilled in the order specified by getCalleeSavedRegs() to make it
2272 // easier to combine multiple loads / stores.
2273 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2274 bool CS1Spilled = false;
2275 bool LRSpilled = false;
2276 unsigned NumGPRSpills = 0;
2277 unsigned NumFPRSpills = 0;
2278 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2279 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2280 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2282 const ARMBaseInstrInfo &TII =
2283 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2285 MachineFrameInfo &MFI = MF.getFrameInfo();
2288 (void)TRI; // Silence unused warning in non-assert builds.
2289 Register FramePtr = RegInfo->getFrameRegister(MF);
2290
2291 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2292 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2293 // since it's not always possible to restore sp from fp in a single
2294 // instruction.
2295 // FIXME: It will be better just to find spare register here.
2296 if (AFI->isThumb2Function() &&
2297 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2298 SavedRegs.set(ARM::R4);
2299
2300 // If a stack probe will be emitted, spill R4 and LR, since they are
2301 // clobbered by the stack probe call.
2302 // This estimate should be a safe, conservative estimate. The actual
2303 // stack probe is enabled based on the size of the local objects;
2304 // this estimate also includes the varargs store size.
2305 if (STI.isTargetWindows() &&
2306 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2307 SavedRegs.set(ARM::R4);
2308 SavedRegs.set(ARM::LR);
2309 }
2310
2311 if (AFI->isThumb1OnlyFunction()) {
2312 // Spill LR if Thumb1 function uses variable length argument lists.
2313 if (AFI->getArgRegsSaveSize() > 0)
2314 SavedRegs.set(ARM::LR);
2315
2316 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2317 // requires stack alignment. We don't know for sure what the stack size
2318 // will be, but for this, an estimate is good enough. If anything
2319 // changes it, it'll be a spill, which implies we've used all the registers
2320 // and so R4 is already used, so not marking it here will be OK.
2321 // FIXME: It will be better just to find spare register here.
2322 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2323 MFI.estimateStackSize(MF) > 508)
2324 SavedRegs.set(ARM::R4);
2325 }
2326
2327 // See if we can spill vector registers to aligned stack.
2328 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2329
2330 // Spill the BasePtr if it's used.
2331 if (RegInfo->hasBasePointer(MF))
2332 SavedRegs.set(RegInfo->getBaseRegister());
2333
2334 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2335 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2336 CanEliminateFrame = false;
2337
2338 // When return address signing is enabled R12 is treated as callee-saved.
2339 if (AFI->shouldSignReturnAddress())
2340 CanEliminateFrame = false;
2341
2342 // Don't spill FP if the frame can be eliminated. This is determined
2343 // by scanning the callee-save registers to see if any is modified.
2344 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2345 for (unsigned i = 0; CSRegs[i]; ++i) {
2346 unsigned Reg = CSRegs[i];
2347 bool Spilled = false;
2348 if (SavedRegs.test(Reg)) {
2349 Spilled = true;
2350 CanEliminateFrame = false;
2351 }
2352
// Non-GPR callee saves only contribute to the FPR spill byte estimate
// (counted in 4-byte units: SPR=1, DPR=2, QPR=4).
2353 if (!ARM::GPRRegClass.contains(Reg)) {
2354 if (Spilled) {
2355 if (ARM::SPRRegClass.contains(Reg))
2356 NumFPRSpills++;
2357 else if (ARM::DPRRegClass.contains(Reg))
2358 NumFPRSpills += 2;
2359 else if (ARM::QPRRegClass.contains(Reg))
2360 NumFPRSpills += 4;
2361 }
2362 continue;
2363 }
2364
2365 if (Spilled) {
2366 NumGPRSpills++;
2367
2368 if (!STI.splitFramePushPop(MF)) {
2369 if (Reg == ARM::LR)
2370 LRSpilled = true;
2371 CS1Spilled = true;
2372 continue;
2373 }
2374
2375 // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
2376 switch (Reg) {
2377 case ARM::LR:
2378 LRSpilled = true;
2379 [[fallthrough]];
2380 case ARM::R0: case ARM::R1:
2381 case ARM::R2: case ARM::R3:
2382 case ARM::R4: case ARM::R5:
2383 case ARM::R6: case ARM::R7:
2384 CS1Spilled = true;
2385 break;
2386 default:
2387 break;
2388 }
2389 } else {
2390 if (!STI.splitFramePushPop(MF)) {
2391 UnspilledCS1GPRs.push_back(Reg);
2392 continue;
2393 }
2394
// With split push/pop, low registers and LR belong to save area 1 and
// high registers to area 2; remember the unspilled ones separately so
// later padding/scratch decisions can pick from the right area.
2395 switch (Reg) {
2396 case ARM::R0: case ARM::R1:
2397 case ARM::R2: case ARM::R3:
2398 case ARM::R4: case ARM::R5:
2399 case ARM::R6: case ARM::R7:
2400 case ARM::LR:
2401 UnspilledCS1GPRs.push_back(Reg);
2402 break;
2403 default:
2404 UnspilledCS2GPRs.push_back(Reg);
2405 break;
2406 }
2407 }
2408 }
2409
2410 bool ForceLRSpill = false;
2411 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2412 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2413 // Force LR to be spilled if the Thumb function size is > 2048. This enables
2414 // use of BL to implement far jump.
2415 if (FnSize >= (1 << 11)) {
2416 CanEliminateFrame = false;
2417 ForceLRSpill = true;
2418 }
2419 }
2420
2421 // If any of the stack slot references may be out of range of an immediate
2422 // offset, make sure a register (or a spill slot) is available for the
2423 // register scavenger. Note that if we're indexing off the frame pointer, the
2424 // effective stack size is 4 bytes larger since the FP points to the stack
2425 // slot of the previous FP. Also, if we have variable sized objects in the
2426 // function, stack slot references will often be negative, and some of
2427 // our instructions are positive-offset only, so conservatively consider
2428 // that case to want a spill slot (or register) as well. Similarly, if
2429 // the function adjusts the stack pointer during execution and the
2430 // adjustments aren't already part of our stack size estimate, our offset
2431 // calculations may be off, so be conservative.
2432 // FIXME: We could add logic to be more precise about negative offsets
2433 // and which instructions will need a scratch register for them. Is it
2434 // worth the effort and added fragility?
2435 unsigned EstimatedStackSize =
2436 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2437
2438 // Determine biggest (positive) SP offset in MachineFrameInfo.
2439 int MaxFixedOffset = 0;
2440 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2441 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2442 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2443 }
2444
2445 bool HasFP = hasFP(MF);
2446 if (HasFP) {
2447 if (AFI->hasStackFrame())
2448 EstimatedStackSize += 4;
2449 } else {
2450 // If FP is not used, SP will be used to access arguments, so count the
2451 // size of arguments into the estimation.
2452 EstimatedStackSize += MaxFixedOffset;
2453 }
2454 EstimatedStackSize += 16; // For possible paddings.
2455
2456 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2457 bool HasNonSPFrameIndex = false;
2458 if (AFI->isThumb1OnlyFunction()) {
2459 // For Thumb1, don't bother to iterate over the function. The only
2460 // instruction that requires an emergency spill slot is a store to a
2461 // frame index.
2462 //
2463 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2464 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2465 // a 5-bit unsigned immediate.
2466 //
2467 // We could try to check if the function actually contains a tSTRspi
2468 // that might need the spill slot, but it's not really important.
2469 // Functions with VLAs or extremely large call frames are rare, and
2470 // if a function is allocating more than 1KB of stack, an extra 4-byte
2471 // slot probably isn't relevant.
2472 //
2473 // A special case is the scenario where r11 is used as FP, where accesses
2474 // to a frame index will require its value to be moved into a low reg.
2475 // This is handled later on, once we are able to determine if we have any
2476 // fp-relative accesses.
2477 if (RegInfo->hasBasePointer(MF))
2478 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2479 else
2480 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2481 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2482 } else {
2483 EstimatedRSStackSizeLimit =
2484 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2485 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2486 }
2487 // Final estimate of whether sp or bp-relative accesses might require
2488 // scavenging.
2489 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2490
2491 // If the stack pointer moves and we don't have a base pointer, the
2492 // estimate logic doesn't work. The actual offsets might be larger when
2493 // we're constructing a call frame, or we might need to use negative
2494 // offsets from fp.
2495 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2496 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2497 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2498
2499 // If we have a frame pointer, we assume arguments will be accessed
2500 // relative to the frame pointer. Check whether fp-relative accesses to
2501 // arguments require scavenging.
2502 //
2503 // We could do slightly better on Thumb1; in some cases, an sp-relative
2504 // offset would be legal even though an fp-relative offset is not.
2505 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2506 bool HasLargeArgumentList =
2507 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2508
2509 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2510 HasLargeArgumentList || HasNonSPFrameIndex;
2511 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2512 << "; EstimatedStack: " << EstimatedStackSize
2513 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2514 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2515 if (BigFrameOffsets ||
2516 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2517 AFI->setHasStackFrame(true);
2518
2519 if (HasFP) {
2520 SavedRegs.set(FramePtr);
2521 // If the frame pointer is required by the ABI, also spill LR so that we
2522 // emit a complete frame record.
2523 if ((requiresAAPCSFrameRecord(MF) ||
2525 !LRSpilled) {
2526 SavedRegs.set(ARM::LR);
2527 LRSpilled = true;
2528 NumGPRSpills++;
2529 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2530 if (LRPos != UnspilledCS1GPRs.end())
2531 UnspilledCS1GPRs.erase(LRPos);
2532 }
2533 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2534 if (FPPos != UnspilledCS1GPRs.end())
2535 UnspilledCS1GPRs.erase(FPPos);
2536 NumGPRSpills++;
2537 if (FramePtr == ARM::R7)
2538 CS1Spilled = true;
2539 }
2540
2541 // This is the number of extra spills inserted for callee-save GPRs which
2542 // would not otherwise be used by the function. When greater than zero it
2543 // guarantees that it is possible to scavenge a register to hold the
2544 // address of a stack slot. On Thumb1, the register must be a valid operand
2545 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2546 // or lr.
2547 //
2548 // If we don't insert a spill, we instead allocate an emergency spill
2549 // slot, which can be used by scavenging to spill an arbitrary register.
2550 //
2551 // We currently don't try to figure out whether any specific instruction
2552 // requires scavenging an additional register.
2553 unsigned NumExtraCSSpill = 0;
2554
2555 if (AFI->isThumb1OnlyFunction()) {
2556 // For Thumb1-only targets, we need some low registers when we save and
2557 // restore the high registers (which aren't allocatable, but could be
2558 // used by inline assembly) because the push/pop instructions can not
2559 // access high registers. If necessary, we might need to push more low
2560 // registers to ensure that there is at least one free that can be used
2561 // for the saving & restoring, and preferably we should ensure that as
2562 // many as are needed are available so that fewer push/pop instructions
2563 // are required.
2564
2565 // Low registers which are not currently pushed, but could be (r4-r7).
2566 SmallVector<unsigned, 4> AvailableRegs;
2567
2568 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2569 // free.
2570 int EntryRegDeficit = 0;
2571 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2572 if (!MF.getRegInfo().isLiveIn(Reg)) {
2573 --EntryRegDeficit;
2575 << printReg(Reg, TRI)
2576 << " is unused argument register, EntryRegDeficit = "
2577 << EntryRegDeficit << "\n");
2578 }
2579 }
2580
2581 // Unused return registers can be clobbered in the epilogue for free.
2582 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2584 << " return regs used, ExitRegDeficit = "
2585 << ExitRegDeficit << "\n");
2586
2587 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2588 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2589
2590 // r4-r6 can be used in the prologue if they are pushed by the first push
2591 // instruction.
2592 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2593 if (SavedRegs.test(Reg)) {
2594 --RegDeficit;
2595 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2596 << " is saved low register, RegDeficit = "
2597 << RegDeficit << "\n");
2598 } else {
2599 AvailableRegs.push_back(Reg);
2600 LLVM_DEBUG(
2601 dbgs()
2602 << printReg(Reg, TRI)
2603 << " is non-saved low register, adding to AvailableRegs\n");
2604 }
2605 }
2606
2607 // r7 can be used if it is not being used as the frame pointer.
2608 if (!HasFP || FramePtr != ARM::R7) {
2609 if (SavedRegs.test(ARM::R7)) {
2610 --RegDeficit;
2611 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2612 << RegDeficit << "\n");
2613 } else {
2614 AvailableRegs.push_back(ARM::R7);
2615 LLVM_DEBUG(
2616 dbgs()
2617 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2618 }
2619 }
2620
2621 // Each of r8-r11 needs to be copied to a low register, then pushed.
2622 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2623 if (SavedRegs.test(Reg)) {
2624 ++RegDeficit;
2625 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2626 << " is saved high register, RegDeficit = "
2627 << RegDeficit << "\n");
2628 }
2629 }
2630
2631 // LR can only be used by PUSH, not POP, and can't be used at all if the
2632 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2633 // are more limited at function entry than exit.
2634 if ((EntryRegDeficit > ExitRegDeficit) &&
2635 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2637 if (SavedRegs.test(ARM::LR)) {
2638 --RegDeficit;
2639 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2640 << RegDeficit << "\n");
2641 } else {
2642 AvailableRegs.push_back(ARM::LR);
2643 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2644 }
2645 }
2646
2647 // If there are more high registers that need pushing than low registers
2648 // available, push some more low registers so that we can use fewer push
2649 // instructions. This might not reduce RegDeficit all the way to zero,
2650 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2651 // need saving.
2652 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2653 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2654 unsigned Reg = AvailableRegs.pop_back_val();
2655 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2656 << " to make up reg deficit\n");
2657 SavedRegs.set(Reg);
2658 NumGPRSpills++;
2659 CS1Spilled = true;
2660 assert(!MRI.isReserved(Reg) && "Should not be reserved");
2661 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2662 NumExtraCSSpill++;
2663 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2664 if (Reg == ARM::LR)
2665 LRSpilled = true;
2666 }
2667 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2668 << "\n");
2669 }
2670
2671 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2672 // restore LR in that case.
2673 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2674
2675 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
2676 // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
2677 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2678 SavedRegs.set(ARM::LR);
2679 NumGPRSpills++;
2681 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2682 if (LRPos != UnspilledCS1GPRs.end())
2683 UnspilledCS1GPRs.erase(LRPos);
2684
2685 ForceLRSpill = false;
2686 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2687 !AFI->isThumb1OnlyFunction())
2688 NumExtraCSSpill++;
2689 }
2690
2691 // If stack and double are 8-byte aligned and we are spilling an odd number
2692 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2693 // the integer and double callee save areas.
2694 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2695 const Align TargetAlign = getStackAlign();
2696 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2697 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2698 for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
2699 unsigned Reg = UnspilledCS1GPRs[i];
2700 // Don't spill high register if the function is thumb. In the case of
2701 // Windows on ARM, accept R11 (frame pointer)
2702 if (!AFI->isThumbFunction() ||
2703 (STI.isTargetWindows() && Reg == ARM::R11) ||
2704 isARMLowRegister(Reg) ||
2705 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2706 SavedRegs.set(Reg);
2707 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2708 << " to make up alignment\n");
2709 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2710 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2711 NumExtraCSSpill++;
2712 break;
2713 }
2714 }
2715 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2716 unsigned Reg = UnspilledCS2GPRs.front();
2717 SavedRegs.set(Reg);
2718 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2719 << " to make up alignment\n");
2720 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2721 NumExtraCSSpill++;
2722 }
2723 }
2724
2725 // Estimate if we might need to scavenge registers at some point in order
2726 // to materialize a stack offset. If so, either spill one additional
2727 // callee-saved register or reserve a special spill slot to facilitate
2728 // register scavenging. Thumb1 needs a spill slot for stack pointer
2729 // adjustments and for frame index accesses when FP is high register,
2730 // even when the frame itself is small.
2731 unsigned RegsNeeded = 0;
2732 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2733 RegsNeeded++;
2734 // With thumb1 execute-only we may need an additional register for saving
2735 // and restoring the CPSR.
2736 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2737 RegsNeeded++;
2738 }
2739
2740 if (RegsNeeded > NumExtraCSSpill) {
2741 // If any non-reserved CS register isn't spilled, just spill one or two
2742 // extra. That should take care of it!
2743 unsigned NumExtras = TargetAlign.value() / 4;
2745 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2746 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2747 if (!MRI.isReserved(Reg) &&
2748 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2749 Extras.push_back(Reg);
2750 NumExtras--;
2751 }
2752 }
2753 // For non-Thumb1 functions, also check for hi-reg CS registers
2754 if (!AFI->isThumb1OnlyFunction()) {
2755 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2756 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2757 if (!MRI.isReserved(Reg)) {
2758 Extras.push_back(Reg);
2759 NumExtras--;
2760 }
2761 }
2762 }
2763 if (NumExtras == 0) {
2764 for (unsigned Reg : Extras) {
2765 SavedRegs.set(Reg);
2766 if (!MRI.isPhysRegUsed(Reg))
2767 NumExtraCSSpill++;
2768 }
2769 }
2770 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2771 // Reserve a slot closest to SP or frame pointer.
2772 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2773 const TargetRegisterClass &RC = ARM::GPRRegClass;
2774 unsigned Size = TRI->getSpillSize(RC);
2775 Align Alignment = TRI->getSpillAlign(RC);
2777 MFI.CreateStackObject(Size, Alignment, false));
2778 --RegsNeeded;
2779 }
2780 }
2781 }
2782
2783 if (ForceLRSpill)
2784 SavedRegs.set(ARM::LR);
2785 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2786}
2787
// NOTE(review): the first signature line is missing from this excerpt;
// presumably ARMFrameLowering::getCalleeSaves(const MachineFunction &MF, ...)
// -- confirm against the full file. It augments the base-class result with an
// ARM-specific special case.
2789 BitVector &SavedRegs) const {
2791
2792 // If we have the "returned" parameter attribute which guarantees that we
2793 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2794 // record that fact for IPRA.
2795 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2796 if (AFI->getPreservesR0())
2797 SavedRegs.set(ARM::R0);
2798}
2799
// NOTE(review): the first signature lines are missing from this excerpt;
// presumably ARMFrameLowering::assignCalleeSavedSpillSlots -- confirm against
// the full file. Returns false so generic PEI slot assignment still runs for
// the (possibly extended) CSI list.
2802 std::vector<CalleeSavedInfo> &CSI) const {
2803 // For CMSE entry functions, handle floating-point context as if it was a
2804 // callee-saved register.
2805 if (STI.hasV8_1MMainlineOps() &&
2807 CSI.emplace_back(ARM::FPCXTNS);
// FPCXTNS is saved on entry but deliberately not reloaded by the
// generic restore path.
2808 CSI.back().setRestored(false);
2809 }
2810
2811 // For functions which sign their return address, upon function entry the
2812 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2813 // in this case.
2814 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2815 if (AFI.shouldSignReturnAddress()) {
2816 // The order of registers must match the order we push them, because the
2817 // PEI assigns frame indices in that order. When compiling for return
2818 // address sign and authentication, we use split push, therefore the orders
2819 // we want are:
2820 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2821 CSI.insert(find_if(CSI,
2822 [=](const auto &CS) {
2823 Register Reg = CS.getReg();
2824 return Reg == ARM::R10 || Reg == ARM::R11 ||
2825 Reg == ARM::R8 || Reg == ARM::R9 ||
2826 ARM::DPRRegClass.contains(Reg);
2827 }),
2828 CalleeSavedInfo(ARM::R12));
2829 }
2830
2831 return false;
2832}
2833
2836 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2837 NumEntries = std::size(FixedSpillOffsets);
2838 return FixedSpillOffsets;
2839}
2840
2841MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2844 const ARMBaseInstrInfo &TII =
2845 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2847 bool isARM = !AFI->isThumbFunction();
2848 DebugLoc dl = I->getDebugLoc();
2849 unsigned Opc = I->getOpcode();
2850 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
2851 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2852
2853 assert(!AFI->isThumb1OnlyFunction() &&
2854 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2855
2856 int PIdx = I->findFirstPredOperandIdx();
2857 ARMCC::CondCodes Pred = (PIdx == -1)
2858 ? ARMCC::AL
2859 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2860 unsigned PredReg = TII.getFramePred(*I);
2861
2862 if (!hasReservedCallFrame(MF)) {
2863 // Bail early if the callee is expected to do the adjustment.
2864 if (IsDestroy && CalleePopAmount != -1U)
2865 return MBB.erase(I);
2866
2867 // If we have alloca, convert as follows:
2868 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2869 // ADJCALLSTACKUP -> add, sp, sp, amount
2870 unsigned Amount = TII.getFrameSize(*I);
2871 if (Amount != 0) {
2872 // We need to keep the stack aligned properly. To do this, we round the
2873 // amount of space needed for the outgoing arguments up to the next
2874 // alignment boundary.
2875 Amount = alignSPAdjust(Amount);
2876
2877 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2878 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2879 Pred, PredReg);
2880 } else {
2881 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2882 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2883 Pred, PredReg);
2884 }
2885 }
2886 } else if (CalleePopAmount != -1U) {
2887 // If the calling convention demands that the callee pops arguments from the
2888 // stack, we want to add it back if we have a reserved call frame.
2889 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2890 MachineInstr::NoFlags, Pred, PredReg);
2891 }
2892 return MBB.erase(I);
2893}
2894
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// NOTE(review): the function header line was elided by the extractor; it is
/// restored here to match the prototype in this file's index
/// ("static uint32_t alignToARMConstant(uint32_t Value)").
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left by even amounts (matching the even-rotation
  // constraint) until the top two bits hold something.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Round up: if any bits were set below the top byte, bump the 8-bit
  // payload by one so the result stays >= the input.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // If the bump carried into bit 8, mask the payload back into the
  // encodable range.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization, returning the payload to its original magnitude.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
2923
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit. Frames smaller than this slack can compare SP directly
// against the stored limit (see CompareStackPointer below).
//
// NOTE(review): the initializer line was blanked by the extractor; the type
// comes from the declaration in this file's index and the value 256 matches
// the "less than 256" comparison documented at its only use site.
static const uint64_t kSplitStackAvailable = 256;
2928// Adjust the function prologue to enable split stacks. This currently only
2929// supports android and linux.
2930//
2931// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2932// must be well defined in order to allow for consistent implementations of the
2933// __morestack helper function. The ABI is also not a normal ABI in that it
2934// doesn't follow the normal calling conventions because this allows the
2935// prologue of each function to be optimized further.
2936//
2937// Currently, the ABI looks like (when calling __morestack)
2938//
2939// * r4 holds the minimum stack size requested for this function call
2940// * r5 holds the stack size of the arguments to the function
2941// * the beginning of the function is 3 instructions after the call to
2942// __morestack
2943//
2944// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2945// place the arguments on to the new stack, and the 3-instruction knowledge to
2946// jump directly to the body of the function when working on the new stack.
2947//
2948// An old (and possibly no longer compatible) implementation of __morestack for
2949// ARM can be found at [1].
2950//
2951// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// NOTE(review): this listing was produced by an extractor that dropped a
// number of source lines (the function signature on line 2952 and several
// operand/predOps lines, e.g. 2973, 2990-2992, 2996, 3031, 3054, 3086); the
// surviving code below is preserved verbatim, comments only are touched up.
2953 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2954 unsigned Opcode;
2955 unsigned CFIIndex;
2956 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2957 bool Thumb = ST->isThumb();
2958 bool Thumb2 = ST->isThumb2();
2959
2960 // Sadly, this currently doesn't support varargs, platforms other than
2961 // android/linux. Note that thumb1/thumb2 are supported for android/linux.
2962 if (MF.getFunction().isVarArg())
2963 report_fatal_error("Segmented stacks do not support vararg functions.");
2964 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2965 report_fatal_error("Segmented stacks not supported on this platform.");
2966
2967 MachineFrameInfo &MFI = MF.getFrameInfo();
2968 MachineModuleInfo &MMI = MF.getMMI();
2969 MCContext &Context = MMI.getContext();
2970 const MCRegisterInfo *MRI = Context.getRegisterInfo();
2971 const ARMBaseInstrInfo &TII =
2972 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2974 DebugLoc DL;
2975
2976 if (!MFI.needsSplitStackProlog())
2977 return;
2978
2979 uint64_t StackSize = MFI.getStackSize();
2980
2981 // Use R4 and R5 as scratch registers.
2982 // We save R4 and R5 before use and restore them before leaving the function.
2983 unsigned ScratchReg0 = ARM::R4;
2984 unsigned ScratchReg1 = ARM::R5;
2985 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
2986 uint64_t AlignedStackSize;
2987
2988 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
2989 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
2993
2994 // Grab everything that reaches PrologueMBB to update their liveness as well.
2995 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
2997 WalkList.push_back(&PrologueMBB);
2998
2999 do {
3000 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3001 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3002 if (BeforePrologueRegion.insert(PredBB).second)
3003 WalkList.push_back(PredBB);
3004 }
3005 } while (!WalkList.empty());
3006
3007 // The order in that list is important.
3008 // The blocks will all be inserted before PrologueMBB using that order.
3009 // Therefore the block that should appear first in the CFG should appear
3010 // first in the list.
3011 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3012 PostStackMBB};
3013
3014 for (MachineBasicBlock *B : AddedBlocks)
3015 BeforePrologueRegion.insert(B);
3016
3017 for (const auto &LI : PrologueMBB.liveins()) {
3018 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3019 PredBB->addLiveIn(LI);
3020 }
3021
3022 // Remove the newly added blocks from the list, since we know
3023 // we do not have to do the following updates for them.
3024 for (MachineBasicBlock *B : AddedBlocks) {
3025 BeforePrologueRegion.erase(B);
3026 MF.insert(PrologueMBB.getIterator(), B);
3027 }
3028
3029 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3030 // Make sure the LiveIns are still sorted and unique.
3032 // Replace the edges to PrologueMBB by edges to the sequences
3033 // we are about to add, but only update for immediate predecessors.
3034 if (MBB->isSuccessor(&PrologueMBB))
3035 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3036 }
3037
3038 // The required stack size that is aligned to ARM constant criterion.
3039 AlignedStackSize = alignToARMConstant(StackSize);
3040
3041 // When the frame size is less than 256 we just compare the stack
3042 // boundary directly to the value of the stack pointer, per gcc.
3043 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3044
3045 // We will use two of the callee save registers as scratch registers so we
3046 // need to save those registers onto the stack.
3047 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3048 // requested and arguments for __morestack().
3049 // SR0: Scratch Register #0
3050 // SR1: Scratch Register #1
3051 // push {SR0, SR1}
3052 if (Thumb) {
3053 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3055 .addReg(ScratchReg0)
3056 .addReg(ScratchReg1);
3057 } else {
3058 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3059 .addReg(ARM::SP, RegState::Define)
3060 .addReg(ARM::SP)
3062 .addReg(ScratchReg0)
3063 .addReg(ScratchReg1);
3064 }
3065
3066 // Emit the relevant DWARF information about the change in stack pointer as
3067 // well as where to find both r4 and r5 (the callee-save registers).
3068 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3069 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3070 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3071 .addCFIIndex(CFIIndex);
3073 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3074 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3075 .addCFIIndex(CFIIndex);
3077 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3078 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3079 .addCFIIndex(CFIIndex);
3080 }
3081
3082 // mov SR1, sp
3083 if (Thumb) {
3084 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3085 .addReg(ARM::SP)
3087 } else if (CompareStackPointer) {
3088 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3089 .addReg(ARM::SP)
3091 .add(condCodeOp());
3092 }
3093
3094 // sub SR1, sp, #StackSize
3095 if (!CompareStackPointer && Thumb) {
3096 if (AlignedStackSize < 256) {
3097 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3098 .add(condCodeOp())
3099 .addReg(ScratchReg1)
3100 .addImm(AlignedStackSize)
3102 } else {
3103 if (Thumb2 || ST->genExecuteOnly()) {
3104 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3105 .addImm(AlignedStackSize);
3106 } else {
3107 auto MBBI = McrMBB->end();
3108 auto RegInfo = STI.getRegisterInfo();
3109 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3110 AlignedStackSize);
3111 }
3112 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3113 .add(condCodeOp())
3114 .addReg(ScratchReg1)
3115 .addReg(ScratchReg0)
3117 }
3118 } else if (!CompareStackPointer) {
3119 if (AlignedStackSize < 256) {
3120 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3121 .addReg(ARM::SP)
3122 .addImm(AlignedStackSize)
3124 .add(condCodeOp());
3125 } else {
3126 auto MBBI = McrMBB->end();
3127 auto RegInfo = STI.getRegisterInfo();
3128 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3129 AlignedStackSize);
3130 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3131 .addReg(ARM::SP)
3132 .addReg(ScratchReg0)
3134 .add(condCodeOp());
3135 }
3136 }
3137
3138 if (Thumb && ST->isThumb1Only()) {
3139 if (ST->genExecuteOnly()) {
3140 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3141 .addExternalSymbol("__STACK_LIMIT");
3142 } else {
3143 unsigned PCLabelId = ARMFI->createPICLabelUId();
3145 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3147 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3148
3149 // ldr SR0, [pc, offset(STACK_LIMIT)]
3150 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3153 }
3154
3155 // ldr SR0, [SR0]
3156 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3157 .addReg(ScratchReg0)
3158 .addImm(0)
3160 } else {
3161 // Get TLS base address from the coprocessor
3162 // mrc p15, #0, SR0, c13, c0, #3
3163 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3164 ScratchReg0)
3165 .addImm(15)
3166 .addImm(0)
3167 .addImm(13)
3168 .addImm(0)
3169 .addImm(3)
3171
3172 // Use the last tls slot on android and a private field of the TCB on linux.
3173 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3174 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3175
3176 // Get the stack limit from the right offset
3177 // ldr SR0, [sr0, #4 * TlsOffset]
3178 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3179 ScratchReg0)
3180 .addReg(ScratchReg0)
3181 .addImm(4 * TlsOffset)
3183 }
3184
3185 // Compare stack limit with stack size requested.
3186 // cmp SR0, SR1
3187 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3188 BuildMI(GetMBB, DL, TII.get(Opcode))
3189 .addReg(ScratchReg0)
3190 .addReg(ScratchReg1)
3192
3193 // This jump is taken if StackLimit <= SP - stack required.
3194 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3195 BuildMI(GetMBB, DL, TII.get(Opcode))
3196 .addMBB(PostStackMBB)
3198 .addReg(ARM::CPSR);
3199
3200 // Calling __morestack(StackSize, Size of stack arguments).
3201 // __morestack knows that the stack size requested is in SR0(r4)
3202 // and amount size of stack arguments is in SR1(r5).
3203
3204 // Pass first argument for the __morestack by Scratch Register #0.
3205 // The amount size of stack required
3206 if (Thumb) {
3207 if (AlignedStackSize < 256) {
3208 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3209 .add(condCodeOp())
3210 .addImm(AlignedStackSize)
3212 } else {
3213 if (Thumb2 || ST->genExecuteOnly()) {
3214 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3215 .addImm(AlignedStackSize);
3216 } else {
3217 auto MBBI = AllocMBB->end();
3218 auto RegInfo = STI.getRegisterInfo();
3219 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3220 AlignedStackSize);
3221 }
3222 }
3223 } else {
3224 if (AlignedStackSize < 256) {
3225 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3226 .addImm(AlignedStackSize)
3228 .add(condCodeOp());
3229 } else {
3230 auto MBBI = AllocMBB->end();
3231 auto RegInfo = STI.getRegisterInfo();
3232 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3233 AlignedStackSize);
3234 }
3235 }
3236
3237 // Pass second argument for the __morestack by Scratch Register #1.
3238 // The amount size of stack consumed to save function arguments.
3239 if (Thumb) {
3240 if (ARMFI->getArgumentStackSize() < 256) {
3241 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3242 .add(condCodeOp())
3245 } else {
3246 if (Thumb2 || ST->genExecuteOnly()) {
3247 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3249 } else {
3250 auto MBBI = AllocMBB->end();
3251 auto RegInfo = STI.getRegisterInfo();
3252 RegInfo->emitLoadConstPool(
3253 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3255 }
3256 }
3257 } else {
3258 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3259 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3262 .add(condCodeOp());
3263 } else {
3264 auto MBBI = AllocMBB->end();
3265 auto RegInfo = STI.getRegisterInfo();
3266 RegInfo->emitLoadConstPool(
3267 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3269 }
3270 }
3271
3272 // push {lr} - Save return address of this function.
3273 if (Thumb) {
3274 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3276 .addReg(ARM::LR);
3277 } else {
3278 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3279 .addReg(ARM::SP, RegState::Define)
3280 .addReg(ARM::SP)
3282 .addReg(ARM::LR);
3283 }
3284
3285 // Emit the DWARF info about the change in stack as well as where to find the
3286 // previous link register
3287 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3288 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3289 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3290 .addCFIIndex(CFIIndex);
3292 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3293 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3294 .addCFIIndex(CFIIndex);
3295 }
3296
3297 // Call __morestack().
3298 if (Thumb) {
3299 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3301 .addExternalSymbol("__morestack");
3302 } else {
3303 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3304 .addExternalSymbol("__morestack");
3305 }
3306
3307 // pop {lr} - Restore return address of this original function.
3308 if (Thumb) {
3309 if (ST->isThumb1Only()) {
3310 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3312 .addReg(ScratchReg0);
3313 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3314 .addReg(ScratchReg0)
3316 } else {
3317 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3318 .addReg(ARM::LR, RegState::Define)
3319 .addReg(ARM::SP, RegState::Define)
3320 .addReg(ARM::SP)
3321 .addImm(4)
3323 }
3324 } else {
3325 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3326 .addReg(ARM::SP, RegState::Define)
3327 .addReg(ARM::SP)
3329 .addReg(ARM::LR);
3330 }
3331
3332 // Restore SR0 and SR1 in case of __morestack() was called.
3333 // __morestack() will skip PostStackMBB block so we need to restore
3334 // scratch registers from here.
3335 // pop {SR0, SR1}
3336 if (Thumb) {
3337 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3339 .addReg(ScratchReg0)
3340 .addReg(ScratchReg1);
3341 } else {
3342 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3343 .addReg(ARM::SP, RegState::Define)
3344 .addReg(ARM::SP)
3346 .addReg(ScratchReg0)
3347 .addReg(ScratchReg1);
3348 }
3349
3350 // Update the CFA offset now that we've popped
3351 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3352 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3353 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3354 .addCFIIndex(CFIIndex);
3355 }
3356
3357 // Return from this function.
3358 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3359
3360 // Restore SR0 and SR1 in case of __morestack() was not called.
3361 // pop {SR0, SR1}
3362 if (Thumb) {
3363 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3365 .addReg(ScratchReg0)
3366 .addReg(ScratchReg1);
3367 } else {
3368 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3369 .addReg(ARM::SP, RegState::Define)
3370 .addReg(ARM::SP)
3372 .addReg(ScratchReg0)
3373 .addReg(ScratchReg1);
3374 }
3375
3376 // Update the CFA offset now that we've popped
3377 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3378 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3379 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3380 .addCFIIndex(CFIIndex);
3381
3382 // Tell debuggers that r4 and r5 are now the same as they were in the
3383 // previous function, that they're the "Same Value".
3385 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3386 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3387 .addCFIIndex(CFIIndex);
3389 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3390 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3391 .addCFIIndex(CFIIndex);
3392 }
3393
3394 // Organizing MBB lists
3395 PostStackMBB->addSuccessor(&PrologueMBB);
3396
3397 AllocMBB->addSuccessor(PostStackMBB);
3398
3399 GetMBB->addSuccessor(PostStackMBB);
3400 GetMBB->addSuccessor(AllocMBB);
3401
3402 McrMBB->addSuccessor(GetMBB);
3403
3404 PrevStackMBB->addSuccessor(McrMBB);
3405
3406#ifdef EXPENSIVE_CHECKS
3407 MF.verify();
3408#endif
3409}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool requiresAAPCSFrameRecord(const MachineFunction &MF)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
bool End
Definition: ELF_riscv.cpp:469
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:470
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
bool useMovt() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
bool isTargetWindows() const
Definition: ARMSubtarget.h:367
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:274
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:444
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:370
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:239
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:189
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:645
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:547
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:582
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:555
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:540
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:629
Context object for machine code objects.
Definition: MCContext.h:76
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
int getStackProtectorIndex() const
Return the index for the stack protector object.
int getOffsetAdjustment() const
Return the correction for frame offsets.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:380
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:451
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
typename SuperClass::iterator iterator
Definition: SmallVector.h:581
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:36
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:330
@ Offset
Definition: DWP.cpp:440
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1652
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1919
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1754
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materializea des...
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85