1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/Function.h"
31#include "llvm/MC/MCAsmInfo.h"
33#include "llvm/MC/MCSymbol.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/LEB128.h"
37#include <cstdlib>
38
39#define DEBUG_TYPE "x86-fl"
40
41STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
42STATISTIC(NumFrameExtraProbe,
43 "Number of extra stack probes generated in prologue");
44 STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
45
46using namespace llvm;
47
49 MaybeAlign StackAlignOverride)
50 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
51 STI.is64Bit() ? -8 : -4),
52 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
53 // Cache a bunch of frame-related predicates for this subtarget.
55 Is64Bit = STI.is64Bit();
 57 // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
60}
61
63 return !MF.getFrameInfo().hasVarSizedObjects() &&
64 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
65 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
66}
67
68/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
69/// call frame pseudos can be simplified. Having an FP, as in the default
70/// implementation, is not sufficient here since we can't always use it.
71/// Use a more nuanced condition.
73 const MachineFunction &MF) const {
74 return hasReservedCallFrame(MF) ||
75 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
76 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
78}
79
80// needsFrameIndexResolution - Do we need to perform FI resolution for
81// this function? Normally, this is required only when the function
82// has any stack objects. However, FI resolution actually has another job,
83// not apparent from the name: it resolves call frame setup/destroy pseudos
84// that were not simplified earlier.
85// So, this is required for x86 functions that have push sequences even
86// when there are no stack objects.
88 const MachineFunction &MF) const {
89 return MF.getFrameInfo().hasStackObjects() ||
90 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
91}
92
93/// hasFP - Return true if the specified function should have a dedicated frame
94/// pointer register. This is true if the function has variable sized allocas
95/// or if frame pointer elimination is disabled.
97 const MachineFrameInfo &MFI = MF.getFrameInfo();
98 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
99 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
103 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
104 MFI.hasStackMap() || MFI.hasPatchPoint() ||
105 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
106}
107
108static unsigned getSUBriOpcode(bool IsLP64) {
109 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
110}
111
112static unsigned getADDriOpcode(bool IsLP64) {
113 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
114}
115
116static unsigned getSUBrrOpcode(bool IsLP64) {
117 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
118}
119
120static unsigned getADDrrOpcode(bool IsLP64) {
121 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
122}
123
124static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
125 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
126}
127
128static unsigned getLEArOpcode(bool IsLP64) {
129 return IsLP64 ? X86::LEA64r : X86::LEA32r;
130}
131
132static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
133 if (Use64BitReg) {
134 if (isUInt<32>(Imm))
135 return X86::MOV32ri64;
136 if (isInt<32>(Imm))
137 return X86::MOV64ri32;
138 return X86::MOV64ri;
139 }
140 return X86::MOV32ri;
141}
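// For illustration (a sketch of the selection logic above, not emitted
// verbatim): on x86-64,
//   Imm = 0x80000000  -> MOV32ri64 (fits in uint32; upper half implicitly zero)
//   Imm = -8          -> MOV64ri32 (fits in int32; sign-extended to 64 bits)
//   Imm = 1 << 32     -> MOV64ri   (needs the full 64-bit movabsq encoding)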
142
143// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
144// value written by the PUSH from the stack. The processor tracks these marked
145// instructions internally and fast-forwards register data between matching PUSH
146// and POP instructions, without going through memory or through the training
147// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
148// memory-renaming optimization can be used.
149//
150// The PPX hint is purely a performance hint. Instructions with this hint have
151// the same functional semantics as those without. PPX hints set by the
152// compiler that violate the balancing rule may turn off the PPX optimization,
153// but they will not affect program semantics.
154//
155// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
156// are not considered).
157//
158// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
159// GPRs at a time to/from the stack.
160static unsigned getPUSHOpcode(const X86Subtarget &ST) {
161 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
162 : X86::PUSH32r;
163}
164static unsigned getPOPOpcode(const X86Subtarget &ST) {
165 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
166 : X86::POP32r;
167}
168static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
169 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
170}
171static unsigned getPOP2Opcode(const X86Subtarget &ST) {
172 return ST.hasPPX() ? X86::POP2P : X86::POP2;
173}
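// Illustrative sketch (not emitted verbatim): on a PPX-capable 64-bit
// target, these helpers select the hinted forms, e.g. a balanced frame
//   PUSH2P r15, r14   ; getPUSH2Opcode(ST) == X86::PUSH2P
//   ...
//   POP2P  r14, r15   ; getPOP2Opcode(ST)  == X86::POP2P
// where every hinted PUSH is matched by its POP, keeping the hint valid.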
174
177 unsigned Reg = RegMask.PhysReg;
178
179 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
180 Reg == X86::AH || Reg == X86::AL)
181 return true;
182 }
183
184 return false;
185}
186
187/// Check if the flags need to be preserved before the terminators.
188/// This is the case if EFLAGS is live into the region composed of the
189/// terminators, or live out of that region without being defined by a
190/// terminator.
191static bool
193 for (const MachineInstr &MI : MBB.terminators()) {
194 bool BreakNext = false;
195 for (const MachineOperand &MO : MI.operands()) {
196 if (!MO.isReg())
197 continue;
198 Register Reg = MO.getReg();
199 if (Reg != X86::EFLAGS)
200 continue;
201
202 // This terminator needs an EFLAGS value that is not defined
203 // by a previous terminator:
204 // EFLAGS is live into the region composed of the terminators.
205 if (!MO.isDef())
206 return true;
207 // This terminator defines the eflags, i.e., we don't need to preserve them.
208 // However, we still need to check that this specific terminator does not
209 // read a live-in value.
210 BreakNext = true;
211 }
212 // We found a definition of the eflags, no need to preserve them.
213 if (BreakNext)
214 return false;
215 }
216
217 // None of the terminators use or define the eflags.
218 // Check if they are live-out, that would imply we need to preserve them.
219 for (const MachineBasicBlock *Succ : MBB.successors())
220 if (Succ->isLiveIn(X86::EFLAGS))
221 return true;
222
223 return false;
224}
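// For example (sketch, loose MIR syntax): a block ending in
//   JCC_1 %bb.1, 4, implicit $eflags
//   JMP_1 %bb.2
// reads EFLAGS in its terminator region without defining it there, so this
// function returns true and the epilogue must avoid clobbering EFLAGS.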
225
226/// emitSPUpdate - Emit a series of instructions to increment / decrement the
227/// stack pointer by a constant value.
230 const DebugLoc &DL, int64_t NumBytes,
231 bool InEpilogue) const {
232 bool isSub = NumBytes < 0;
233 uint64_t Offset = isSub ? -NumBytes : NumBytes;
236
237 uint64_t Chunk = (1LL << 31) - 1;
238
242 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
243
244 // It's ok to not take into account large chunks when probing, as the
245 // allocation is split into smaller chunks anyway.
246 if (EmitInlineStackProbe && !InEpilogue) {
247
248 // This pseudo-instruction is going to be expanded, potentially using a
249 // loop, by inlineStackProbe().
250 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
251 return;
252 } else if (Offset > Chunk) {
253 // Rather than emit a long series of instructions for large offsets,
254 // load the offset into a register and do one sub/add
255 unsigned Reg = 0;
256 unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
257
258 if (isSub && !isEAXLiveIn(MBB))
259 Reg = Rax;
260 else
262
263 unsigned AddSubRROpc =
265 if (Reg) {
267 .addImm(Offset)
268 .setMIFlag(Flag);
269 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
271 .addReg(Reg);
272 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
273 return;
274 } else if (Offset > 8 * Chunk) {
275 // If we would need more than 8 add or sub instructions (a >16GB stack
276 // frame), it's worth spilling RAX to materialize this immediate.
277 // pushq %rax
278 // movabsq +-$Offset+-SlotSize, %rax
279 // addq %rsp, %rax
280 // xchg %rax, (%rsp)
281 // movq (%rsp), %rsp
282 assert(Is64Bit && "can't have 32-bit 16GB stack frame");
283 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
285 .setMIFlag(Flag);
286 // Subtract is not commutative, so negate the offset and always use add.
287 // Subtract 8 less and add 8 more to account for the PUSH we just did.
288 if (isSub)
289 Offset = -(Offset - SlotSize);
290 else
293 .addImm(Offset)
294 .setMIFlag(Flag);
295 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
296 .addReg(Rax)
298 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
299 // Exchange the new SP in RAX with the top of the stack.
301 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
302 StackPtr, false, 0);
303 // Load new SP from the top of the stack into RSP.
304 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
305 StackPtr, false, 0);
306 return;
307 }
308 }
309
310 while (Offset) {
311 uint64_t ThisVal = std::min(Offset, Chunk);
312 if (ThisVal == SlotSize) {
313 // Use push / pop for slot sized adjustments as a size optimization. We
314 // need to find a dead register when using pop.
315 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
317 if (Reg) {
318 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
319 : (Is64Bit ? X86::POP64r : X86::POP32r);
320 BuildMI(MBB, MBBI, DL, TII.get(Opc))
321 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
322 .setMIFlag(Flag);
323 Offset -= ThisVal;
324 continue;
325 }
326 }
327
328 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
329 .setMIFlag(Flag);
330
331 Offset -= ThisVal;
332 }
333}
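// Rough sketch of what the logic above emits on x86-64, assuming RAX is
// dead and inline probing is off:
//   emitSPUpdate(..., -8, ...)   ->  pushq %rax        ; slot-sized case
//   emitSPUpdate(..., -40, ...)  ->  subq  $40, %rsp
//   emitSPUpdate(...,  40, ...)  ->  addq  $40, %rsp
// Offsets beyond the ~2GB chunk limit fall back to materializing the
// amount in a register first.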
334
335MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
337 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
338 assert(Offset != 0 && "zero offset stack adjustment requested");
339
340 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
341 // is tricky.
342 bool UseLEA;
343 if (!InEpilogue) {
344 // Check if inserting the prologue at the beginning
345 // of MBB would require to use LEA operations.
346 // We need to use LEA operations if EFLAGS is live in, because
347 // it means an instruction will read it before it gets defined.
348 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
349 } else {
350 // If we can use LEA for SP but we shouldn't, check that none
351 // of the terminators uses the eflags. Otherwise we will insert
352 // an ADD that will redefine the eflags and break the condition.
353 // Alternatively, we could move the ADD, but this may not be possible
354 // and is an optimization anyway.
356 if (UseLEA && !STI.useLeaForSP())
358 // If that assert breaks, that means we do not do the right thing
359 // in canUseAsEpilogue.
361 "We shouldn't have allowed this insertion point");
362 }
363
365 if (UseLEA) {
368 StackPtr),
369 StackPtr, false, Offset);
370 } else {
371 bool IsSub = Offset < 0;
372 uint64_t AbsOffset = IsSub ? -Offset : Offset;
373 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
375 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
377 .addImm(AbsOffset);
378 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
379 }
380 return MI;
381}
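// Illustration of the two forms chosen above (x86-64, Offset = -16):
//   UseLEA:  leaq -16(%rsp), %rsp   ; leaves EFLAGS untouched
//   else:    subq $16, %rsp         ; EFLAGS def exists but is marked dead
// which is why EFLAGS liveness drives the choice in prologue and epilogue.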
382
385 bool doMergeWithPrevious) const {
386 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
387 (!doMergeWithPrevious && MBBI == MBB.end()))
388 return 0;
389
390 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
391
393 // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
394 // instruction, and that there are no DBG_VALUE or other instructions between
395 // the ADD/SUB/LEA and its corresponding CFI instruction.
396 /* TODO: Add support for the case where there are multiple CFI instructions
397 below the ADD/SUB/LEA, e.g.:
398 ...
399 add
400 cfi_def_cfa_offset
401 cfi_offset
402 ...
403 */
404 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
405 PI = std::prev(PI);
406
407 unsigned Opc = PI->getOpcode();
408 int Offset = 0;
409
410 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
411 PI->getOperand(0).getReg() == StackPtr) {
412 assert(PI->getOperand(1).getReg() == StackPtr);
413 Offset = PI->getOperand(2).getImm();
414 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
415 PI->getOperand(0).getReg() == StackPtr &&
416 PI->getOperand(1).getReg() == StackPtr &&
417 PI->getOperand(2).getImm() == 1 &&
418 PI->getOperand(3).getReg() == X86::NoRegister &&
419 PI->getOperand(5).getReg() == X86::NoRegister) {
420 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
421 Offset = PI->getOperand(4).getImm();
422 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
423 PI->getOperand(0).getReg() == StackPtr) {
424 assert(PI->getOperand(1).getReg() == StackPtr);
425 Offset = -PI->getOperand(2).getImm();
426 } else
427 return 0;
428
429 PI = MBB.erase(PI);
430 if (PI != MBB.end() && PI->isCFIInstruction()) {
431 auto CIs = MBB.getParent()->getFrameInstructions();
432 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
435 PI = MBB.erase(PI);
436 }
437 if (!doMergeWithPrevious)
439
440 return Offset;
441}
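// Sketch of a merge: given the sequence
//   %rsp = SUB64ri32 %rsp, 16      <- PI, erased; function returns -16
//   <MBBI>
// the caller folds the -16 into its own adjustment rather than leaving two
// back-to-back updates of the stack pointer.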
442
445 const DebugLoc &DL,
446 const MCCFIInstruction &CFIInst,
447 MachineInstr::MIFlag Flag) const {
449 unsigned CFIIndex = MF.addFrameInst(CFIInst);
450
452 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
453
454 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
455 .addCFIIndex(CFIIndex)
456 .setMIFlag(Flag);
457}
458
459/// Emits DWARF info specifying offsets of callee-saved registers and the
460/// frame pointer. This is called only when basic block sections are enabled.
464 if (!hasFP(MF)) {
466 return;
467 }
468 const MachineModuleInfo &MMI = MF.getMMI();
471 const Register MachineFramePtr =
473 : FramePtr;
474 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
475 // Offset = space for return address + size of the frame pointer itself.
476 unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
478 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
480}
481
484 const DebugLoc &DL, bool IsPrologue) const {
486 MachineFrameInfo &MFI = MF.getFrameInfo();
487 MachineModuleInfo &MMI = MF.getMMI();
490
491 // Add callee saved registers to move list.
492 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
493
494 // Calculate offsets.
495 for (const CalleeSavedInfo &I : CSI) {
496 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
497 Register Reg = I.getReg();
498 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
499
500 if (IsPrologue) {
501 if (X86FI->getStackPtrSaveMI()) {
502 // +2*SlotSize because there is return address and ebp at the bottom
503 // of the stack.
504 // | retaddr |
505 // | ebp |
506 // | |<--ebp
507 Offset += 2 * SlotSize;
508 SmallString<64> CfaExpr;
509 CfaExpr.push_back(dwarf::DW_CFA_expression);
510 uint8_t buffer[16];
511 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
512 CfaExpr.push_back(2);
514 const Register MachineFramePtr =
517 : FramePtr;
518 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
519 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
520 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
522 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
524 } else {
526 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
527 }
528 } else {
530 MCCFIInstruction::createRestore(nullptr, DwarfReg));
531 }
532 }
533 if (auto *MI = X86FI->getStackPtrSaveMI()) {
534 int FI = MI->getOperand(1).getIndex();
535 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
536 SmallString<64> CfaExpr;
538 const Register MachineFramePtr =
541 : FramePtr;
542 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
543 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
544 uint8_t buffer[16];
545 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
546 CfaExpr.push_back(dwarf::DW_OP_deref);
547
548 SmallString<64> DefCfaExpr;
549 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
550 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
551 DefCfaExpr.append(CfaExpr.str());
552 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
554 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
556 }
557}
558
559void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
560 MachineBasicBlock &MBB) const {
561 const MachineFunction &MF = *MBB.getParent();
562
563 // Insertion point.
565
566 // Fake a debug loc.
567 DebugLoc DL;
568 if (MBBI != MBB.end())
569 DL = MBBI->getDebugLoc();
570
571 // Zero out FP stack if referenced. Do this outside of the loop below so that
572 // it's done only once.
573 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
574 for (MCRegister Reg : RegsToZero.set_bits()) {
575 if (!X86::RFP80RegClass.contains(Reg))
576 continue;
577
578 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
579 for (unsigned i = 0; i != NumFPRegs; ++i)
580 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
581
582 for (unsigned i = 0; i != NumFPRegs; ++i)
583 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
584 break;
585 }
586
587 // For GPRs, we only care to clear out the 32-bit register.
588 BitVector GPRsToZero(TRI->getNumRegs());
589 for (MCRegister Reg : RegsToZero.set_bits())
590 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
591 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
592 RegsToZero.reset(Reg);
593 }
594
595 // Zero out the GPRs first.
596 for (MCRegister Reg : GPRsToZero.set_bits())
598
599 // Zero out the remaining registers.
600 for (MCRegister Reg : RegsToZero.set_bits())
602}
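// Sketch of the x87 clearing above (64-bit): eight LD_F0 ("fldz") pushes
// fill the FP stack with zeros, then eight ST_FPrr pops drain it, leaving
// every st(0..7) slot zeroed and the register stack empty again.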
603
606 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
607 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
610 if (InProlog) {
611 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
612 .addImm(0 /* no explicit stack size */);
613 } else {
614 emitStackProbeInline(MF, MBB, MBBI, DL, false);
615 }
616 } else {
617 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
618 }
619}
620
622 return STI.isOSWindows() && !STI.isTargetWin64();
623}
624
626 MachineBasicBlock &PrologMBB) const {
627 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
628 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
629 });
630 if (Where != PrologMBB.end()) {
631 DebugLoc DL = PrologMBB.findDebugLoc(Where);
632 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
633 Where->eraseFromParent();
634 }
635}
636
637void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
640 const DebugLoc &DL,
641 bool InProlog) const {
643 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
644 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
645 else
646 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
647}
648
649void X86FrameLowering::emitStackProbeInlineGeneric(
651 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
652 MachineInstr &AllocWithProbe = *MBBI;
653 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
654
657 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
658 "different expansion expected for CoreCLR 64 bit");
659
660 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
661 uint64_t ProbeChunk = StackProbeSize * 8;
662
663 uint64_t MaxAlign =
664 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
665
666 // Synthesize a loop or unroll it, depending on the number of iterations.
667 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
668 // left between the unaligned rsp and the current rsp.
669 if (Offset > ProbeChunk) {
670 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
671 MaxAlign % StackProbeSize);
672 } else {
673 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
674 MaxAlign % StackProbeSize);
675 }
676}
677
678void X86FrameLowering::emitStackProbeInlineGenericBlock(
681 uint64_t AlignOffset) const {
682
683 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
684 const bool HasFP = hasFP(MF);
687 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
688 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
689
690 uint64_t CurrentOffset = 0;
691
692 assert(AlignOffset < StackProbeSize);
693
694 // If the offset is so small it fits within a page, there's nothing to do.
695 if (StackProbeSize < Offset + AlignOffset) {
696
697 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
698 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
700 if (!HasFP && NeedsDwarfCFI) {
701 BuildCFI(
702 MBB, MBBI, DL,
703 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
704 }
705
706 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
708 StackPtr, false, 0)
709 .addImm(0)
711 NumFrameExtraProbe++;
712 CurrentOffset = StackProbeSize - AlignOffset;
713 }
714
715 // For the next N - 1 pages, just probe. I tried to take advantage of
716 // natural probes, but it implied much more logic and there were very few
717 // interesting natural probes to interleave.
718 while (CurrentOffset + StackProbeSize < Offset) {
719 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
721
722 if (!HasFP && NeedsDwarfCFI) {
723 BuildCFI(
724 MBB, MBBI, DL,
725 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
726 }
727 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
729 StackPtr, false, 0)
730 .addImm(0)
732 NumFrameExtraProbe++;
733 CurrentOffset += StackProbeSize;
734 }
735
736 // No need to probe the tail; it is smaller than a page.
737 uint64_t ChunkSize = Offset - CurrentOffset;
738 if (ChunkSize == SlotSize) {
739 // Use push for slot sized adjustments as a size optimization,
740 // like emitSPUpdate does when not probing.
741 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
742 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
743 BuildMI(MBB, MBBI, DL, TII.get(Opc))
746 } else {
747 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
749 }
750 // No need to adjust the DWARF CFA offset here; the last position of the
751 // stack has already been defined.
752}
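// Worked example (sketch; 4 KiB probe size, AlignOffset = 0,
// Offset = 0x2800):
//   subq $0x1000, %rsp
//   movl $0, (%rsp)         ; probe page 1
//   subq $0x1000, %rsp
//   movl $0, (%rsp)         ; probe page 2
//   subq $0x800, %rsp       ; tail, smaller than a page, not probed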
753
754void X86FrameLowering::emitStackProbeInlineGenericLoop(
757 uint64_t AlignOffset) const {
758 assert(Offset && "null offset");
759
760 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
762 "Inline stack probe loop will clobber live EFLAGS.");
763
764 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
765 const bool HasFP = hasFP(MF);
768 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
769 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
770
771 if (AlignOffset) {
772 if (AlignOffset < StackProbeSize) {
773 // Perform a first smaller allocation followed by a probe.
774 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
776
777 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
779 StackPtr, false, 0)
780 .addImm(0)
782 NumFrameExtraProbe++;
783 Offset -= AlignOffset;
784 }
785 }
786
787 // Synthesize a loop
788 NumFrameLoopProbe++;
789 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
790
791 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
792 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
793
795 MF.insert(MBBIter, testMBB);
796 MF.insert(MBBIter, tailMBB);
797
798 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
799 : Is64Bit ? X86::R11D
800 : X86::EAX;
801
802 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
805
806 // save loop bound
807 {
808 const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
809 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
810 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
811 .addReg(FinalStackProbed)
812 .addImm(BoundOffset)
814
815 // while in the loop, use loop-invariant reg for CFI,
816 // instead of the stack pointer, which changes during the loop
817 if (!HasFP && NeedsDwarfCFI) {
818 // x32 uses the same DWARF register numbers as x86-64,
819 // so there isn't a register number for r11d; we must use r11 instead.
820 const Register DwarfFinalStackProbed =
822 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
823 : FinalStackProbed;
824
827 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
829 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
830 }
831 }
832
833 // allocate a page
834 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
835 /*InEpilogue=*/false)
837
838 // touch the page
839 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
841 StackPtr, false, 0)
842 .addImm(0)
844
845 // cmp with stack pointer bound
846 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
848 .addReg(FinalStackProbed)
850
851 // jump
852 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
853 .addMBB(testMBB)
856 testMBB->addSuccessor(testMBB);
857 testMBB->addSuccessor(tailMBB);
858
859 // BB management
860 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
862 MBB.addSuccessor(testMBB);
863
864 // handle tail
865 const uint64_t TailOffset = Offset % StackProbeSize;
866 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
867 if (TailOffset) {
868 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
869 /*InEpilogue=*/false)
871 }
872
873 // after the loop, switch back to stack pointer for CFI
874 if (!HasFP && NeedsDwarfCFI) {
875 // x32 uses the same DWARF register numbers as x86-64,
876 // so there isn't a register number for esp; we must use rsp instead.
877 const Register DwarfStackPtr =
881
882 BuildCFI(*tailMBB, TailMBBIter, DL,
884 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
885 }
886
887 // Update Live In information
888 fullyRecomputeLiveIns({tailMBB, testMBB});
889}
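// The synthesized loop is roughly (sketch; x86-64, no frame pointer,
// 4 KiB probe size):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11   ; loop-invariant bound for the last probe
// test:
//   subq $0x1000, %rsp
//   movl $0, (%rsp)           ; touch the page
//   cmpq %r11, %rsp
//   jne  test
//   subq $TailOffset, %rsp    ; remainder, smaller than a page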
890
891void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
893 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
895 assert(STI.is64Bit() && "different expansion needed for 32 bit");
896 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
898 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
899
900 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
902 "Inline stack probe loop will clobber live EFLAGS.");
903
904 // RAX contains the number of bytes of desired stack adjustment.
905 // The handling here assumes this value has already been updated so as to
906 // maintain stack alignment.
907 //
908 // We need to exit with RSP modified by this amount and execute suitable
909 // page touches to notify the OS that we're growing the stack responsibly.
910 // All stack probing must be done without modifying RSP.
911 //
912 // MBB:
913 // SizeReg = RAX;
914 // ZeroReg = 0
915 // CopyReg = RSP
916 // Flags, TestReg = CopyReg - SizeReg
917 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
918 // LimitReg = gs magic thread env access
919 // if FinalReg >= LimitReg goto ContinueMBB
920 // RoundBB:
921 // RoundReg = page address of FinalReg
922 // LoopMBB:
923 // LoopReg = PHI(LimitReg,ProbeReg)
924 // ProbeReg = LoopReg - PageSize
925 // [ProbeReg] = 0
926 // if (ProbeReg > RoundReg) goto LoopMBB
927 // ContinueMBB:
928 // RSP = RSP - RAX
929 // [rest of original MBB]
930
931 // Set up the new basic blocks
932 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
933 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
934 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
935
936 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
937 MF.insert(MBBIter, RoundMBB);
938 MF.insert(MBBIter, LoopMBB);
939 MF.insert(MBBIter, ContinueMBB);
940
941 // Split MBB and move the tail portion down to ContinueMBB.
942 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
943 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
945
946 // Some useful constants
947 const int64_t ThreadEnvironmentStackLimit = 0x10;
948 const int64_t PageSize = 0x1000;
949 const int64_t PageMask = ~(PageSize - 1);
950
951 // Registers we need. For the normal case we use virtual
952 // registers. For the prolog expansion we use RAX, RCX and RDX.
954 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
955 const Register
956 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
957 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
958 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
959 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
960 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
961 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
962 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
963 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
964 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
965
966 // SP-relative offsets where we can save RCX and RDX.
967 int64_t RCXShadowSlot = 0;
968 int64_t RDXShadowSlot = 0;
969
970 // If inlining in the prolog, save RCX and RDX.
971 if (InProlog) {
972 // Compute the offsets. We need to account for things already
973 // pushed onto the stack at this point: return address, frame
974 // pointer (if used), and callee saves.
976 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
977 const bool HasFP = hasFP(MF);
978
979 // Check if we need to spill RCX and/or RDX.
980 // Here we assume that no earlier prologue instruction changes RCX and/or
981 // RDX, so checking the block live-ins is enough.
982 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
983 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
984 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
985 // Assign the initial slot to both registers, then change RDX's slot if both
986 // need to be spilled.
987 if (IsRCXLiveIn)
988 RCXShadowSlot = InitSlot;
989 if (IsRDXLiveIn)
990 RDXShadowSlot = InitSlot;
991 if (IsRDXLiveIn && IsRCXLiveIn)
992 RDXShadowSlot += 8;
993 // Emit the saves if needed.
994 if (IsRCXLiveIn)
995 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
996 RCXShadowSlot)
997 .addReg(X86::RCX);
998 if (IsRDXLiveIn)
999 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1000 RDXShadowSlot)
1001 .addReg(X86::RDX);
1002 } else {
1003 // Not in the prolog. Copy RAX to a virtual reg.
1004 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1005 }
1006
1007 // Add code to MBB to check for overflow and set the new target stack pointer
1008 // to zero if so.
1009 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1010 .addReg(ZeroReg, RegState::Undef)
1011 .addReg(ZeroReg, RegState::Undef);
1012 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1013 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1014 .addReg(CopyReg)
1015 .addReg(SizeReg);
1016 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1017 .addReg(TestReg)
1018 .addReg(ZeroReg)
1020
1021 // FinalReg now holds final stack pointer value, or zero if
1022 // allocation would overflow. Compare against the current stack
1023 // limit from the thread environment block. Note this limit is the
1024 // lowest touched page on the stack, not the point at which the OS
1025 // will cause an overflow exception, so this is just an optimization
1026 // to avoid unnecessarily touching pages that are below the current
1027 // SP but already committed to the stack by the OS.
1028 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1029 .addReg(0)
1030 .addImm(1)
1031 .addReg(0)
1032 .addImm(ThreadEnvironmentStackLimit)
1033 .addReg(X86::GS);
1034 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1035 // Jump if the desired stack pointer is at or above the stack limit.
1036 BuildMI(&MBB, DL, TII.get(X86::JCC_1))
1037 .addMBB(ContinueMBB)
1039
1040 // Add code to roundMBB to round the final stack pointer to a page boundary.
1041 RoundMBB->addLiveIn(FinalReg);
1042 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1043 .addReg(FinalReg)
1044 .addImm(PageMask);
1045 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1046
1047 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1048 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1049 // and probe until we reach RoundedReg.
1050 if (!InProlog) {
1051 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1052 .addReg(LimitReg)
1053 .addMBB(RoundMBB)
1054 .addReg(ProbeReg)
1055 .addMBB(LoopMBB);
1056 }
1057
1058 LoopMBB->addLiveIn(JoinReg);
1059 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1060 false, -PageSize);
1061
1062 // Probe by storing a byte onto the stack.
1063 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1064 .addReg(ProbeReg)
1065 .addImm(1)
1066 .addReg(0)
1067 .addImm(0)
1068 .addReg(0)
1069 .addImm(0);
1070
1071 LoopMBB->addLiveIn(RoundedReg);
1072 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1073 .addReg(RoundedReg)
1074 .addReg(ProbeReg);
1075 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
1076 .addMBB(LoopMBB)
1078
1079 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1080
1081 // If in prolog, restore RDX and RCX.
1082 if (InProlog) {
1083 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1084 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1085 TII.get(X86::MOV64rm), X86::RCX),
1086 X86::RSP, false, RCXShadowSlot);
1087 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1088 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1089 TII.get(X86::MOV64rm), X86::RDX),
1090 X86::RSP, false, RDXShadowSlot);
1091 }
1092
1093 // Now that the probing is done, add code to continueMBB to update
1094 // the stack pointer for real.
1095 ContinueMBB->addLiveIn(SizeReg);
1096 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1097 .addReg(X86::RSP)
1098 .addReg(SizeReg);
1099
1100 // Add the control flow edges we need.
1101 MBB.addSuccessor(ContinueMBB);
1102 MBB.addSuccessor(RoundMBB);
1103 RoundMBB->addSuccessor(LoopMBB);
1104 LoopMBB->addSuccessor(ContinueMBB);
1105 LoopMBB->addSuccessor(LoopMBB);
1106
1107 // Mark all the instructions added to the prolog as frame setup.
1108 if (InProlog) {
1109 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1110 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1111 }
1112 for (MachineInstr &MI : *RoundMBB) {
1114 }
1115 for (MachineInstr &MI : *LoopMBB) {
1117 }
1118 for (MachineInstr &MI :
1119 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1121 }
1122 }
1123}
1124
1125void X86FrameLowering::emitStackProbeCall(
1127 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1128 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1129 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1130
1131 // FIXME: Add indirect thunk support and remove this.
1132 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1133 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1134 "code model and indirect thunks not yet implemented.");
1135
1136 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1138 "Stack probe calls will clobber live EFLAGS.");
1139
1140 unsigned CallOp;
1141 if (Is64Bit)
1142 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1143 else
1144 CallOp = X86::CALLpcrel32;
1145
1147
1149 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1150
1151 // All current stack probes take AX and SP as input, clobber flags, and
1152 // preserve all registers. x86_64 probes leave RSP unmodified.
1154 // For the large code model, we have to call through a register. Use R11,
1155 // as it is scratch in all supported calling conventions.
1156 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1158 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1159 } else {
1160 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1162 }
1163
1164 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1165 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1171
1172 MachineInstr *ModInst = CI;
1173 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1174 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1175 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1176 // themselves. They also do not clobber %rax, so we can reuse it when
1177 // adjusting %rsp.
1178 // All other platforms do not specify a particular ABI for the stack probe
1179 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1180 ModInst =
1182 .addReg(SP)
1183 .addReg(AX);
1184 }
1185
1186 // DebugInfo variable locations -- if there's an instruction number for the
1187 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1188 // modifies SP.
1189 if (InstrNum) {
1190 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1191 // Label destination operand of the subtract.
1192 MF.makeDebugValueSubstitution(*InstrNum,
1193 {ModInst->getDebugInstrNum(), 0});
1194 } else {
1195 // Label the call. The SP def is the penultimate operand, zero-based.
1197 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1199 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1200 }
1201 }
1202
1203 if (InProlog) {
1204 // Apply the frame setup flag to all inserted instrs.
1205 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1206 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1207 }
1208}
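// On Win64 the overall shape is roughly (sketch; the caller materializes
// the allocation size in %rax beforehand):
//   movq $NNN, %rax
//   callq __chkstk            ; probes; leaves %rsp and %rax unmodified
//   subq %rax, %rsp           ; the SUB emitted above
// matching the "call ___chkstk_ms/___chkstk; sub %rax, %rsp" pattern in
// the prologue gist below.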
1209
1210static unsigned calculateSetFPREG(uint64_t SPAdjust) {
1211 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
1212 // and might require smaller successive adjustments.
1213 const uint64_t Win64MaxSEHOffset = 128;
1214 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
1215 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
1216 return SEHFrameOffset & -16;
1217}
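// Worked examples (sketch):
//   SPAdjust = 90   -> min(90, 128)   = 90  -> 90  & -16 = 80
//   SPAdjust = 1000 -> min(1000, 128) = 128 -> 128 & -16 = 128
// so UWOP_SET_FPREG always gets a 16-byte-aligned offset of at most 128.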
1218
1219// If we're forcing a stack realignment, we can't rely on just the frame
1220// info; we need to know the ABI stack alignment as well, in case we
1221// have a call out. Otherwise just make sure we have some alignment - we'll
1222// go with the minimum SlotSize.
1224X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1225 const MachineFrameInfo &MFI = MF.getFrameInfo();
1226 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1227 Align StackAlign = getStackAlign();
1228 bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1229 if (HasRealign) {
1230 if (MFI.hasCalls())
1231 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1232 else if (MaxAlign < SlotSize)
1233 MaxAlign = Align(SlotSize);
1234 }
1235
1237 if (HasRealign)
1238 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1239 else
1240 MaxAlign = Align(16);
1241 }
1242 return MaxAlign.value();
1243}
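// E.g. (sketch): with MFI.getMaxAlign() == 8, the "stackrealign" attribute
// set, and a function that makes calls, the result is
// max(getStackAlign() /* 16 on x86-64 SysV */, 8) == 16.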
1244
1245void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1247 const DebugLoc &DL, unsigned Reg,
1248 uint64_t MaxAlign) const {
1249 uint64_t Val = -MaxAlign;
1250 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1251
1252 MachineFunction &MF = *MBB.getParent();
1254 const X86TargetLowering &TLI = *STI.getTargetLowering();
1255 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1256 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1257
1258 // We want to make sure that (in the worst case) less than StackProbeSize
1259 // bytes remain unprobed after the AND. This assumption is used in
1260 // emitStackProbeInlineGeneric.
1261 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1262 {
1263 NumFrameLoopProbe++;
1264 MachineBasicBlock *entryMBB =
1266 MachineBasicBlock *headMBB =
1268 MachineBasicBlock *bodyMBB =
1270 MachineBasicBlock *footMBB =
1272
1274 MF.insert(MBBIter, entryMBB);
1275 MF.insert(MBBIter, headMBB);
1276 MF.insert(MBBIter, bodyMBB);
1277 MF.insert(MBBIter, footMBB);
1278 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1279 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1280 : Is64Bit ? X86::R11D
1281 : X86::EAX;
1282
1283 // Setup entry block
1284 {
1285
1286 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1287 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1290 MachineInstr *MI =
1291 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1292 .addReg(FinalStackProbed)
1293 .addImm(Val)
1295
1296 // The EFLAGS implicit def is dead.
1297 MI->getOperand(3).setIsDead();
1298
1299 BuildMI(entryMBB, DL,
1300 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1301 .addReg(FinalStackProbed)
1304 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1305 .addMBB(&MBB)
1308 entryMBB->addSuccessor(headMBB);
1309 entryMBB->addSuccessor(&MBB);
1310 }
1311
1312 // Loop entry block
1313
1314 {
1315 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1316 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1318 .addImm(StackProbeSize)
1320
1321 BuildMI(headMBB, DL,
1322 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1324 .addReg(FinalStackProbed)
1326
1327 // jump to the footer if StackPtr < FinalStackProbed
1328 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1329 .addMBB(footMBB)
1332
1333 headMBB->addSuccessor(bodyMBB);
1334 headMBB->addSuccessor(footMBB);
1335 }
1336
1337 // setup loop body
1338 {
1339 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1341 StackPtr, false, 0)
1342 .addImm(0)
1344
1345 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1346 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1348 .addImm(StackProbeSize)
1350
1351 // cmp with stack pointer bound
1352 BuildMI(bodyMBB, DL,
1353 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1354 .addReg(FinalStackProbed)
1357
1358 // jump back while FinalStackProbed < StackPtr
1359 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1360 .addMBB(bodyMBB)
1363 bodyMBB->addSuccessor(bodyMBB);
1364 bodyMBB->addSuccessor(footMBB);
1365 }
1366
1367 // setup loop footer
1368 {
1369 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1370 .addReg(FinalStackProbed)
1372 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1374 StackPtr, false, 0)
1375 .addImm(0)
1377 footMBB->addSuccessor(&MBB);
1378 }
1379
1380 fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
1381 }
1382 } else {
1383 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1384 .addReg(Reg)
1385 .addImm(Val)
1387
1388 // The EFLAGS implicit def is dead.
1389 MI->getOperand(3).setIsDead();
1390 }
1391}
1392
1394 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1395 // clobbered by any interrupt handler.
1396 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1397 "MF used frame lowering for wrong subtarget");
1398 const Function &Fn = MF.getFunction();
1399 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1400 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1401}
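// Example (sketch): a 64-bit SysV leaf function needing 24 bytes of locals
// can address them at -24(%rsp) with no prologue SP adjustment at all; see
// the red-zone special case in emitPrologue below.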
1402
1403/// Return true if we need to use the restricted Windows x64 prologue and
1404/// epilogue code patterns that can be described with WinCFI (.seh_*
1405/// directives).
1406bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1407 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1408}
1409
1410bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1411 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1412}
1413
1414/// Return true if an opcode is part of the REP group of instructions
1415static bool isOpcodeRep(unsigned Opcode) {
1416 switch (Opcode) {
1417 case X86::REPNE_PREFIX:
1418 case X86::REP_MOVSB_32:
1419 case X86::REP_MOVSB_64:
1420 case X86::REP_MOVSD_32:
1421 case X86::REP_MOVSD_64:
1422 case X86::REP_MOVSQ_32:
1423 case X86::REP_MOVSQ_64:
1424 case X86::REP_MOVSW_32:
1425 case X86::REP_MOVSW_64:
1426 case X86::REP_PREFIX:
1427 case X86::REP_STOSB_32:
1428 case X86::REP_STOSB_64:
1429 case X86::REP_STOSD_32:
1430 case X86::REP_STOSD_64:
1431 case X86::REP_STOSQ_32:
1432 case X86::REP_STOSQ_64:
1433 case X86::REP_STOSW_32:
1434 case X86::REP_STOSW_64:
1435 return true;
1436 default:
1437 break;
1438 }
1439 return false;
1440}
1441
1442/// emitPrologue - Push callee-saved registers onto the stack, which
1443/// automatically adjusts the stack pointer. Adjust the stack pointer to
1444/// allocate space for local variables. Also emit labels used by the exception
1445/// handler to generate the exception handling frames.
1446
1447/*
1448 Here's a gist of what gets emitted:
1449
1450 ; Establish frame pointer, if needed
1451 [if needs FP]
1452 push %rbp
1453 .cfi_def_cfa_offset 16
1454 .cfi_offset %rbp, -16
1455 .seh_pushreg %rbp
1456 mov %rsp, %rbp
1457 .cfi_def_cfa_register %rbp
1458
1459 ; Spill general-purpose registers
1460 [for all callee-saved GPRs]
1461 pushq %<reg>
1462 [if not needs FP]
1463 .cfi_def_cfa_offset (offset from RETADDR)
1464 .seh_pushreg %<reg>
1465
1466 ; If the required stack alignment > default stack alignment
1467 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1468 ; of unknown size in the stack frame.
1469 [if stack needs re-alignment]
1470 and $MASK, %rsp
1471
1472 ; Allocate space for locals
1473 [if target is Windows and allocated space > 4096 bytes]
1474 ; Windows needs special care for allocations larger
1475 ; than one page.
1476 mov $NNN, %rax
1477 call ___chkstk_ms/___chkstk
1478 sub %rax, %rsp
1479 [else]
1480 sub $NNN, %rsp
1481
1482 [if needs FP]
1483 .seh_stackalloc (size of XMM spill slots)
1484 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1485 [else]
1486 .seh_stackalloc NNN
1487
1488 ; Spill XMMs
1489 ; Note that, while only the Windows 64 ABI specifies XMMs as callee-preserved,
1490 ; they may get spilled on any platform if the current function
1491 ; calls @llvm.eh.unwind.init
1492 [if needs FP]
1493 [for all callee-saved XMM registers]
1494 movaps %<xmm reg>, -MMM(%rbp)
1495 [for all callee-saved XMM registers]
1496 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1497 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1498 [else]
1499 [for all callee-saved XMM registers]
1500 movaps %<xmm reg>, KKK(%rsp)
1501 [for all callee-saved XMM registers]
1502 .seh_savexmm %<xmm reg>, KKK
1503
1504 .seh_endprologue
1505
1506 [if needs base pointer]
1507 mov %rsp, %rbx
1508 [if needs to restore base pointer]
1509 mov %rsp, -MMM(%rbp)
1510
1511 ; Emit CFI info
1512 [if needs FP]
1513 [for all callee-saved registers]
1514 .cfi_offset %<reg>, (offset from %rbp)
1515 [else]
1516 .cfi_def_cfa_offset (offset from RETADDR)
1517 [for all callee-saved registers]
1518 .cfi_offset %<reg>, (offset from %rsp)
1519
1520 Notes:
1521 - .seh directives are emitted only for Windows 64 ABI
1522 - .cv_fpo directives are emitted on win32 when emitting CodeView
1523 - .cfi directives are emitted for all other ABIs
1524 - for 32-bit code, substitute %e?? registers for %r??
1525*/
1526
1528 MachineBasicBlock &MBB) const {
1529 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1530 "MF used frame lowering for wrong subtarget");
1532 MachineFrameInfo &MFI = MF.getFrameInfo();
1533 const Function &Fn = MF.getFunction();
1534 MachineModuleInfo &MMI = MF.getMMI();
1536 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1537 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1538 bool IsFunclet = MBB.isEHFuncletEntry();
1540 if (Fn.hasPersonalityFn())
1541 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1542 bool FnHasClrFunclet =
1543 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1544 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1545 bool HasFP = hasFP(MF);
1546 bool IsWin64Prologue = isWin64Prologue(MF);
1547 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1548 // FIXME: Emit FPO data for EH funclets.
1549 bool NeedsWinFPO =
1550 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1551 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1552 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1554 const Register MachineFramePtr =
1556 : FramePtr;
1557 Register BasePtr = TRI->getBaseRegister();
1558 bool HasWinCFI = false;
1559
1560 // Debug location must be unknown since the first debug location is used
1561 // to determine the end of the prologue.
1562 DebugLoc DL;
1563 Register ArgBaseReg;
1564
1565 // Emit extra prolog for argument stack slot reference.
1566 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1567 // MI is the LEA instruction created in X86ArgumentStackSlotPass.
1568 // Create an extra prolog for stack realignment.
1569 ArgBaseReg = MI->getOperand(0).getReg();
1570 // leal 4(%esp), %basereg
1571 // .cfi_def_cfa %basereg, 0
1572 // andl $-128, %esp
1573 // pushl -4(%basereg)
1574 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1575 ArgBaseReg)
1577 .addImm(1)
1578 .addUse(X86::NoRegister)
1580 .addUse(X86::NoRegister)
1582 if (NeedsDwarfCFI) {
1583 // .cfi_def_cfa %basereg, 0
1584 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1585 BuildCFI(MBB, MBBI, DL,
1586 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1588 }
1589 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1590 int64_t Offset = -(int64_t)SlotSize;
1591 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1592 .addReg(ArgBaseReg)
1593 .addImm(1)
1594 .addReg(X86::NoRegister)
1595 .addImm(Offset)
1596 .addReg(X86::NoRegister)
1598 }
1599
1600 // Space reserved for stack-based arguments when making an (ABI-guaranteed)
1601 // tail call.
1602 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1603 if (TailCallArgReserveSize && IsWin64Prologue)
1604 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1605
1606 const bool EmitStackProbeCall =
1608 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1609
1610 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1614 // The special symbol below is absolute and has a *value* suitable to be
1615 // combined with the frame pointer directly.
1616 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1617 .addUse(MachineFramePtr)
1618 .addUse(X86::RIP)
1619 .addImm(1)
1620 .addUse(X86::NoRegister)
1621 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1623 .addUse(X86::NoRegister);
1624 break;
1625 }
1626 [[fallthrough]];
1627
1629 assert(
1630 !IsWin64Prologue &&
1631 "win64 prologue does not set the bit 60 in the saved frame pointer");
1632 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1633 .addUse(MachineFramePtr)
1634 .addImm(60)
1636 break;
1637
1639 break;
1640 }
1641 }
1642
1643 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1644 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1645 // stack alignment.
1647 Fn.arg_size() == 2) {
1648 StackSize += 8;
1649 MFI.setStackSize(StackSize);
1650
1651 // Update the stack pointer by pushing a register. This is the instruction
1652 // that would end up being emitted by a call to `emitSPUpdate`.
1653 // Hard-coding the update to a push avoids emitting a second
1654 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1655 // probing isn't needed anyway for an 8-byte update.
1656 // Pushing a register leaves us in a similar situation to a regular
1657 // function call where we know that the address at (rsp-8) is writeable.
1658 // That way we avoid any off-by-ones with stack probing for additional
1659 // stack pointer updates later on.
1660 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1661 .addReg(X86::RAX, RegState::Undef)
1663 }
1664
1665 // If this is x86-64, the Red Zone is not disabled, we are a leaf function,
1666 // we use up to 128 bytes of stack space, and we don't have a frame
1667 // pointer, calls, or dynamic allocas, then we do not need to adjust the
1668 // stack pointer (we fit in the Red Zone). We also check that we don't
1669 // push and pop from the stack.
1670 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1671 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1672 !MFI.adjustsStack() && // No calls.
1673 !EmitStackProbeCall && // No stack probes.
1674 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1675 !MF.shouldSplitStack()) { // Regular stack
1676 uint64_t MinSize =
1678 if (HasFP)
1679 MinSize += SlotSize;
1680 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1681 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1682 MFI.setStackSize(StackSize);
1683 }
1684
1685 // Insert stack pointer adjustment for later moving of return addr. Only
1686 // applies to tail call optimized functions where the callee argument stack
1687 // size is bigger than the callers.
1688 if (TailCallArgReserveSize != 0) {
1689 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1690 /*InEpilogue=*/false)
1692 }
1693
1694 // Mapping for machine moves:
1695 //
1696 // DST: VirtualFP AND
1697 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1698 // ELSE => DW_CFA_def_cfa
1699 //
1700 // SRC: VirtualFP AND
1701 // DST: Register => DW_CFA_def_cfa_register
1702 //
1703 // ELSE
1704 // OFFSET < 0 => DW_CFA_offset_extended_sf
1705 // REG < 64 => DW_CFA_offset + Reg
1706 // ELSE => DW_CFA_offset_extended
1707
1708 uint64_t NumBytes = 0;
1709 int stackGrowth = -SlotSize;
1710
1711 // Find the funclet establisher parameter
1712 Register Establisher = X86::NoRegister;
1713 if (IsClrFunclet)
1714 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1715 else if (IsFunclet)
1716 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1717
1718 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1719 // Immediately spill establisher into the home slot.
1720 // The runtime cares about this.
1721 // MOV64mr %rdx, 16(%rsp)
1722 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1723 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1724 .addReg(Establisher)
1726 MBB.addLiveIn(Establisher);
1727 }
1728
1729 if (HasFP) {
1730 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1731
1732 // Calculate required stack adjustment.
1733 uint64_t FrameSize = StackSize - SlotSize;
1734 NumBytes =
1735 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1736
1737 // Callee-saved registers are pushed on stack before the stack is realigned.
1738 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1739 NumBytes = alignTo(NumBytes, MaxAlign);
1740
1741 // Save EBP/RBP into the appropriate stack slot.
1742 BuildMI(MBB, MBBI, DL,
1744 .addReg(MachineFramePtr, RegState::Kill)
1746
1747 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1748 // Mark the place where EBP/RBP was saved.
1749 // Define the current CFA rule to use the provided offset.
1750 assert(StackSize);
1751 BuildCFI(MBB, MBBI, DL,
1753 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1755
1756 // Change the rule for the FramePtr to be an "offset" rule.
1757 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1758 BuildCFI(MBB, MBBI, DL,
1759 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1760 2 * stackGrowth -
1761 (int)TailCallArgReserveSize),
1763 }
1764
1765 if (NeedsWinCFI) {
1766 HasWinCFI = true;
1767 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1770 }
1771
1772 if (!IsFunclet) {
1773 if (X86FI->hasSwiftAsyncContext()) {
1774 assert(!IsWin64Prologue &&
1775 "win64 prologue does not store async context right below rbp");
1776 const auto &Attrs = MF.getFunction().getAttributes();
1777
1778 // Before we update the live frame pointer we have to ensure there's a
1779 // valid (or null) asynchronous context in its slot just before FP in
1780 // the frame record, so store it now.
1781 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1782 // We have an initial context in r14, store it just before the frame
1783 // pointer.
1784 MBB.addLiveIn(X86::R14);
1785 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1786 .addReg(X86::R14)
1787 .setMIFlag(MachineInstr::FrameSetup);
1788 } else {
1789 // No initial context, store null so that there's no pointer that
1790 // could be misused.
1791 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1792 .addImm(0)
1793 .setMIFlag(MachineInstr::FrameSetup);
1794 }
1795
1796 if (NeedsWinCFI) {
1797 HasWinCFI = true;
1798 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1799 .addImm(X86::R14)
1800 .setMIFlag(MachineInstr::FrameSetup);
1801 }
1802
1803 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1804 .addUse(X86::RSP)
1805 .addImm(1)
1806 .addUse(X86::NoRegister)
1807 .addImm(8)
1808 .addUse(X86::NoRegister)
1809 .setMIFlag(MachineInstr::FrameSetup);
1810 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1811 .addUse(X86::RSP)
1812 .addImm(8)
1813 .setMIFlag(MachineInstr::FrameSetup);
1814 }
1815
1816 if (!IsWin64Prologue && !IsFunclet) {
1817 // Update EBP with the new base value.
1818 if (!X86FI->hasSwiftAsyncContext())
1819 BuildMI(MBB, MBBI, DL,
1820 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1821 FramePtr)
1822 .addReg(StackPtr)
1823 .setMIFlag(MachineInstr::FrameSetup);
1824
1825 if (NeedsDwarfCFI) {
1826 if (ArgBaseReg.isValid()) {
1827 SmallString<64> CfaExpr;
1828 CfaExpr.push_back(dwarf::DW_CFA_expression);
1829 uint8_t buffer[16];
1830 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1831 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1832 CfaExpr.push_back(2);
1833 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1834 CfaExpr.push_back(0);
1835 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1836 BuildCFI(MBB, MBBI, DL,
1837 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1838 MachineInstr::FrameSetup);
1839 } else {
1840 // Mark effective beginning of when frame pointer becomes valid.
1841 // Define the current CFA to use the EBP/RBP register.
1842 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1843 BuildCFI(
1844 MBB, MBBI, DL,
1845 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1846 MachineInstr::FrameSetup);
1847 }
1848 }
1849
1850 if (NeedsWinFPO) {
1851 // .cv_fpo_setframe $FramePtr
1852 HasWinCFI = true;
1853 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1854 .addImm(FramePtr)
1855 .addImm(0)
1856 .setMIFlag(MachineInstr::FrameSetup);
1857 }
1858 }
1859 }
1860 } else {
1861 assert(!IsFunclet && "funclets without FPs not yet implemented");
1862 NumBytes =
1863 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1864 }
1865
1866 // Update the offset adjustment, which is mainly used by codeview to translate
1867 // from ESP to VFRAME relative local variable offsets.
1868 if (!IsFunclet) {
1869 if (HasFP && TRI->hasStackRealignment(MF))
1870 MFI.setOffsetAdjustment(-NumBytes);
1871 else
1872 MFI.setOffsetAdjustment(-StackSize);
1873 }
1874
1875 // For EH funclets, only allocate enough space for outgoing calls. Save the
1876 // NumBytes value that we would've used for the parent frame.
1877 unsigned ParentFrameNumBytes = NumBytes;
1878 if (IsFunclet)
1879 NumBytes = getWinEHFuncletFrameSize(MF);
1880
1881 // Skip the callee-saved push instructions.
1882 bool PushedRegs = false;
1883 int StackOffset = 2 * stackGrowth;
1884 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1885 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1886 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1887 return false;
1888 unsigned Opc = MBBI->getOpcode();
1889 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1890 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1891 };
1892
1893 while (IsCSPush(MBBI)) {
1894 PushedRegs = true;
1895 Register Reg = MBBI->getOperand(0).getReg();
1896 LastCSPush = MBBI;
1897 ++MBBI;
1898 unsigned Opc = LastCSPush->getOpcode();
1899
1900 if (!HasFP && NeedsDwarfCFI) {
1901 // Mark callee-saved push instruction.
1902 // Define the current CFA rule to use the provided offset.
1903 assert(StackSize);
1904 // Compared to push, push2 introduces more stack offset (one more
1905 // register).
1906 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1907 StackOffset += stackGrowth;
1908 BuildCFI(MBB, MBBI, DL,
1909 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1910 MachineInstr::FrameSetup);
1911 StackOffset += stackGrowth;
1912 }
1913
1914 if (NeedsWinCFI) {
1915 HasWinCFI = true;
1916 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1917 .addImm(Reg)
1918 .setMIFlag(MachineInstr::FrameSetup);
1919 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1920 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1921 .addImm(LastCSPush->getOperand(1).getReg())
1922 .setMIFlag(MachineInstr::FrameSetup);
1923 }
1924 }
1925
1926 // Realign stack after we pushed callee-saved registers (so that we'll be
1927 // able to calculate their offsets from the frame pointer).
1928 // Don't do this for Win64, it needs to realign the stack after the prologue.
1929 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1930 !ArgBaseReg.isValid()) {
1931 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1932 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1933
1934 if (NeedsWinCFI) {
1935 HasWinCFI = true;
1936 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1937 .addImm(MaxAlign)
1938 .setMIFlag(MachineInstr::FrameSetup);
1939 }
1940 }
1941
1942 // If there is a SUB32ri of ESP immediately before this instruction, merge
1943 // the two. This can be the case when tail call elimination is enabled and
1944 // the callee has more arguments than the caller.
1945 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1946
1947 // Adjust stack pointer: ESP -= numbytes.
1948
1949 // Windows and cygwin/mingw require a prologue helper routine when allocating
1950 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1951 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1952 // stack and adjust the stack pointer in one go. The 64-bit version of
1953 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1954 // responsible for adjusting the stack pointer. Touching the stack at 4K
1955 // increments is necessary to ensure that the guard pages used by the OS
1956 // virtual memory manager are allocated in the correct sequence.
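// An illustrative Win64 lowering of a ~20K allocation (register choice and
// exact sizes are assumptions for the sketch, not taken from this file):
//   mov $0x5000, %eax # requested bytes
//   call __chkstk # probes each 4K page
//   sub %rax, %rsp # the prologue adjusts SP itself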
1957 uint64_t AlignedNumBytes = NumBytes;
1958 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1959 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1960 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1961 assert(!X86FI->getUsesRedZone() &&
1962 "The Red Zone is not accounted for in stack probes");
1963
1964 // Check whether EAX is livein for this block.
1965 bool isEAXAlive = isEAXLiveIn(MBB);
1966
1967 if (isEAXAlive) {
1968 if (Is64Bit) {
1969 // Save RAX
1970 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1971 .addReg(X86::RAX, RegState::Kill)
1972 .setMIFlag(MachineInstr::FrameSetup);
1973 } else {
1974 // Save EAX
1975 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1976 .addReg(X86::EAX, RegState::Kill)
1977 .setMIFlag(MachineInstr::FrameSetup);
1978 }
1979 }
1980
1981 if (Is64Bit) {
1982 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1983 // Function prologue is responsible for adjusting the stack pointer.
1984 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1985 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1986 .addImm(Alloc)
1987 .setMIFlag(MachineInstr::FrameSetup);
1988 } else {
1989 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1990 // We'll also use 4 already allocated bytes for EAX.
1991 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1992 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1993 .setMIFlag(MachineInstr::FrameSetup);
1994 }
1995
1996 // Call __chkstk, __chkstk_ms, or __alloca.
1997 emitStackProbe(MF, MBB, MBBI, DL, true);
1998
1999 if (isEAXAlive) {
2000 // Restore RAX/EAX
2001 MachineInstr *MI;
2002 if (Is64Bit)
2003 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2004 StackPtr, false, NumBytes - 8);
2005 else
2006 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2007 StackPtr, false, NumBytes - 4);
2008 MI->setFlag(MachineInstr::FrameSetup);
2009 MBB.insert(MBBI, MI);
2010 }
2011 } else if (NumBytes) {
2012 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2013 }
2014
2015 if (NeedsWinCFI && NumBytes) {
2016 HasWinCFI = true;
2017 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2018 .addImm(NumBytes)
2019 .setMIFlag(MachineInstr::FrameSetup);
2020 }
2021
2022 int SEHFrameOffset = 0;
2023 unsigned SPOrEstablisher;
2024 if (IsFunclet) {
2025 if (IsClrFunclet) {
2026 // The establisher parameter passed to a CLR funclet is actually a pointer
2027 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2028 // to find the root function establisher frame by loading the PSPSym from
2029 // the intermediate frame.
2030 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2031 MachinePointerInfo NoInfo;
2032 MBB.addLiveIn(Establisher);
2033 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2034 Establisher, false, PSPSlotOffset)
2035 .addMemOperand(MF.getMachineMemOperand(
2036 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2038 // Save the root establisher back into the current funclet's (mostly
2039 // empty) frame, in case a sub-funclet or the GC needs it.
2040 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2041 false, PSPSlotOffset)
2042 .addReg(Establisher)
2043 .addMemOperand(MF.getMachineMemOperand(
2044 NoInfo,
2045 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2046 SlotSize, Align(SlotSize)));
2047 }
2048 SPOrEstablisher = Establisher;
2049 } else {
2050 SPOrEstablisher = StackPtr;
2051 }
2052
2053 if (IsWin64Prologue && HasFP) {
2054 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2055 // this calculation on the incoming establisher, which holds the value of
2056 // RSP from the parent frame at the end of the prologue.
2057 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2058 if (SEHFrameOffset)
2059 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2060 SPOrEstablisher, false, SEHFrameOffset);
2061 else
2062 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2063 .addReg(SPOrEstablisher);
2064
2065 // If this is not a funclet, emit the CFI describing our frame pointer.
2066 if (NeedsWinCFI && !IsFunclet) {
2067 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2068 HasWinCFI = true;
2069 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2070 .addImm(FramePtr)
2071 .addImm(SEHFrameOffset)
2072 .setMIFlag(MachineInstr::FrameSetup);
2073 if (isAsynchronousEHPersonality(Personality))
2074 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2075 }
2076 } else if (IsFunclet && STI.is32Bit()) {
2077 // Reset EBP / ESI to something good for funclets.
2078 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2079 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2080 // into the registration node so that the runtime will restore it for us.
2081 if (!MBB.isCleanupFuncletEntry()) {
2082 assert(Personality == EHPersonality::MSVC_CXX);
2083 Register FrameReg;
2084 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2085 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2086 // ESP is the first field, so no extra displacement is needed.
2087 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2088 false, EHRegOffset)
2089 .addReg(X86::ESP);
2090 }
2091 }
2092
2093 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2094 const MachineInstr &FrameInstr = *MBBI;
2095 ++MBBI;
2096
2097 if (NeedsWinCFI) {
2098 int FI;
2099 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2100 if (X86::FR64RegClass.contains(Reg)) {
2101 int Offset;
2102 Register IgnoredFrameReg;
2103 if (IsWin64Prologue && IsFunclet)
2104 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2105 else
2106 Offset =
2107 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2108 SEHFrameOffset;
2109
2110 HasWinCFI = true;
2111 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2112 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2113 .addImm(Reg)
2114 .addImm(Offset)
2115 .setMIFlag(MachineInstr::FrameSetup);
2116 }
2117 }
2118 }
2119 }
2120
2121 if (NeedsWinCFI && HasWinCFI)
2122 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2123 .setMIFlag(MachineInstr::FrameSetup);
2124
2125 if (FnHasClrFunclet && !IsFunclet) {
2126 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2127 // immediately after the prolog) into the PSPSlot so that funclets
2128 // and the GC can recover it.
2129 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2130 auto PSPInfo = MachinePointerInfo::getFixedStack(
2131 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2132 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2133 PSPSlotOffset)
2134 .addReg(StackPtr)
2135 .addMemOperand(MF.getMachineMemOperand(
2136 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2137 SlotSize, Align(SlotSize)));
2138 }
2139
2140 // Realign stack after we spilled callee-saved registers (so that we'll be
2141 // able to calculate their offsets from the frame pointer).
2142 // Win64 requires aligning the stack after the prologue.
2143 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2144 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2145 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2146 }
2147
2148 // We already dealt with stack realignment and funclets above.
2149 if (IsFunclet && STI.is32Bit())
2150 return;
2151
2152 // If we need a base pointer, set it up here. It's whatever the value
2153 // of the stack pointer is at this point. Any variable size objects
2154 // will be allocated after this, so we can still use the base pointer
2155 // to reference locals.
2156 if (TRI->hasBasePointer(MF)) {
2157 // Update the base pointer with the current stack pointer.
2158 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2159 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2160 .addReg(SPOrEstablisher)
2161 .setMIFlag(MachineInstr::FrameSetup);
2162 if (X86FI->getRestoreBasePointer()) {
2163 // Stash value of base pointer. Saving RSP instead of EBP shortens
2164 // dependence chain. Used by SjLj EH.
2165 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2166 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2167 X86FI->getRestoreBasePointerOffset())
2168 .addReg(SPOrEstablisher)
2169 .setMIFlag(MachineInstr::FrameSetup);
2170 }
2171
2172 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2173 // Stash the value of the frame pointer relative to the base pointer for
2174 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2175 // it recovers the frame pointer from the base pointer rather than the
2176 // other way around.
2177 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2178 Register UsedReg;
2179 int Offset =
2180 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2181 .getFixed();
2182 assert(UsedReg == BasePtr);
2183 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2184 .addReg(FramePtr)
2185 .setMIFlag(MachineInstr::FrameSetup);
2186 }
2187 }
2188 if (ArgBaseReg.isValid()) {
2189 // Save argument base pointer.
2190 auto *MI = X86FI->getStackPtrSaveMI();
2191 int FI = MI->getOperand(1).getIndex();
2192 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2193 // movl %basereg, offset(%ebp)
2194 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2195 .addReg(ArgBaseReg)
2196 .setMIFlag(MachineInstr::FrameSetup);
2197 }
2198
2199 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2200 // Mark end of stack pointer adjustment.
2201 if (!HasFP && NumBytes) {
2202 // Define the current CFA rule to use the provided offset.
2203 assert(StackSize);
2204 BuildCFI(
2205 MBB, MBBI, DL,
2206 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2207 MachineInstr::FrameSetup);
2208 }
2209
2210 // Emit DWARF info specifying the offsets of the callee-saved registers.
2211 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2212 }
2213
2214 // An X86 interrupt handling function cannot assume anything about the
2215 // direction flag (DF in the EFLAGS register). Clear this flag by emitting a
2216 // "cld" instruction in the prologue of every interrupt handler function.
2217 //
2218 // Create "cld" instruction only in these cases:
2219 // 1. The interrupt handling function uses any of the "rep" instructions.
2220 // 2. Interrupt handling function calls another function.
2221 // 3. If there are any inline asm blocks, as we do not know what they do
2222 //
2223 // TODO: We should also emit cld if we detect the use of std, but as of now,
2224 // the compiler does not even emit that instruction or even define it, so in
2225 // practice, this would only happen with inline asm, which we cover anyway.
2226 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2227 bool NeedsCLD = false;
2228
2229 for (const MachineBasicBlock &B : MF) {
2230 for (const MachineInstr &MI : B) {
2231 if (MI.isCall()) {
2232 NeedsCLD = true;
2233 break;
2234 }
2235
2236 if (isOpcodeRep(MI.getOpcode())) {
2237 NeedsCLD = true;
2238 break;
2239 }
2240
2241 if (MI.isInlineAsm()) {
2242 // TODO: Parse asm for rep instructions or call sites?
2243 // For now, let's play it safe and emit a cld instruction
2244 // just in case.
2245 NeedsCLD = true;
2246 break;
2247 }
2248 }
2249 }
2250
2251 if (NeedsCLD) {
2252 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2253 .setMIFlag(MachineInstr::FrameSetup);
2254 }
2255 }
2256
2257 // At this point we know if the function has WinCFI or not.
2258 MF.setHasWinCFI(HasWinCFI);
2259}
2260
2261 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2262 const MachineFunction &MF) const {
2263 // We can't use LEA instructions for adjusting the stack pointer if we don't
2264 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2265 // to deallocate the stack.
2266 // This means that we can use LEA for SP in two situations:
2267 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2268 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2269 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2270}
2271
2272 static bool isFuncletReturnInstr(MachineInstr &MI) {
2273 switch (MI.getOpcode()) {
2274 case X86::CATCHRET:
2275 case X86::CLEANUPRET:
2276 return true;
2277 default:
2278 return false;
2279 }
2280 llvm_unreachable("impossible");
2281}
2282
2283// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2284// stack. It holds a pointer to the bottom of the root function frame. The
2285// establisher frame pointer passed to a nested funclet may point to the
2286// (mostly empty) frame of its parent funclet, but it will need to find
2287// the frame of the root function to access locals. To facilitate this,
2288// every funclet copies the pointer to the bottom of the root function
2289// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2290// same offset for the PSPSym in the root function frame that's used in the
2291// funclets' frames allows each funclet to dynamically accept any ancestor
2292// frame as its establisher argument (the runtime doesn't guarantee the
2293// immediate parent for some reason lost to history), and also allows the GC,
2294// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2295// frame with only a single offset reported for the entire method.
2296unsigned
2297X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2298 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2299 Register SPReg;
2300 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2301 /*IgnoreSPUpdates*/ true)
2302 .getFixed();
2303 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2304 return static_cast<unsigned>(Offset);
2305}
2306
2307unsigned
2308X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2309 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2310 // This is the size of the pushed CSRs.
2311 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2312 // This is the size of callee saved XMMs.
2313 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2314 unsigned XMMSize =
2315 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2316 // This is the amount of stack a funclet needs to allocate.
2317 unsigned UsedSize;
2318 EHPersonality Personality =
2319 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2320 if (Personality == EHPersonality::CoreCLR) {
2321 // CLR funclets need to hold enough space to include the PSPSym, at the
2322 // same offset from the stack pointer (immediately after the prolog) as it
2323 // resides at in the main function.
2324 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2325 } else {
2326 // Other funclets just need enough stack for outgoing call arguments.
2327 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2328 }
2329 // RBP is not included in the callee saved register block. After pushing RBP,
2330 // everything is 16 byte aligned. Everything we allocate before an outgoing
2331 // call must also be 16 byte aligned.
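// Illustrative numbers: CSSize = 8 and UsedSize = 40 align to
// alignTo(48, 16) = 48, so a funclet with no XMM spills allocates
// 48 - 8 = 40 bytes below its pushed CSRs.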
2332 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2333 // Subtract out the size of the callee saved registers. This is how much stack
2334 // each funclet will allocate.
2335 return FrameSizeMinusRBP + XMMSize - CSSize;
2336}
2337
2338static bool isTailCallOpcode(unsigned Opc) {
2339 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2340 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2341 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2342}
2343
2344 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2345 MachineBasicBlock &MBB) const {
2346 const MachineFrameInfo &MFI = MF.getFrameInfo();
2347 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2348 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2349 MachineBasicBlock::iterator MBBI = Terminator;
2350 DebugLoc DL;
2351 if (MBBI != MBB.end())
2352 DL = MBBI->getDebugLoc();
2353 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2354 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2355 Register FramePtr = TRI->getFrameRegister(MF);
2356 Register MachineFramePtr =
2357 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2358
2359 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2360 bool NeedsWin64CFI =
2361 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2362 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2363
2364 // Get the number of bytes to allocate from the FrameInfo.
2365 uint64_t StackSize = MFI.getStackSize();
2366 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2367 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2368 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2369 bool HasFP = hasFP(MF);
2370 uint64_t NumBytes = 0;
2371
2372 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2373 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2374 MF.needsFrameMoves();
2375
2376 Register ArgBaseReg;
2377 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2378 unsigned Opc = X86::LEA32r;
2379 Register StackReg = X86::ESP;
2380 ArgBaseReg = MI->getOperand(0).getReg();
2381 if (STI.is64Bit()) {
2382 Opc = X86::LEA64r;
2383 StackReg = X86::RSP;
2384 }
2385 // leal -4(%basereg), %esp
2386 // .cfi_def_cfa %esp, 4
2387 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2388 .addUse(ArgBaseReg)
2389 .addImm(1)
2390 .addUse(X86::NoRegister)
2391 .addImm(-(int64_t)SlotSize)
2392 .addUse(X86::NoRegister)
2393 .setMIFlag(MachineInstr::FrameDestroy);
2394 if (NeedsDwarfCFI) {
2395 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2396 BuildCFI(MBB, MBBI, DL,
2397 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2398 MachineInstr::FrameDestroy);
2399 --MBBI;
2400 }
2401 --MBBI;
2402 }
2403
2404 if (IsFunclet) {
2405 assert(HasFP && "EH funclets without FP not yet implemented");
2406 NumBytes = getWinEHFuncletFrameSize(MF);
2407 } else if (HasFP) {
2408 // Calculate required stack adjustment.
2409 uint64_t FrameSize = StackSize - SlotSize;
2410 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2411
2412 // Callee-saved registers were pushed on stack before the stack was
2413 // realigned.
2414 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2415 NumBytes = alignTo(FrameSize, MaxAlign);
2416 } else {
2417 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2418 }
2419 uint64_t SEHStackAllocAmt = NumBytes;
2420
2421 // AfterPop is the position to insert .cfi_restore.
2422 MachineBasicBlock::iterator AfterPop = MBBI;
2423 if (HasFP) {
2424 if (X86FI->hasSwiftAsyncContext()) {
2425 // Discard the context.
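// The extended frame stores the context pointer plus one slot of alignment
// padding below the saved FP, hence the fixed 16 bytes discarded here.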
2426 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2427 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2428 }
2429 // Pop EBP.
2430 BuildMI(MBB, MBBI, DL,
2431 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2432 MachineFramePtr)
2433 .setMIFlag(MachineInstr::FrameDestroy);
2434
2435 // We need to reset FP to its untagged state on return. Bit 60 is currently
2436 // used to show the presence of an extended frame.
2437 if (X86FI->hasSwiftAsyncContext()) {
2438 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2439 .addUse(MachineFramePtr)
2440 .addImm(60)
2441 .setMIFlag(MachineInstr::FrameDestroy);
2442 }
2443
2444 if (NeedsDwarfCFI) {
2445 if (!ArgBaseReg.isValid()) {
2446 unsigned DwarfStackPtr =
2447 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2448 BuildCFI(MBB, MBBI, DL,
2449 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2450 MachineInstr::FrameDestroy);
2451 }
2452 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2453 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2454 BuildCFI(MBB, AfterPop, DL,
2455 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2456 MachineInstr::FrameDestroy);
2457 --MBBI;
2458 --AfterPop;
2459 }
2460 --MBBI;
2461 }
2462 }
2463
2464 MachineBasicBlock::iterator FirstCSPop = MBBI;
2465 // Skip the callee-saved pop instructions.
2466 while (MBBI != MBB.begin()) {
2467 MachineBasicBlock::iterator PI = std::prev(MBBI);
2468 unsigned Opc = PI->getOpcode();
2469
2470 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2471 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2472 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2473 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2474 Opc != X86::POP2P && Opc != X86::LEA64r))
2475 break;
2476 FirstCSPop = PI;
2477 }
2478
2479 --MBBI;
2480 }
2481 if (ArgBaseReg.isValid()) {
2482 // Restore argument base pointer.
2483 auto *MI = X86FI->getStackPtrSaveMI();
2484 int FI = MI->getOperand(1).getIndex();
2485 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2486 // movl offset(%ebp), %basereg
2487 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2488 .setMIFlag(MachineInstr::FrameDestroy);
2489 }
2490 MBBI = FirstCSPop;
2491
2492 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2493 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2494
2495 if (MBBI != MBB.end())
2496 DL = MBBI->getDebugLoc();
2497 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2498 // instruction, merge the two instructions.
2499 if (NumBytes || MFI.hasVarSizedObjects())
2500 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2501
2502 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2503 // slot before popping them off! The same applies when the stack was
2504 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2505 // will not do realignment or dynamic stack allocation.
2506 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2507 !IsFunclet) {
2508 if (TRI->hasStackRealignment(MF))
2509 MBBI = FirstCSPop;
2510 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2511 uint64_t LEAAmount =
2512 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2513
2514 if (X86FI->hasSwiftAsyncContext())
2515 LEAAmount -= 16;
2516
2517 // There are only two legal forms of epilogue:
2518 // - add SEHAllocationSize, %rsp
2519 // - lea SEHAllocationSize(%FramePtr), %rsp
2520 //
2521 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2522 // However, we may use this sequence if we have a frame pointer because the
2523 // effects of the prologue can safely be undone.
2524 if (LEAAmount != 0) {
2525 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2526 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2527 false, LEAAmount);
2528 --MBBI;
2529 } else {
2530 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2531 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2532 --MBBI;
2533 }
2534 } else if (NumBytes) {
2535 // Adjust stack pointer back: ESP += numbytes.
2536 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2537 if (!HasFP && NeedsDwarfCFI) {
2538 // Define the current CFA rule to use the provided offset.
2539 BuildCFI(MBB, MBBI, DL,
2540 MCCFIInstruction::cfiDefCfaOffset(
2541 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2542 MachineInstr::FrameDestroy);
2543 }
2544 --MBBI;
2545 }
2546
2547 // Windows unwinder will not invoke function's exception handler if IP is
2548 // either in prologue or in epilogue. This behavior causes a problem when a
2549 // call immediately precedes an epilogue, because the return address points
2550 // into the epilogue. To cope with that, we insert an epilogue marker here,
2551 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2552 // final emitted code.
2553 if (NeedsWin64CFI && MF.hasWinCFI())
2554 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2555
2556 if (!HasFP && NeedsDwarfCFI) {
2557 MBBI = FirstCSPop;
2558 int64_t Offset = -CSSize - SlotSize;
2559 // Mark callee-saved pop instruction.
2560 // Define the current CFA rule to use the provided offset.
2561 while (MBBI != MBB.end()) {
2562 MachineBasicBlock::iterator PI = MBBI;
2563 unsigned Opc = PI->getOpcode();
2564 ++MBBI;
2565 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2566 Opc == X86::POP2 || Opc == X86::POP2P) {
2567 Offset += SlotSize;
2568 // Compared to pop, pop2 introduces more stack offset (one more
2569 // register).
2570 if (Opc == X86::POP2 || Opc == X86::POP2P)
2571 Offset += SlotSize;
2572 BuildCFI(MBB, MBBI, DL,
2573 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2574 MachineInstr::FrameDestroy);
2575 }
2576 }
2577 }
2578
2579 // Emit DWARF info specifying the restores of the callee-saved registers.
2580 // For epilogue with return inside or being other block without successor,
2581 // no need to generate .cfi_restore for callee-saved registers.
2582 if (NeedsDwarfCFI && !MBB.succ_empty())
2583 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2584
2585 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2586 // Add the return addr area delta back since we are not tail calling.
2587 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2588 assert(Offset >= 0 && "TCDelta should never be positive");
2589 if (Offset) {
2590 // Check for possible merge with preceding ADD instruction.
2591 Offset += mergeSPUpdates(MBB, Terminator, true);
2592 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2593 }
2594 }
2595
2596 // Emit tilerelease for AMX kernel.
2597 if (X86FI->hasVirtualTileReg())
2598 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2599}
2600
2601 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2602 int FI,
2603 Register &FrameReg) const {
2604 const MachineFrameInfo &MFI = MF.getFrameInfo();
2605
2606 bool IsFixed = MFI.isFixedObjectIndex(FI);
2607 // We can't calculate offset from frame pointer if the stack is realigned,
2608 // so enforce usage of stack/base pointer. The base pointer is used when we
2609 // have dynamic allocas in addition to dynamic realignment.
2610 if (TRI->hasBasePointer(MF))
2611 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2612 else if (TRI->hasStackRealignment(MF))
2613 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2614 else
2615 FrameReg = TRI->getFrameRegister(MF);
2616
2617 // Offset will hold the offset from the stack pointer at function entry to the
2618 // object.
2619 // We need to factor in additional offsets applied during the prologue to the
2620 // frame, base, and stack pointer depending on which is used.
2621 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2622 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2623 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2624 uint64_t StackSize = MFI.getStackSize();
2625 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2626 int64_t FPDelta = 0;
2627
2628 // In an x86 interrupt, remove the offset we added to account for the return
2629 // address from any stack object allocated in the caller's frame. Interrupts
2630 // do not have a standard return address. Fixed objects in the current frame,
2631 // such as SSE register spills, should not get this treatment.
2632 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2633 Offset >= 0) {
2634 Offset += getOffsetOfLocalArea();
2635 }
2636
2637 if (IsWin64Prologue) {
2638 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2639
2640 // Calculate required stack adjustment.
2641 uint64_t FrameSize = StackSize - SlotSize;
2642 // If required, include space for extra hidden slot for stashing base
2643 // pointer.
2644 if (X86FI->getRestoreBasePointer())
2645 FrameSize += SlotSize;
2646 uint64_t NumBytes = FrameSize - CSSize;
2647
2648 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2649 if (FI && FI == X86FI->getFAIndex())
2650 return StackOffset::getFixed(-SEHFrameOffset);
2651
2652 // FPDelta is the offset from the "traditional" FP location of the old base
2653 // pointer followed by return address and the location required by the
2654 // restricted Win64 prologue.
2655 // Add FPDelta to all offsets below that go through the frame pointer.
2656 FPDelta = FrameSize - SEHFrameOffset;
2657 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2658 "FPDelta isn't aligned per the Win64 ABI!");
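// Illustrative, assuming calculateSetFPREG caps the offset at 240:
// FrameSize = 336 gives FPDelta = 336 - 240 = 96, a multiple of 16 as the
// assert above requires.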
2659 }
2660
2661 if (FrameReg == TRI->getFramePtr()) {
2662 // Skip saved EBP/RBP
2663 Offset += SlotSize;
2664
2665 // Account for restricted Windows prologue.
2666 Offset += FPDelta;
2667
2668 // Skip the RETADDR move area
2669 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2670 if (TailCallReturnAddrDelta < 0)
2671 Offset -= TailCallReturnAddrDelta;
2672
2673 return StackOffset::getFixed(Offset + FPDelta);
2674 }
2675
2676 // FrameReg is either the stack pointer or a base pointer. But the base is
2677 // located at the end of the statically known StackSize so the distinction
2678 // doesn't really matter.
2679 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2680 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2681 return StackOffset::getFixed(Offset + StackSize);
2682}
2683
2684 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2685 Register &FrameReg) const {
2686 const MachineFrameInfo &MFI = MF.getFrameInfo();
2687 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2688 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2689 const auto it = WinEHXMMSlotInfo.find(FI);
2690
2691 if (it == WinEHXMMSlotInfo.end())
2692 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2693
2694 FrameReg = TRI->getStackRegister();
2695 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2696 it->second;
2697}
2698
2699 StackOffset
2700 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2701 Register &FrameReg,
2702 int Adjustment) const {
2703 const MachineFrameInfo &MFI = MF.getFrameInfo();
2704 FrameReg = TRI->getStackRegister();
2705 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2706 getOffsetOfLocalArea() + Adjustment);
2707}
2708
2709 StackOffset
2710 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2711 int FI, Register &FrameReg,
2712 bool IgnoreSPUpdates) const {
2713
2714 const MachineFrameInfo &MFI = MF.getFrameInfo();
2715 // Does not include any dynamic realign.
2716 const uint64_t StackSize = MFI.getStackSize();
2717 // LLVM arranges the stack as follows:
2718 // ...
2719 // ARG2
2720 // ARG1
2721 // RETADDR
2722 // PUSH RBP <-- RBP points here
2723 // PUSH CSRs
2724 // ~~~~~~~ <-- possible stack realignment (non-win64)
2725 // ...
2726 // STACK OBJECTS
2727 // ... <-- RSP after prologue points here
2728 // ~~~~~~~ <-- possible stack realignment (win64)
2729 //
2730 // if (hasVarSizedObjects()):
2731 // ... <-- "base pointer" (ESI/RBX) points here
2732 // DYNAMIC ALLOCAS
2733 // ... <-- RSP points here
2734 //
2735 // Case 1: In the simple case of no stack realignment and no dynamic
2736 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2737 // with fixed offsets from RSP.
2738 //
2739 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2740 // stack objects are addressed with RBP and regular stack objects with RSP.
2741 //
2742 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2743 // to address stack arguments for outgoing calls and nothing else. The "base
2744 // pointer" points to local variables, and RBP points to fixed objects.
2745 //
2746 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2747 // answer we give is relative to the SP after the prologue, and not the
2748 // SP in the middle of the function.
2749
2750 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2751 !STI.isTargetWin64())
2752 return getFrameIndexReference(MF, FI, FrameReg);
2753
2754 // If !hasReservedCallFrame the function might have SP adjustment in the
2755 // body. So, even though the offset is statically known, it depends on where
2756 // we are in the function.
2757 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2758 return getFrameIndexReference(MF, FI, FrameReg);
2759
2760 // We don't handle tail calls, and shouldn't be seeing them either.
2761 assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2762 "we don't handle this case!");
2763
2764 // This is how the math works out:
2765 //
2766 // %rsp grows (i.e. gets lower) left to right. Each box below is
2767 // one word (eight bytes). Obj0 is the stack slot we're trying to
2768 // get to.
2769 //
2770 // ----------------------------------
2771 // | BP | Obj0 | Obj1 | ... | ObjN |
2772 // ----------------------------------
2773 // ^ ^ ^ ^
2774 // A B C E
2775 //
2776 // A is the incoming stack pointer.
2777 // (B - A) is the local area offset (-8 for x86-64) [1]
2778 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2779 //
2780 // |(E - B)| is the StackSize (absolute value, positive). For a
2781 // stack that grows down, this works out to be (B - E). [3]
2782 //
2783 // E is also the value of %rsp after stack has been set up, and we
2784 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2785 // (C - E) == (C - A) - (B - A) + (B - E)
2786 // { Using [1], [2] and [3] above }
2787 // == getObjectOffset - LocalAreaOffset + StackSize
2788
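// Plugging in illustrative values: getObjectOffset = -16,
// LocalAreaOffset = -8 and StackSize = 40 give -16 - (-8) + 40 = 32 bytes
// above the post-prologue %rsp.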
2789 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2790}
2791
2792 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2793 MachineFunction &MF, const TargetRegisterInfo *TRI,
2794 std::vector<CalleeSavedInfo> &CSI) const {
2795 MachineFrameInfo &MFI = MF.getFrameInfo();
2796 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2797
2798 unsigned CalleeSavedFrameSize = 0;
2799 unsigned XMMCalleeSavedFrameSize = 0;
2800 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2801 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2802
2803 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2804
2805 if (TailCallReturnAddrDelta < 0) {
2806 // create RETURNADDR area
2807 // arg
2808 // arg
2809 // RETADDR
2810 // { ...
2811 // RETADDR area
2812 // ...
2813 // }
2814 // [EBP]
2815 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2816 TailCallReturnAddrDelta - SlotSize, true);
2817 }
2818
2819 // Spill the BasePtr if it's used.
2820 if (this->TRI->hasBasePointer(MF)) {
2821 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2822 if (MF.hasEHFunclets()) {
2823 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2824 X86FI->setHasSEHFramePtrSave(true);
2825 X86FI->setSEHFramePtrSaveIndex(FI);
2826 }
2827 }
2828
2829 if (hasFP(MF)) {
2830 // emitPrologue always spills the frame register first.
2831 SpillSlotOffset -= SlotSize;
2832 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2833
2834 // The async context lives directly before the frame pointer, and we
2835 // allocate a second slot to preserve stack alignment.
2836 if (X86FI->hasSwiftAsyncContext()) {
2837 SpillSlotOffset -= SlotSize;
2838 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2839 SpillSlotOffset -= SlotSize;
2840 }
2841
2842 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2843 // the frame register, we can delete it from CSI list and not have to worry
2844 // about avoiding it later.
2845 Register FPReg = TRI->getFrameRegister(MF);
2846 for (unsigned i = 0; i < CSI.size(); ++i) {
2847 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2848 CSI.erase(CSI.begin() + i);
2849 break;
2850 }
2851 }
2852 }
2853
2854 // Strategy:
2855 // 1. Use push2 when
2856 // a) number of CSR > 1 if no need padding
2857 // b) number of CSR > 2 if need padding
2858 // 2. When the number of CSR push is odd
2859 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2860 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2861 // 3. When the number of CSR push is even, start to use push2 from the 1st
2862 // push and make the stack 16B aligned before the push
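// Illustrative: four eligible GPR CSRs with the stack 16B aligned before the
// first push are emitted as two push2 pairs, halving the number of push
// instructions.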
2863 unsigned NumRegsForPush2 = 0;
2864 if (STI.hasPush2Pop2()) {
2865 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2866 return X86::GR64RegClass.contains(I.getReg());
2867 });
2868 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2869 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2870 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2871 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2872 if (X86FI->padForPush2Pop2()) {
2873 SpillSlotOffset -= SlotSize;
2874 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2875 }
2876 }
2877
2878 // Assign slots for GPRs. It increases frame size.
2879 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2880 Register Reg = I.getReg();
2881
2882 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2883 continue;
2884
2885 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2886 // or the number of candidates collected so far is odd.
2887 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2888 (SpillSlotOffset % 16 == 0 ||
2889 X86FI->getNumCandidatesForPush2Pop2() % 2))
2890 X86FI->addCandidateForPush2Pop2(Reg);
2891
2892 SpillSlotOffset -= SlotSize;
2893 CalleeSavedFrameSize += SlotSize;
2894
2895 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2896 I.setFrameIdx(SlotIndex);
2897 }
2898
2899 // Adjust the spill slot offset now that the exact callee-saved frame size
2900 // is known.
2901 if (X86FI->getRestoreBasePointer()) {
2902 SpillSlotOffset -= SlotSize;
2903 CalleeSavedFrameSize += SlotSize;
2904
2905 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2906 // TODO: would saving the slot index be better?
2907 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2908 }
2909 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2910 "Expect even candidates for push2/pop2");
2911 if (X86FI->getNumCandidatesForPush2Pop2())
2912 ++NumFunctionUsingPush2Pop2;
2913 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2914 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2915
2916 // Assign slots for XMMs.
2917 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2918 Register Reg = I.getReg();
2919 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2920 continue;
2921
2922 // If this is k-register make sure we lookup via the largest legal type.
2923 MVT VT = MVT::Other;
2924 if (X86::VK16RegClass.contains(Reg))
2925 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2926
2927 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2928 unsigned Size = TRI->getSpillSize(*RC);
2929 Align Alignment = TRI->getSpillAlign(*RC);
2930 // ensure alignment
2931 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2932 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2933
2934 // spill into slot
2935 SpillSlotOffset -= Size;
2936 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2937 I.setFrameIdx(SlotIndex);
2938 MFI.ensureMaxAlignment(Alignment);
2939
2940 // Save the start offset and size of XMM in stack frame for funclets.
2941 if (X86::VR128RegClass.contains(Reg)) {
2942 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2943 XMMCalleeSavedFrameSize += Size;
2944 }
2945 }
2946
2947 return true;
2948}
2949
2950 bool X86FrameLowering::spillCalleeSavedRegisters(
2951 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2952 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2953 DebugLoc DL = MBB.findDebugLoc(MI);
2954
2955 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2956 // for us, and there are no XMM CSRs on Win32.
2957 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2958 return true;
2959
2960 // Push GPRs. It increases frame size.
2961 const MachineFunction &MF = *MBB.getParent();
2962 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2963 if (X86FI->padForPush2Pop2())
2964 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2965
2966 // Update LiveIn of the basic block and decide whether we can add a kill flag
2967 // to the use.
2968 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2969 const MachineRegisterInfo &MRI = MF.getRegInfo();
2970 // Do not set a kill flag on values that are also marked as live-in. This
2971 // happens with the @llvm.returnaddress intrinsic and with arguments
2972 // passed in callee saved registers.
2973 // Omitting the kill flags is conservatively correct even if the live-in
2974 // is not used after all.
2975 if (MRI.isLiveIn(Reg))
2976 return false;
2977 MBB.addLiveIn(Reg);
2978 // Check if any subregister is live-in
2979 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2980 if (MRI.isLiveIn(*AReg))
2981 return false;
2982 return true;
2983 };
2984 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2985 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2986 };
2987
2988 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2989 Register Reg = RI->getReg();
2990 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2991 continue;
2992
2993 if (X86FI->isCandidateForPush2Pop2(Reg)) {
2994 Register Reg2 = (++RI)->getReg();
2995 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
2996 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
2997 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
2998 .setMIFlag(MachineInstr::FrameSetup);
2999 } else {
3000 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3001 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3002 .setMIFlag(MachineInstr::FrameSetup);
3003 }
3004 }
3005
3006 if (X86FI->getRestoreBasePointer()) {
3007 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3008 Register BaseReg = this->TRI->getBaseRegister();
3009 BuildMI(MBB, MI, DL, TII.get(Opc))
3010 .addReg(BaseReg, getKillRegState(true))
3011 .setMIFlag(MachineInstr::FrameSetup);
3012 }
3013
3014 // Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
3015 // so store them to the stack frame instead.
3016 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3017 Register Reg = I.getReg();
3018 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3019 continue;
3020
3021 // If this is k-register make sure we lookup via the largest legal type.
3022 MVT VT = MVT::Other;
3023 if (X86::VK16RegClass.contains(Reg))
3024 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3025
3026 // Add the callee-saved register as live-in. It's killed at the spill.
3027 MBB.addLiveIn(Reg);
3028 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3029
3030 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3031 Register());
3032 --MI;
3033 MI->setFlag(MachineInstr::FrameSetup);
3034 ++MI;
3035 }
3036
3037 return true;
3038}
3039
3040void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3041 MachineBasicBlock::iterator MBBI,
3042 MachineInstr *CatchRet) const {
3043 // SEH shouldn't use catchret.
3044 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3045 MBB.getParent()->getFunction().getPersonalityFn())) &&
3046 "SEH should not use CATCHRET");
3047 const DebugLoc &DL = CatchRet->getDebugLoc();
3048 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3049
3050 // Fill EAX/RAX with the address of the target block.
3051 if (STI.is64Bit()) {
3052 // LEA64r CatchRetTarget(%rip), %rax
3053 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3054 .addReg(X86::RIP)
3055 .addImm(0)
3056 .addReg(0)
3057 .addMBB(CatchRetTarget)
3058 .addReg(0);
3059 } else {
3060 // MOV32ri $CatchRetTarget, %eax
3061 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3062 .addMBB(CatchRetTarget);
3063 }
3064
3065 // Record that we've taken the address of CatchRetTarget and no longer just
3066 // reference it in a terminator.
3067 CatchRetTarget->setMachineBlockAddressTaken();
3068}
3069
3070 bool X86FrameLowering::restoreCalleeSavedRegisters(
3071 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3072 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3073 if (CSI.empty())
3074 return false;
3075
3076 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3077 // Don't restore CSRs in 32-bit EH funclets. Matches
3078 // spillCalleeSavedRegisters.
3079 if (STI.is32Bit())
3080 return true;
3081 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3082 // funclets. emitEpilogue transforms these to normal jumps.
3083 if (MI->getOpcode() == X86::CATCHRET) {
3084 const Function &F = MBB.getParent()->getFunction();
3085 bool IsSEH = isAsynchronousEHPersonality(
3086 classifyEHPersonality(F.getPersonalityFn()));
3087 if (IsSEH)
3088 return true;
3089 }
3090 }
3091
3092 DebugLoc DL = MBB.findDebugLoc(MI);
3093
3094 // Reload XMMs from stack frame.
3095 for (const CalleeSavedInfo &I : CSI) {
3096 Register Reg = I.getReg();
3097 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3098 continue;
3099
3100 // If this is k-register make sure we lookup via the largest legal type.
3101 MVT VT = MVT::Other;
3102 if (X86::VK16RegClass.contains(Reg))
3103 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3104
3105 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3106 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3107 Register());
3108 }
3109
3110 // Restore the base pointer register from its stack slot.
3111 MachineFunction &MF = *MBB.getParent();
3112 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3113 if (X86FI->getRestoreBasePointer()) {
3114 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3115 Register BaseReg = this->TRI->getBaseRegister();
3116 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3117 .setMIFlag(MachineInstr::FrameDestroy);
3118 }
3119
3120 // POP GPRs.
3121 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3122 Register Reg = I->getReg();
3123 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3124 continue;
3125
3126 if (X86FI->isCandidateForPush2Pop2(Reg))
3127 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3128 .addReg((++I)->getReg(), RegState::Define)
3129 .setMIFlag(MachineInstr::FrameDestroy);
3130 else
3131 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3132 .setMIFlag(MachineInstr::FrameDestroy);
3133 }
3134 if (X86FI->padForPush2Pop2())
3135 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3136
3137 return true;
3138}
3139
3140 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3141 BitVector &SavedRegs,
3142 RegScavenger *RS) const {
3143 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3144
3145 // Spill the BasePtr if it's used.
3146 if (TRI->hasBasePointer(MF)) {
3147 Register BasePtr = TRI->getBaseRegister();
3148 if (STI.isTarget64BitILP32())
3149 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3150 SavedRegs.set(BasePtr);
3151 }
3152}
3153
3154static bool HasNestArgument(const MachineFunction *MF) {
3155 const Function &F = MF->getFunction();
3156 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3157 I++) {
3158 if (I->hasNestAttr() && !I->use_empty())
3159 return true;
3160 }
3161 return false;
3162}
3163
3164/// GetScratchRegister - Get a temp register for performing work in the
3165/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3166/// and the properties of the function either one or two registers will be
3167/// needed. Set primary to true for the first register, false for the second.
3168static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3169 const MachineFunction &MF, bool Primary) {
3170 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3171
3172 // Erlang stuff.
3173 if (CallingConvention == CallingConv::HiPE) {
3174 if (Is64Bit)
3175 return Primary ? X86::R14 : X86::R13;
3176 else
3177 return Primary ? X86::EBX : X86::EDI;
3178 }
3179
3180 if (Is64Bit) {
3181 if (IsLP64)
3182 return Primary ? X86::R11 : X86::R12;
3183 else
3184 return Primary ? X86::R11D : X86::R12D;
3185 }
3186
3187 bool IsNested = HasNestArgument(&MF);
3188
3189 if (CallingConvention == CallingConv::X86_FastCall ||
3190 CallingConvention == CallingConv::Fast ||
3191 CallingConvention == CallingConv::Tail) {
3192 if (IsNested)
3193 report_fatal_error("Segmented stacks do not support fastcall with "
3194 "nested function.");
3195 return Primary ? X86::EAX : X86::ECX;
3196 }
3197 if (IsNested)
3198 return Primary ? X86::EDX : X86::EAX;
3199 return Primary ? X86::ECX : X86::EAX;
3200}
3201
3202// The stack limit in the TCB is set to this many bytes above the actual stack
3203// limit.
3204 static const uint64_t kSplitStackAvailable = 256;
3205
3206 void X86FrameLowering::adjustForSegmentedStacks(
3207 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3208 MachineFrameInfo &MFI = MF.getFrameInfo();
3209 uint64_t StackSize;
3210 unsigned TlsReg, TlsOffset;
3211 DebugLoc DL;
3212
3213 // To support shrink-wrapping we would need to insert the new blocks
3214 // at the right place and update the branches to PrologueMBB.
3215 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3216
3217 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3218 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3219 "Scratch register is live-in");
3220
3221 if (MF.getFunction().isVarArg())
3222 report_fatal_error("Segmented stacks do not support vararg functions.");
3223 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3224 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3225 !STI.isTargetDragonFly())
3226 report_fatal_error("Segmented stacks not supported on this platform.");
3227
3228 // Eventually StackSize will be calculated by a link-time pass, which will
3229 // also decide whether checking code needs to be injected into this particular
3230 // prologue.
3231 StackSize = MFI.getStackSize();
3232
3233 if (!MFI.needsSplitStackProlog())
3234 return;
3235
3236 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3237 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3238 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3239 bool IsNested = false;
3240
3241 // We need to know if the function has a nest argument only in 64 bit mode.
3242 if (Is64Bit)
3243 IsNested = HasNestArgument(&MF);
3244
3245 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3246 // allocMBB needs to be the last (terminating) instruction.
3247
3248 for (const auto &LI : PrologueMBB.liveins()) {
3249 allocMBB->addLiveIn(LI);
3250 checkMBB->addLiveIn(LI);
3251 }
3252
3253 if (IsNested)
3254 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3255
3256 MF.push_front(allocMBB);
3257 MF.push_front(checkMBB);
3258
3259 // When the frame size is less than 256 we just compare the stack
3260 // boundary directly to the value of the stack pointer, per gcc.
3261 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3262
3263 // Read the limit of the current stacklet from the stack_guard location.
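// The segment register and TCB offset holding that limit are OS-specific;
// the platform chains below hard-code each known layout.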
3264 if (Is64Bit) {
3265 if (STI.isTargetLinux()) {
3266 TlsReg = X86::FS;
3267 TlsOffset = IsLP64 ? 0x70 : 0x40;
3268 } else if (STI.isTargetDarwin()) {
3269 TlsReg = X86::GS;
3270 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3271 } else if (STI.isTargetWin64()) {
3272 TlsReg = X86::GS;
3273 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3274 } else if (STI.isTargetFreeBSD()) {
3275 TlsReg = X86::FS;
3276 TlsOffset = 0x18;
3277 } else if (STI.isTargetDragonFly()) {
3278 TlsReg = X86::FS;
3279 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3280 } else {
3281 report_fatal_error("Segmented stacks not supported on this platform.");
3282 }
3283
3284 if (CompareStackPointer)
3285 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3286 else
3287 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3288 ScratchReg)
3289 .addReg(X86::RSP)
3290 .addImm(1)
3291 .addReg(0)
3292 .addImm(-StackSize)
3293 .addReg(0);
3294
3295 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3296 .addReg(ScratchReg)
3297 .addReg(0)
3298 .addImm(1)
3299 .addReg(0)
3300 .addImm(TlsOffset)
3301 .addReg(TlsReg);
3302 } else {
3303 if (STI.isTargetLinux()) {
3304 TlsReg = X86::GS;
3305 TlsOffset = 0x30;
3306 } else if (STI.isTargetDarwin()) {
3307 TlsReg = X86::GS;
3308 TlsOffset = 0x48 + 90 * 4;
3309 } else if (STI.isTargetWin32()) {
3310 TlsReg = X86::FS;
3311 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3312 } else if (STI.isTargetDragonFly()) {
3313 TlsReg = X86::FS;
3314 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3315 } else if (STI.isTargetFreeBSD()) {
3316 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3317 } else {
3318 report_fatal_error("Segmented stacks not supported on this platform.");
3319 }
3320
3321 if (CompareStackPointer)
3322 ScratchReg = X86::ESP;
3323 else
3324 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3325 .addReg(X86::ESP)
3326 .addImm(1)
3327 .addReg(0)
3328 .addImm(-StackSize)
3329 .addReg(0);
3330
3331 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3332 STI.isTargetDragonFly()) {
3333 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3334 .addReg(ScratchReg)
3335 .addReg(0)
3336 .addImm(0)
3337 .addReg(0)
3338 .addImm(TlsOffset)
3339 .addReg(TlsReg);
3340 } else if (STI.isTargetDarwin()) {
3341
3342 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3343 unsigned ScratchReg2;
3344 bool SaveScratch2;
3345 if (CompareStackPointer) {
3346 // The primary scratch register is available for holding the TLS offset.
3347 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3348 SaveScratch2 = false;
3349 } else {
3350 // Need to use a second register to hold the TLS offset
3351 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3352
3353 // Unfortunately, with fastcc the second scratch register may hold an
3354 // argument.
3355 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3356 }
3357
3358 // If Scratch2 is live-in then it needs to be saved.
3359 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3360 "Scratch register is live-in and not saved");
3361
3362 if (SaveScratch2)
3363 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3364 .addReg(ScratchReg2, RegState::Kill);
3365
3366 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3367 .addImm(TlsOffset);
3368 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3369 .addReg(ScratchReg)
3370 .addReg(ScratchReg2)
3371 .addImm(1)
3372 .addReg(0)
3373 .addImm(0)
3374 .addReg(TlsReg);
3375
3376 if (SaveScratch2)
3377 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3378 }
3379 }
3380
3381 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3382 // It jumps to normal execution of the function body.
3383 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3384 .addMBB(&PrologueMBB)
3385 .addImm(X86::COND_A);
3386
3387 // On 32 bit we first push the arguments size and then the frame size. On 64
3388 // bit, we pass the stack frame size in r10 and the argument size in r11.
3389 if (Is64Bit) {
3390 // Functions with nested arguments use R10, so it needs to be saved across
3391 // the call to _morestack
3392
3393 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3394 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3395 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3396 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3397
3398 if (IsNested)
3399 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3400
3401 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3402 .addImm(StackSize);
3403 BuildMI(allocMBB, DL,
3404 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3405 Reg11)
3406 .addImm(X86FI->getArgumentStackSize());
3407 } else {
3408 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3409 .addImm(X86FI->getArgumentStackSize());
3410 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3411 }
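  // Taken together (illustrative mnemonics only), the allocMBB sequence
  // assembled here and below amounts to:
  //   64-bit: mov $StackSize, %r10 ; mov $ArgSize, %r11 ; call __morestack
  //   32-bit: push $ArgSize ; push $StackSize ; call __morestack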
3412
3413 // __morestack is in libgcc
3414 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3415 // Under the large code model, we cannot assume that __morestack lives
3416 // within 2^31 bytes of the call site, so we cannot use pc-relative
3417 // addressing. We cannot perform the call via a temporary register,
3418 // as the rax register may be used to store the static chain, and all
3419 // other suitable registers may be either callee-save or used for
3420 // parameter passing. We cannot use the stack at this point either
3421 // because __morestack manipulates the stack directly.
3422 //
3423 // To avoid these issues, perform an indirect call via a read-only memory
3424 // location containing the address.
3425 //
3426 // This solution is not perfect, as it assumes that the .rodata section
3427 // is laid out within 2^31 bytes of each function body, but this seems
3428 // to be sufficient for JIT.
3429 // FIXME: Add retpoline support and remove the error here.
3430 if (STI.useIndirectThunkCalls())
3431 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3432 "code model and thunks not yet implemented.");
3433 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3434 .addReg(X86::RIP)
3435 .addImm(0)
3436 .addReg(0)
3437 .addExternalSymbol("__morestack_addr")
3438 .addReg(0);
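    // The five operands above encode the memory form
    // "call *__morestack_addr(%rip)"; __morestack_addr is assumed to be a
    // pointer-sized read-only slot holding the address of __morestack.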
3439 } else {
3440 if (Is64Bit)
3441 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3442 .addExternalSymbol("__morestack");
3443 else
3444 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3445 .addExternalSymbol("__morestack");
3446 }
3447
3448 if (IsNested)
3449 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3450 else
3451 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3452
3453 allocMBB->addSuccessor(&PrologueMBB);
3454
3455 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3456 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3457
3458#ifdef EXPENSIVE_CHECKS
3459 MF.verify();
3460#endif
3461}
3462
3463/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3464/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3465/// to fields it needs, through a named metadata node "hipe.literals" containing
3466/// name-value pairs.
3467static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3468 const StringRef LiteralName) {
3469 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3470 MDNode *Node = HiPELiteralsMD->getOperand(i);
3471 if (Node->getNumOperands() != 2)
3472 continue;
3473 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3474 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3475 if (!NodeName || !NodeVal)
3476 continue;
3477 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3478 if (ValConst && NodeName->getString() == LiteralName) {
3479 return ValConst->getZExtValue();
3480 }
3481 }
3482
3483 report_fatal_error("HiPE literal " + LiteralName +
3484 " required but not provided");
3485}
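// Shape of the metadata this helper consumes (values illustrative):
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 <offset>}
//   !1 = !{!"X86_LEAF_WORDS", i32 <words>}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 <words>}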
3486
3487// Return true if there are no non-ehpad successors to MBB and there are no
3488// non-meta instructions between MBBI and MBB.end().
3489static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3490 MachineBasicBlock::const_iterator MBBI) {
3491 return llvm::all_of(
3492 MBB.successors(),
3493 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3494 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3495 return MI.isMetaInstruction();
3496 });
3497}
3498
3499/// Erlang programs may need a special prologue to handle the stack size they
3500/// might need at runtime. That is because Erlang/OTP does not use a C
3501/// stack but instead implements a custom hybrid stack/heap architecture.
3502/// (for more information see Eric Stenman's Ph.D. thesis:
3503/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3504///
3505/// CheckStack:
3506/// temp0 = sp - MaxStack
3507/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3508/// OldStart:
3509/// ...
3510/// IncStack:
3511/// call inc_stack # doubles the stack space
3512/// temp0 = sp - MaxStack
3513/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3514void X86FrameLowering::adjustForHiPEPrologue(
3515 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3516 MachineFrameInfo &MFI = MF.getFrameInfo();
3517 DebugLoc DL;
3518
3519 // To support shrink-wrapping we would need to insert the new blocks
3520 // at the right place and update the branches to PrologueMBB.
3521 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3522
3523 // HiPE-specific values
3524 NamedMDNode *HiPELiteralsMD =
3525 MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
3526 if (!HiPELiteralsMD)
3528 "Can't generate HiPE prologue without runtime parameters");
3529 const unsigned HipeLeafWords = getHiPELiteral(
3530 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3531 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3532 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3533 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3534 ? MF.getFunction().arg_size() - CCRegisteredArgs
3535 : 0;
3536 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
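  // Illustrative arithmetic: on x86-64 (SlotSize == 8, CCRegisteredArgs == 6),
  // a function with a 40-byte frame and 8 formal arguments has
  // CallerStkArity == 2, so MaxStack starts at 40 + 2 * 8 + 8 == 64 before
  // the per-call adjustment below.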
3537
3539 "HiPE prologue is only supported on Linux operating systems.");
3540
3541 // Compute the largest caller's frame that is needed to fit the callees'
3542 // frames. This 'MaxStack' is computed from:
3543 //
3544 // a) the fixed frame size, which is the space needed for all spilled temps,
3545 // b) outgoing on-stack parameter areas, and
3546 // c) the minimum stack space this function needs to make available for the
3547 // functions it calls (a tunable ABI property).
3548 if (MFI.hasCalls()) {
3549 unsigned MoreStackForCalls = 0;
3550
3551 for (auto &MBB : MF) {
3552 for (auto &MI : MBB) {
3553 if (!MI.isCall())
3554 continue;
3555
3556 // Get callee operand.
3557 const MachineOperand &MO = MI.getOperand(0);
3558
3559 // Only take account of global function calls (no closures etc.).
3560 if (!MO.isGlobal())
3561 continue;
3562
3563 const Function *F = dyn_cast<Function>(MO.getGlobal());
3564 if (!F)
3565 continue;
3566
3567 // Do not update 'MaxStack' for primitive and built-in functions, as
3568 // they are executed on another stack. These are encoded with names
3569 // that either start with "erlang." or "bif_", or that contain neither
3570 // a "." (as a regular <Module>.<Function>.<Arity> would) nor an "_"
3571 // (as the BIF "suspend_0" does).
3572 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3573 F->getName().find_first_of("._") == StringRef::npos)
3574 continue;
3575
3576 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3577 ? F->arg_size() - CCRegisteredArgs
3578 : 0;
3579 if (HipeLeafWords - 1 > CalleeStkArity)
3580 MoreStackForCalls =
3581 std::max(MoreStackForCalls,
3582 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3583 }
3584 }
3585 MaxStack += MoreStackForCalls;
3586 }
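  // Illustrative: a call to an ordinary Erlang function taking no stack
  // arguments reserves an extra (HipeLeafWords - 1) * SlotSize bytes, the
  // "leaf words" a callee may use without performing its own stack check.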
3587
3588 // If the needed stack frame is larger than the guaranteed size, runtime
3589 // checks and calls to the "inc_stack_0" BIF are inserted in the prologue.
3590 if (MaxStack > Guaranteed) {
3591 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3592 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3593
3594 for (const auto &LI : PrologueMBB.liveins()) {
3595 stackCheckMBB->addLiveIn(LI);
3596 incStackMBB->addLiveIn(LI);
3597 }
3598
3599 MF.push_front(incStackMBB);
3600 MF.push_front(stackCheckMBB);
3601
3602 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3603 unsigned LEAop, CMPop, CALLop;
3604 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3605 if (Is64Bit) {
3606 SPReg = X86::RSP;
3607 PReg = X86::RBP;
3608 LEAop = X86::LEA64r;
3609 CMPop = X86::CMP64rm;
3610 CALLop = X86::CALL64pcrel32;
3611 } else {
3612 SPReg = X86::ESP;
3613 PReg = X86::EBP;
3614 LEAop = X86::LEA32r;
3615 CMPop = X86::CMP32rm;
3616 CALLop = X86::CALLpcrel32;
3617 }
3618
3619 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3620 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3621 "HiPE prologue scratch register is live-in");
3622
3623 // Create new MBB for StackCheck:
3624 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3625 false, -MaxStack);
3626 // SPLimitOffset is in a fixed heap location (pointed by BP).
3627 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3628 PReg, false, SPLimitOffset);
3629 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3630 .addMBB(&PrologueMBB)
3631 .addImm(X86::COND_AE);
3632
3633 // Create new MBB for IncStack:
3634 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3635 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3636 false, -MaxStack);
3637 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3638 PReg, false, SPLimitOffset);
3639 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3640 .addMBB(incStackMBB)
3641 .addImm(X86::COND_LE);
3642
3643 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3644 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3645 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3646 incStackMBB->addSuccessor(incStackMBB, {1, 100});
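    // The {99, 100} vs. {1, 100} weights mark the inc_stack_0 path as cold:
    // the limit check is expected to pass and fall through to the normal
    // prologue almost every time.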
3647 }
3648#ifdef EXPENSIVE_CHECKS
3649 MF.verify();
3650#endif
3651}
3652
3653bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3654 MachineBasicBlock::iterator MBBI,
3655 const DebugLoc &DL,
3656 int Offset) const {
3657 if (Offset <= 0)
3658 return false;
3659
3660 if (Offset % SlotSize)
3661 return false;
3662
3663 int NumPops = Offset / SlotSize;
3664 // This is only worth it if we have at most 2 pops.
3665 if (NumPops != 1 && NumPops != 2)
3666 return false;
3667
3668 // Handle only the trivial case where the adjustment directly follows
3669 // a call. This is the most common one, anyway.
3670 if (MBBI == MBB.begin())
3671 return false;
3672 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3673 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3674 return false;
3675
3676 unsigned Regs[2];
3677 unsigned FoundRegs = 0;
3678
3679 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3680 const MachineOperand &RegMask = Prev->getOperand(1);
3681
3682 auto &RegClass =
3683 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3684 // Try to find up to NumPops free registers.
3685 for (auto Candidate : RegClass) {
3686 // Poor man's liveness:
3687 // Since we're immediately after a call, any register that is clobbered
3688 // by the call and not defined by it can be considered dead.
3689 if (!RegMask.clobbersPhysReg(Candidate))
3690 continue;
3691
3692 // Don't clobber reserved registers
3693 if (MRI.isReserved(Candidate))
3694 continue;
3695
3696 bool IsDef = false;
3697 for (const MachineOperand &MO : Prev->implicit_operands()) {
3698 if (MO.isReg() && MO.isDef() &&
3699 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3700 IsDef = true;
3701 break;
3702 }
3703 }
3704
3705 if (IsDef)
3706 continue;
3707
3708 Regs[FoundRegs++] = Candidate;
3709 if (FoundRegs == (unsigned)NumPops)
3710 break;
3711 }
3712
3713 if (FoundRegs == 0)
3714 return false;
3715
3716 // If we found only one free register, but need two, reuse the same one twice.
3717 while (FoundRegs < (unsigned)NumPops)
3718 Regs[FoundRegs++] = Regs[0];
3719
3720 for (int i = 0; i < NumPops; ++i)
3721 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3722 Regs[i]);
3723
3724 return true;
3725}
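// Illustrative effect: right after a call, an "add esp, 8" emitted at
// minsize can become "pop ecx; pop edx" when both registers are clobbered
// by the call and not defined by it; if only one suitable register is
// found, it is popped twice.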
3726
3727MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3728 MachineFunction &MF, MachineBasicBlock &MBB,
3729 MachineBasicBlock::iterator I) const {
3730 bool reserveCallFrame = hasReservedCallFrame(MF);
3731 unsigned Opcode = I->getOpcode();
3732 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3733 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3734 uint64_t Amount = TII.getFrameSize(*I);
3735 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3736 I = MBB.erase(I);
3737 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3738
3739 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3740 // typically because the function is marked noreturn (abort, throw,
3741 // assert_fail, etc).
3742 if (isDestroy && blockEndIsUnreachable(MBB, I))
3743 return I;
3744
3745 if (!reserveCallFrame) {
3746 // If the stack pointer can be changed after prologue, turn the
3747 // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
3748 // adjcallstackup instruction into an 'add ESP, <amt>'.
3749
3750 // We need to keep the stack aligned properly. To do this, we round the
3751 // amount of space needed for the outgoing arguments up to the next
3752 // alignment boundary.
3753 Amount = alignTo(Amount, getStackAlign());
3754
3755 const Function &F = MF.getFunction();
3756 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3757 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3758
3759 // If we have any exception handlers in this function, and we adjust
3760 // the SP before calls, we may need to indicate this to the unwinder
3761 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3762 // Amount == 0, because the preceding function may have set a non-0
3763 // GNU_ARGS_SIZE.
3764 // TODO: We don't need to reset this between subsequent functions,
3765 // if it didn't change.
3766 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3767
3768 if (HasDwarfEHHandlers && !isDestroy &&
3769 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3770 BuildCFI(MBB, InsertPos, DL,
3771 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3772
3773 if (Amount == 0)
3774 return I;
3775
3776 // Factor out the amount that gets handled inside the sequence
3777 // (Pushes of argument for frame setup, callee pops for frame destroy)
3778 Amount -= InternalAmt;
3779
3780 // TODO: This is needed only if we require precise CFA.
3781 // If this is a callee-pop calling convention, emit a CFA adjust for
3782 // the amount the callee popped.
3783 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3784 BuildCFI(MBB, InsertPos, DL,
3785 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3786
3787 // Add Amount to SP to destroy a frame, or subtract to setup.
3788 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3789
3790 if (StackAdjustment) {
3791 // Merge with any previous or following adjustment instruction. Note: the
3792 // instructions merged with here do not have CFI, so their stack
3793 // adjustments do not feed into CfaAdjustment.
3794 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3795 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3796
3797 if (StackAdjustment) {
3798 if (!(F.hasMinSize() &&
3799 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3800 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3801 /*InEpilogue=*/false);
3802 }
3803 }
3804
3805 if (DwarfCFI && !hasFP(MF)) {
3806 // If we don't have FP, but need to generate unwind information,
3807 // we need to set the correct CFA offset after the stack adjustment.
3808 // How much we adjust the CFA offset depends on whether we're emitting
3809 // CFI only for EH purposes or for debugging. EH only requires the CFA
3810 // offset to be correct at each call site, while for debugging we want
3811 // it to be more precise.
3812
3813 int64_t CfaAdjustment = -StackAdjustment;
3814 // TODO: When not using precise CFA, we also need to adjust for the
3815 // InternalAmt here.
3816 if (CfaAdjustment) {
3817 BuildCFI(
3818 MBB, InsertPos, DL,
3819 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3820 }
3821 }
3822
3823 return I;
3824 }
3825
3826 if (InternalAmt) {
3827 MachineBasicBlock::iterator CI = I;
3828 MachineBasicBlock::iterator B = MBB.begin();
3829 while (CI != B && !std::prev(CI)->isCall())
3830 --CI;
3831 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3832 }
3833
3834 return I;
3835}
3836
3837bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3838 assert(MBB.getParent() && "Block is not attached to a function!");
3839 const MachineFunction &MF = *MBB.getParent();
3840 if (!MBB.isLiveIn(X86::EFLAGS))
3841 return true;
3842
3843 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3844 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3845 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3846 const X86TargetLowering &TLI = *STI.getTargetLowering();
3847 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3848 return false;
3849
3850 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3851 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3852}
3853
3854bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3855 assert(MBB.getParent() && "Block is not attached to a function!");
3856
3857 // Win64 has strict requirements in terms of epilogue and we are
3858 // not taking a chance at messing with them.
3859 // I.e., unless this block is already an exit block, we can't use
3860 // it as an epilogue.
3861 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3862 return false;
3863
3864 // Swift async context epilogue has a BTR instruction that clobbers parts of
3865 // EFLAGS.
3866 const MachineFunction &MF = *MBB.getParent();
3867 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3868 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3869
3870 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3871 return true;
3872
3873 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3874 // clobbers the EFLAGS. Check that we do not need to preserve it,
3875 // otherwise, conservatively assume this is not
3876 // safe to insert the epilogue here.
3877 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3878}
3879
3880bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3881 // If we may need to emit frameless compact unwind information, give
3882 // up as this is currently broken: PR25614.
3883 bool CompactUnwind =
3884 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3885 nullptr;
3886 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3887 !CompactUnwind) &&
3888 // The lowering of segmented stack and HiPE only support entry
3889 // blocks as prologue blocks: PR26107. This limitation may be
3890 // lifted if we fix:
3891 // - adjustForSegmentedStacks
3892 // - adjustForHiPEPrologue
3893 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3894 !MF.shouldSplitStack();
3895}
3896
3897MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3898 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3899 const DebugLoc &DL, bool RestoreSP) const {
3900 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3901 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3902 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3903 "restoring EBP/ESI on non-32-bit target");
3904
3905 MachineFunction &MF = *MBB.getParent();
3906 Register FramePtr = TRI->getFrameRegister(MF);
3907 Register BasePtr = TRI->getBaseRegister();
3908 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3909 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3910 MachineFrameInfo &MFI = MF.getFrameInfo();
3911
3912 // FIXME: Don't set FrameSetup flag in catchret case.
3913
3914 int FI = FuncInfo.EHRegNodeFrameIndex;
3915 int EHRegSize = MFI.getObjectSize(FI);
3916
3917 if (RestoreSP) {
3918 // MOV32rm -EHRegSize(%ebp), %esp
3919 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3920 X86::EBP, true, -EHRegSize)
3921 .setMIFlag(MachineInstr::FrameSetup);
3922 }
3923
3924 Register UsedReg;
3925 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3926 int EndOffset = -EHRegOffset - EHRegSize;
3927 FuncInfo.EHRegNodeEndOffset = EndOffset;
3928
3929 if (UsedReg == FramePtr) {
3930 // ADD $offset, %ebp
3931 unsigned ADDri = getADDriOpcode(false);
3932 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3933 .addReg(FramePtr)
3934 .addImm(EndOffset)
3935 .setMIFlag(MachineInstr::FrameSetup)
3936 ->getOperand(3)
3937 .setIsDead();
3938 assert(EndOffset >= 0 &&
3939 "end of registration object above normal EBP position!");
3940 } else if (UsedReg == BasePtr) {
3941 // LEA offset(%ebp), %esi
3942 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3943 FramePtr, false, EndOffset)
3944 .setMIFlag(MachineInstr::FrameSetup);
3945 // MOV32rm SavedEBPOffset(%esi), %ebp
3946 assert(X86FI->getHasSEHFramePtrSave());
3947 int Offset =
3948 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3949 .getFixed();
3950 assert(UsedReg == BasePtr);
3951 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3952 UsedReg, true, Offset)
3953 .setMIFlag(MachineInstr::FrameSetup);
3954 } else {
3955 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3956 }
3957 return MBBI;
3958}
3959
3960int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3961 return TRI->getSlotSize();
3962}
3963
3964Register
3965X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3966 return StackPtr;
3967}
3968
3969TargetFrameLowering::DwarfFrameBase
3970X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3971 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3972 Register FrameRegister = RI->getFrameRegister(MF);
3973 if (getInitialCFARegister(MF) == FrameRegister &&
3974 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3975 DwarfFrameBase FrameBase;
3976 FrameBase.Kind = DwarfFrameBase::CFA;
3977 FrameBase.Location.Offset =
3978 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3979 return FrameBase;
3980 }
3981
3982 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3983}
3984
3985namespace {
3986// Struct used by orderFrameObjects to help sort the stack objects.
3987struct X86FrameSortingObject {
3988 bool IsValid = false; // true if we care about this Object.
3989 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3990 unsigned ObjectSize = 0; // Size of Object in bytes.
3991 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3992 unsigned ObjectNumUses = 0; // Object static number of uses.
3993};
3994
3995// The comparison function we use for std::sort to order our local
3996// stack symbols. The current algorithm is to use an estimated
3997// "density". This takes into consideration the size and number of
3998// uses each object has in order to roughly minimize code size.
3999// So, for example, an object of size 16B that is referenced 5 times
4000// will get higher priority than 4 4B objects referenced 1 time each.
4001// It's not perfect and we may be able to squeeze a few more bytes out of
4002// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4003// fringe end can have special consideration, given their size is less
4004// important, etc.), but the algorithmic complexity grows too much to be
4005// worth the extra gains we get. This gets us pretty close.
4006// The final order leaves us with objects with highest priority going
4007// at the end of our list.
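// Worked example of the scaling below: comparing the 16-byte object with
// 5 uses against one 4-byte object with 1 use gives 5 * 4 = 20 versus
// 1 * 16 = 16, so the denser object compares greater and sorts toward the
// end of the list, i.e. it gets higher priority.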
4008struct X86FrameSortingComparator {
4009 inline bool operator()(const X86FrameSortingObject &A,
4010 const X86FrameSortingObject &B) const {
4011 uint64_t DensityAScaled, DensityBScaled;
4012
4013 // For consistency in our comparison, all invalid objects are placed
4014 // at the end. This also allows us to stop walking when we hit the
4015 // first invalid item after it's all sorted.
4016 if (!A.IsValid)
4017 return false;
4018 if (!B.IsValid)
4019 return true;
4020
4021 // The density is calculated by doing :
4022 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4023 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4024 // Since this approach may cause inconsistencies in
4025 // the floating point <, >, == comparisons, depending on the floating
4026 // point model with which the compiler was built, we're going
4027 // to scale both sides by multiplying with
4028 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4029 // the division and, with it, the need for any floating point
4030 // arithmetic.
4031 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4032 static_cast<uint64_t>(B.ObjectSize);
4033 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4034 static_cast<uint64_t>(A.ObjectSize);
4035
4036 // If the two densities are equal, prioritize highest alignment
4037 // objects. This allows for similar alignment objects
4038 // to be packed together (given the same density).
4039 // There's room for improvement here, also, since we can pack
4040 // similar alignment (different density) objects next to each
4041 // other to save padding. This will also require further
4042 // complexity/iterations, and the overall gain isn't worth it,
4043 // in general. Something to keep in mind, though.
4044 if (DensityAScaled == DensityBScaled)
4045 return A.ObjectAlignment < B.ObjectAlignment;
4046
4047 return DensityAScaled < DensityBScaled;
4048 }
4049};
4050} // namespace
4051
4052// Order the symbols in the local stack.
4053// We want to place the local stack objects in some sort of sensible order.
4054// The heuristic we use is to try and pack them according to static number
4055// of uses and size of object in order to minimize code size.
4057 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4058 const MachineFrameInfo &MFI = MF.getFrameInfo();
4059
4060 // Don't waste time if there's nothing to do.
4061 if (ObjectsToAllocate.empty())
4062 return;
4063
4064 // Create an array of all MFI objects. We won't need all of these
4065 // objects, but we're going to create a full array of them to make
4066 // it easier to index into when we're counting "uses" down below.
4067 // We want to be able to easily/cheaply access an object by simply
4068 // indexing into it, instead of having to search for it every time.
4069 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4070
4071 // Walk the objects we care about and mark them as such in our working
4072 // struct.
4073 for (auto &Obj : ObjectsToAllocate) {
4074 SortingObjects[Obj].IsValid = true;
4075 SortingObjects[Obj].ObjectIndex = Obj;
4076 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4077 // Set the size.
4078 int ObjectSize = MFI.getObjectSize(Obj);
4079 if (ObjectSize == 0)
4080 // Variable size. Just use 4.
4081 SortingObjects[Obj].ObjectSize = 4;
4082 else
4083 SortingObjects[Obj].ObjectSize = ObjectSize;
4084 }
4085
4086 // Count the number of uses for each object.
4087 for (auto &MBB : MF) {
4088 for (auto &MI : MBB) {
4089 if (MI.isDebugInstr())
4090 continue;
4091 for (const MachineOperand &MO : MI.operands()) {
4092 // Check to see if it's a local stack symbol.
4093 if (!MO.isFI())
4094 continue;
4095 int Index = MO.getIndex();
4096 // Check to see if it falls within our range, and is tagged
4097 // to require ordering.
4098 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4099 SortingObjects[Index].IsValid)
4100 SortingObjects[Index].ObjectNumUses++;
4101 }
4102 }
4103 }
4104
4105 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4106 // info).
4107 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4108
4109 // Now modify the original list to represent the final order we want.
4110 // The order depends on whether we're going to access the objects from
4111 // the stack pointer or the frame pointer. For SP, the objects we want
4112 // at smaller offsets should end up at the END of the list; for FP, the
4113 // order is flipped.
4114 int i = 0;
4115 for (auto &Obj : SortingObjects) {
4116 // All invalid items are sorted at the end, so it's safe to stop.
4117 if (!Obj.IsValid)
4118 break;
4119 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4120 }
4121
4122 // Flip it if we're accessing off of the FP.
4123 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4124 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4125}
4126
4126
4127unsigned
4128X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
4129 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4130 unsigned Offset = 16;
4131 // RBP is immediately pushed.
4132 Offset += SlotSize;
4133 // All callee-saved registers are then pushed.
4134 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4135 // Every funclet allocates enough stack space for the largest outgoing call.
4136 Offset += getWinEHFuncletFrameSize(MF);
4137 return Offset;
4138}
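// Illustrative sum: with SlotSize == 8, 24 bytes of pushed callee-saved
// registers, and a 32-byte funclet frame, the parent frame offset is
// 16 + 8 + 24 + 32 == 80.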
4139
4140void X86FrameLowering::processFunctionBeforeFrameFinalized(
4141 MachineFunction &MF, RegScavenger *RS) const {
4142 // Mark the function as not having WinCFI. We will set it back to true in
4143 // emitPrologue if it gets called and emits CFI.
4144 MF.setHasWinCFI(false);
4145
4146 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4147 // aligned. The format doesn't support misaligned stack adjustments.
4148 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4149 MF.getFrameInfo().ensureMaxAlignment(Align(8));
4150
4151 // If this function isn't doing Win64-style C++ EH, we don't need to do
4152 // anything.
4153 if (STI.is64Bit() && MF.hasEHFunclets() &&
4154 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4155 EHPersonality::MSVC_CXX) {
4156 adjustFrameForMsvcCxxEh(MF);
4157 }
4158}
4159
4160void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4161 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4162 // relative to RSP after the prologue. Find the offset of the last fixed
4163 // object, so that we can allocate a slot immediately following it. If there
4164 // were no fixed objects, use offset -SlotSize, which is immediately after the
4165 // return address. Fixed objects have negative frame indices.
4166 MachineFrameInfo &MFI = MF.getFrameInfo();
4167 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4168 int64_t MinFixedObjOffset = -SlotSize;
4169 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4170 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4171
4172 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4173 for (WinEHHandlerType &H : TBME.HandlerArray) {
4174 int FrameIndex = H.CatchObj.FrameIndex;
4175 if (FrameIndex != INT_MAX) {
4176 // Ensure alignment.
4177 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4178 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4179 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4180 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4181 }
4182 }
4183 }
4184
4185 // Ensure alignment.
4186 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
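  // E.g. a MinFixedObjOffset of -20 has abs(-20) % 8 == 4 subtracted,
  // giving -24, the next 8-byte-aligned offset below it.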
4187 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4188 int UnwindHelpFI =
4189 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4190 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4191
4192 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4193 // other frame setup instructions.
4194 MachineBasicBlock &MBB = MF.front();
4195 auto MBBI = MBB.begin();
4196 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4197 ++MBBI;
4198
4199 DebugLoc DL = MBB.findDebugLoc(MBBI);
4200 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4201 UnwindHelpFI)
4202 .addImm(-2);
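  // The -2 stored here is understood to be the initial EH state the MSVC
  // x64 runtime expects in UnwindHelp, i.e. no try-region has been entered
  // yet.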
4203}
4204
4205void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4206 MachineFunction &MF, RegScavenger *RS) const {
4207 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4208
4209 if (STI.is32Bit() && MF.hasEHFunclets())
4210 restoreWinEHStackPointersInParent(MF);
4211 // We have emitted prolog and epilog. Don't need stack pointer saving
4212 // instruction any more.
4213 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4214 MI->eraseFromParent();
4215 X86FI->setStackPtrSaveMI(nullptr);
4216 }
4217}
4218
4219void X86FrameLowering::restoreWinEHStackPointersInParent(
4220 MachineFunction &MF) const {
4221 // 32-bit functions have to restore stack pointers when control is transferred
4222 // back to the parent function. These blocks are identified as eh pads that
4223 // are not funclet entries.
4224 bool IsSEH = isAsynchronousEHPersonality(
4225 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4226 for (MachineBasicBlock &MBB : MF) {
4227 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4228 if (NeedsRestore)
4229 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4230 /*RestoreSP=*/IsSEH);
4231 }
4232}
unsigned const MachineRegisterInfo * MRI
static bool isFuncletReturnInstr(const MachineInstr &MI)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const uint64_t kSplitStackAvailable
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:263
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:851
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1903
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
size_t arg_size() const
Definition: Function.h:847
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:656
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:214
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:548
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:616
OpType getOperation() const
Definition: MCDwarf.h:658
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:564
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:653
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:457
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:455
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1067
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
MachineModuleInfo & getMMI() const
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:561
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:487
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
const MCContext & getContext() const
const Module * getModule() const
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:260
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:575
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
A tuple of MDNodes.
Definition: Metadata.h:1729
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1382
unsigned getNumOperands() const
Definition: Metadata.cpp:1378
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:619
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:553
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will be the preferred value for FrameReg.
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions.
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e.g. a call).
Definition: X86InstrInfo.h:203
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-specific information for each MachineFunction.
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" instruction.
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
bool isOSWindows() const
Definition: X86Subtarget.h:335
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:125
bool isTargetDragonFly() const
Definition: X86Subtarget.h:295
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:313
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:180
bool isTargetDarwin() const
Definition: X86Subtarget.h:293
bool isTargetWin64() const
Definition: X86Subtarget.h:337
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:185
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP, or set it based on a symbol in the runtime.
Definition: X86Subtarget.h:399
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:317
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:129
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:350
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:294
bool isTargetNaCl64() const
Definition: X86Subtarget.h:309
bool isTargetWin32() const
Definition: X86Subtarget.h:339
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:232
bool isTargetLinux() const
Definition: X86Subtarget.h:303
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for the symbol name from the base of the GOT.
Definition: X86BaseInfo.h:401
CallingConvention
Definition: Dwarf.h:421
@ Offset
Definition: DWP.cpp:456
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
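A minimal usage sketch, assuming MBB, MBBI, DL and TII are in scope; opcode and registers are chosen purely for illustration:
  // Materialize an immediate into RAX before MBBI.
  BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64ri), X86::RAX)
      .addImm(42);
  // Push RBX, marking this as its last use via getKillRegState().
  BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH64r))
      .addReg(X86::RBX, getKillRegState(true));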
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
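Size is in bits; a small sketch of the expected mappings, assuming the conventional GR8/GR32/GR64 register aliasing:
  MCRegister Low8  = getX86SubSuperRegister(X86::RAX, 8);        // X86::AL
  MCRegister High8 = getX86SubSuperRegister(X86::RAX, 8, true);  // X86::AH
  MCRegister Full  = getX86SubSuperRegister(X86::EAX, 64);       // X86::RAX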
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset], i.e., one with no scale or index, but with a displacement.
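A sketch of both memory-operand helpers, assuming the usual MBB/MBBI/DL/TII names and an illustrative frame index; MOV64rm takes the five-operand x86 memory form that these helpers fill in:
  // Load RAX from [RSP + 8].
  addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), X86::RAX),
               X86::RSP, /*isKill=*/false, /*Offset=*/8);
  // Load RAX from an abstract frame index; frame-index elimination later
  // rewrites the operand to a concrete base register plus displacement.
  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), X86::RAX),
                    /*FI=*/0);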
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1921
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
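A small sketch tying together the range-based STLExtras helpers listed above (stable_sort, all_of, count_if, find_if); the container and predicates are illustrative:
  SmallVector<int, 8> Objects = {3, 1, 2};
  llvm::stable_sort(Objects);                                    // {1, 2, 3}
  bool AllPositive = llvm::all_of(Objects, [](int FI) { return FI > 0; });
  auto NumOdd = llvm::count_if(Objects, [](int FI) { return FI & 1; });
  auto It = llvm::find_if(Objects, [](int FI) { return FI == 2; });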
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
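A worked encoding sketch: 300 is 0b1'0010'1100, so ULEB128 emits the low seven bits with the continuation bit set (0xAC) followed by 0x02, while -2 encodes as the single SLEB128 byte 0x7E:
  SmallString<8> Buf;
  raw_svector_ostream OS(Buf);
  unsigned N = encodeULEB128(300, OS); // Buf holds 0xAC 0x02, N == 2
  encodeSLEB128(-2, OS);               // appends 0x7E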
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
Definition: MathExtras.h:439
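A worked sketch of the alignment helpers documented above (alignTo, alignDown, isAligned):
  uint64_t Up   = alignTo(10, Align(8));   // 16: next multiple of 8
  uint64_t Down = alignDown(10, 8);        // 8: previous multiple of 8
  bool Ok       = isAligned(Align(8), Up); // true: 16 is a multiple of 8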
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
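A sketch of the typical pairing with MachineFunction::getMachineMemOperand, assuming MF and a spill-slot frame index FI are in scope; size and alignment are illustrative:
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI),
      MachineMemOperand::MOStore, /*Size=*/8, Align(8));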
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76