//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");

using namespace llvm;

X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}

static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}

static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}

// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
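//
// As an illustrative sketch (not a literal sequence produced here), a
// prologue/epilogue pair using these hints could look like:
//   pushp  %rbp              ; PPX-hinted push, matched by popp below
//   push2p %r15, %r14        ; spill two GPRs with a single instruction
//   ...
//   pop2p  %r14, %r15        ; restore in the mirrored order
//   popp   %rbp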
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}

static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by another previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
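
// As an example (illustrative, not from a real function): if the block's
// terminator sequence is
//   JCC_1 %bb.1, 4 /* COND_E */, implicit $eflags
//   JMP_1 %bb.2
// the JCC_1 reads EFLAGS without any terminator defining it first, so the
// flags are live-in to the terminator region and must be preserved; the same
// holds when no terminator touches EFLAGS but a successor lists it as
// live-in. BuildStackAdjustment (below) then uses LEA instead of ADD/SUB.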

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require using LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
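
// For example (illustrative): if the epilogue has already emitted
//   $rsp = ADD64ri32 $rsp, 16, implicit-def dead $eflags
// and the caller is about to deallocate another 32 bytes, calling
// mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true) erases that ADD
// (plus its paired CFA-offset CFI instruction, if any) and returns 16, so a
// single 48-byte adjustment can be emitted instead of two.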

void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}

/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}

void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        // |  retaddr  |
        // |    ebp    |
        // |           |<--ebp
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}

void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
  // left between the unaligned rsp and the current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small that it fits within a page, there's nothing to
  // do.
  if (StackProbeSize < Offset + AlignOffset) {

    uint64_t StackAdjustment = StackProbeSize - AlignOffset;
    BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
    }

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  if (ChunkSize == SlotSize) {
    // Use push for slot sized adjustments as a size optimization,
    // like emitSPUpdate does when not probing.
    unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
    unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
    BuildMI(MBB, MBBI, DL, TII.get(Opc))
        .addReg(Reg, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // No need to adjust the Dwarf CFA offset here; the last position of the
  // stack has been defined.
}
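
// As a concrete illustration (assuming a 4096-byte probe size, Offset =
// 0x2800 and AlignOffset = 0; not a verbatim dump), the block expansion above
// yields:
//   subq $0x1000, %rsp        # allocate one page
//   movl $0, (%rsp)           # touch it so the OS commits the page
//   subq $0x1000, %rsp
//   movl $0, (%rsp)
//   subq $0x800, %rsp         # tail is smaller than a page: no probe needed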

void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D
                                                : X86::EAX;

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  bool anyChange = false;
  do {
    anyChange = recomputeLiveIns(*tailMBB) || recomputeLiveIns(*testMBB);
  } while (anyChange);
}
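
// Illustrative shape of the loop expansion above (assuming an rsp-based
// frame, 4 KiB probe size and r11 as the scratch bound register; not a
// verbatim dump):
//   movq %rsp, %r11
//   subq $BoundOffset, %r11    # loop bound = Offset rounded down to a page
// .LtestMBB:
//   subq $0x1000, %rsp         # allocate a page
//   movl $0, (%rsp)            # touch it
//   cmpq %r11, %rsp
//   jne  .LtestMBB
//   subq $TailOffset, %rsp     # Offset % StackProbeSize, no probe required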

void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_B);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
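
// Worked example (illustrative): calculateSetFPREG(40) = min(40, 128) & -16
// = 32, while any SPAdjust >= 128 yields 128, which is already 16-byte
// aligned. The frame pointer is thus established at most 128 bytes above the
// lowest address of the allocation, keeping SEH-encoded offsets small.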

// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (HasRealign)
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}

void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in the worst case) fewer than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block

      {
        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if StackPtr < FinalStackProbed
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while FinalStackProbed < StackPtr
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      bool anyChange = false;
      do {
        anyChange = recomputeLiveIns(*footMBB) || recomputeLiveIns(*bodyMBB) ||
                    recomputeLiveIns(*headMBB) || recomputeLiveIns(*entryMBB);
      } while (anyChange);
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
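
// For instance (illustrative), a leaf function on SysV x86-64 that needs only
// 96 bytes of locals can address them at negative offsets from %rsp without
// ever adjusting %rsp: the 128-byte red zone below the stack pointer is
// ABI-guaranteed to survive signal and interrupt delivery, so emitPrologue
// (below) can shrink or skip the allocation entirely when this holds.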

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
       .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

1535 MachineBasicBlock &MBB) const {
1536 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1537 "MF used frame lowering for wrong subtarget");
1539 MachineFrameInfo &MFI = MF.getFrameInfo();
1540 const Function &Fn = MF.getFunction();
1541 MachineModuleInfo &MMI = MF.getMMI();
1543 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1544 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1545 bool IsFunclet = MBB.isEHFuncletEntry();
1547 if (Fn.hasPersonalityFn())
1548 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1549 bool FnHasClrFunclet =
1550 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1551 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1552 bool HasFP = hasFP(MF);
1553 bool IsWin64Prologue = isWin64Prologue(MF);
1554 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1555 // FIXME: Emit FPO data for EH funclets.
1556 bool NeedsWinFPO =
1557 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1558 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1559 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1561 const Register MachineFramePtr =
1563 : FramePtr;
1564 Register BasePtr = TRI->getBaseRegister();
1565 bool HasWinCFI = false;
1566
1567 // Debug location must be unknown since the first debug location is used
1568 // to determine the end of the prologue.
1569 DebugLoc DL;
1570 Register ArgBaseReg;
1571
1572 // Emit extra prolog for argument stack slot reference.
1573 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1574 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1575 // Creat extra prolog for stack realignment.
1576 ArgBaseReg = MI->getOperand(0).getReg();
1577 // leal 4(%esp), %basereg
1578 // .cfi_def_cfa %basereg, 0
1579 // andl $-128, %esp
1580 // pushl -4(%basereg)
1581 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1582 ArgBaseReg)
1584 .addImm(1)
1585 .addUse(X86::NoRegister)
1587 .addUse(X86::NoRegister)
1589 if (NeedsDwarfCFI) {
1590 // .cfi_def_cfa %basereg, 0
1591 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1592 BuildCFI(MBB, MBBI, DL,
1593 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1595 }
1596 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1597 int64_t Offset = -(int64_t)SlotSize;
1598 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1599 .addReg(ArgBaseReg)
1600 .addImm(1)
1601 .addReg(X86::NoRegister)
1602 .addImm(Offset)
1603 .addReg(X86::NoRegister)
1605 }
1606
1607 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1608 // tail call.
1609 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1610 if (TailCallArgReserveSize && IsWin64Prologue)
1611 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1612
1613 const bool EmitStackProbeCall =
1615 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1616
1617 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1621 // The special symbol below is absolute and has a *value* suitable to be
1622 // combined with the frame pointer directly.
1623 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1624 .addUse(MachineFramePtr)
1625 .addUse(X86::RIP)
1626 .addImm(1)
1627 .addUse(X86::NoRegister)
1628 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1630 .addUse(X86::NoRegister);
1631 break;
1632 }
1633 [[fallthrough]];
1634
1636 assert(
1637 !IsWin64Prologue &&
1638 "win64 prologue does not set the bit 60 in the saved frame pointer");
1639 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1640 .addUse(MachineFramePtr)
1641 .addImm(60)
1643 break;
1644
1646 break;
1647 }
1648 }
1649
1650 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1651 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1652 // stack alignment.
1654 Fn.arg_size() == 2) {
1655 StackSize += 8;
1656 MFI.setStackSize(StackSize);
1657
1658 // Update the stack pointer by pushing a register. This is the instruction
1659 // emitted that would be end up being emitted by a call to `emitSPUpdate`.
1660 // Hard-coding the update to a push avoids emitting a second
1661 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1662 // probing isn't needed anyways for an 8-byte update.
1663 // Pushing a register leaves us in a similar situation to a regular
1664 // function call where we know that the address at (rsp-8) is writeable.
1665 // That way we avoid any off-by-ones with stack probing for additional
1666 // stack pointer updates later on.
1667 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1668 .addReg(X86::RAX, RegState::Undef)
1670 }
1671
1672 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1673 // function, and use up to 128 bytes of stack space, don't have a frame
1674 // pointer, calls, or dynamic alloca then we do not need to adjust the
1675 // stack pointer (we fit in the Red Zone). We also check that we don't
1676 // push and pop from the stack.
1677 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1678 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1679 !MFI.adjustsStack() && // No calls.
1680 !EmitStackProbeCall && // No stack probes.
1681 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1682 !MF.shouldSplitStack()) { // Regular stack
1683 uint64_t MinSize =
1685 if (HasFP)
1686 MinSize += SlotSize;
1687 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1688 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1689 MFI.setStackSize(StackSize);
1690 }
1691
1692 // Insert stack pointer adjustment for later moving of return addr. Only
1693 // applies to tail call optimized functions where the callee argument stack
1694 // size is bigger than the callers.
1695 if (TailCallArgReserveSize != 0) {
1696 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1697 /*InEpilogue=*/false)
1699 }
1700
1701 // Mapping for machine moves:
1702 //
1703 // DST: VirtualFP AND
1704 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1705 // ELSE => DW_CFA_def_cfa
1706 //
1707 // SRC: VirtualFP AND
1708 // DST: Register => DW_CFA_def_cfa_register
1709 //
1710 // ELSE
1711 // OFFSET < 0 => DW_CFA_offset_extended_sf
1712 // REG < 64 => DW_CFA_offset + Reg
1713 // ELSE => DW_CFA_offset_extended
1714
1715 uint64_t NumBytes = 0;
1716 int stackGrowth = -SlotSize;
1717
1718 // Find the funclet establisher parameter
1719 Register Establisher = X86::NoRegister;
1720 if (IsClrFunclet)
1721 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1722 else if (IsFunclet)
1723 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1724
1725 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1726 // Immediately spill establisher into the home slot.
1727 // The runtime cares about this.
1728 // MOV64mr %rdx, 16(%rsp)
1729 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1730 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1731 .addReg(Establisher)
1733 MBB.addLiveIn(Establisher);
1734 }
1735
1736 if (HasFP) {
1737 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1738
1739 // Calculate required stack adjustment.
1740 uint64_t FrameSize = StackSize - SlotSize;
1741 NumBytes =
1742 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1743
1744 // Callee-saved registers are pushed on stack before the stack is realigned.
1745 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1746 NumBytes = alignTo(NumBytes, MaxAlign);
1747
1748 // Save EBP/RBP into the appropriate stack slot.
1749 BuildMI(MBB, MBBI, DL,
1751 .addReg(MachineFramePtr, RegState::Kill)
1753
1754 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1755 // Mark the place where EBP/RBP was saved.
1756 // Define the current CFA rule to use the provided offset.
1757 assert(StackSize);
1758 BuildCFI(MBB, MBBI, DL,
1760 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1762
1763 // Change the rule for the FramePtr to be an "offset" rule.
1764 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1765 BuildCFI(MBB, MBBI, DL,
1766 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1767 2 * stackGrowth -
1768 (int)TailCallArgReserveSize),
1770 }
1771
1772 if (NeedsWinCFI) {
1773 HasWinCFI = true;
1774 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1777 }
1778
1779 if (!IsFunclet) {
1780 if (X86FI->hasSwiftAsyncContext()) {
1781 assert(!IsWin64Prologue &&
1782 "win64 prologue does not store async context right below rbp");
1783 const auto &Attrs = MF.getFunction().getAttributes();
1784
1785 // Before we update the live frame pointer we have to ensure there's a
1786 // valid (or null) asynchronous context in its slot just before FP in
1787 // the frame record, so store it now.
1788 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1789 // We have an initial context in r14, store it just before the frame
1790 // pointer.
1791 MBB.addLiveIn(X86::R14);
1792 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1793 .addReg(X86::R14)
1794 .setMIFlag(MachineInstr::FrameSetup);
1795 } else {
1796 // No initial context, store null so that there's no pointer that
1797 // could be misused.
1798 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1799 .addImm(0)
1800 .setMIFlag(MachineInstr::FrameSetup);
1801 }
1802
1803 if (NeedsWinCFI) {
1804 HasWinCFI = true;
1805 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1806 .addImm(X86::R14)
1807 .setMIFlag(MachineInstr::FrameSetup);
1808 }
1809
1810 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1811 .addUse(X86::RSP)
1812 .addImm(1)
1813 .addUse(X86::NoRegister)
1814 .addImm(8)
1815 .addUse(X86::NoRegister)
1816 .setMIFlag(MachineInstr::FrameSetup);
1817 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1818 .addUse(X86::RSP)
1819 .addImm(8)
1820 .setMIFlag(MachineInstr::FrameSetup);
1821 }
1822
1823 if (!IsWin64Prologue && !IsFunclet) {
1824 // Update EBP with the new base value.
1825 if (!X86FI->hasSwiftAsyncContext())
1826 BuildMI(MBB, MBBI, DL,
1827 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1828 FramePtr)
1829 .addReg(StackPtr)
1830 .setMIFlag(MachineInstr::FrameSetup);
1831
1832 if (NeedsDwarfCFI) {
1833 if (ArgBaseReg.isValid()) {
1834 SmallString<64> CfaExpr;
1835 CfaExpr.push_back(dwarf::DW_CFA_expression);
1836 uint8_t buffer[16];
1837 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1838 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1839 CfaExpr.push_back(2);
1840 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1841 CfaExpr.push_back(0);
1842 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1843 BuildCFI(MBB, MBBI, DL,
1844 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1845 MachineInstr::FrameSetup);
1846 } else {
1847 // Mark effective beginning of when frame pointer becomes valid.
1848 // Define the current CFA to use the EBP/RBP register.
1849 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1850 BuildCFI(
1851 MBB, MBBI, DL,
1852 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1853 MachineInstr::FrameSetup);
1854 }
1855 }
1856
1857 if (NeedsWinFPO) {
1858 // .cv_fpo_setframe $FramePtr
1859 HasWinCFI = true;
1860 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1861 .addImm(FramePtr)
1862 .addImm(0)
1863 .setMIFlag(MachineInstr::FrameSetup);
1864 }
1865 }
1866 }
1867 } else {
1868 assert(!IsFunclet && "funclets without FPs not yet implemented");
1869 NumBytes =
1870 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1871 }
1872
1873 // Update the offset adjustment, which is mainly used by codeview to translate
1874 // from ESP to VFRAME relative local variable offsets.
1875 if (!IsFunclet) {
1876 if (HasFP && TRI->hasStackRealignment(MF))
1877 MFI.setOffsetAdjustment(-NumBytes);
1878 else
1879 MFI.setOffsetAdjustment(-StackSize);
1880 }
1881
1882 // For EH funclets, only allocate enough space for outgoing calls. Save the
1883 // NumBytes value that we would've used for the parent frame.
1884 unsigned ParentFrameNumBytes = NumBytes;
1885 if (IsFunclet)
1886 NumBytes = getWinEHFuncletFrameSize(MF);
1887
1888 // Skip the callee-saved push instructions.
1889 bool PushedRegs = false;
1890 int StackOffset = 2 * stackGrowth;
1891 MachineBasicBlock::const_iterator LastCSPush = MBBI;
1892 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1893 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1894 return false;
1895 unsigned Opc = MBBI->getOpcode();
1896 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1897 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1898 };
1899
1900 while (IsCSPush(MBBI)) {
1901 PushedRegs = true;
1902 Register Reg = MBBI->getOperand(0).getReg();
1903 LastCSPush = MBBI;
1904 ++MBBI;
1905 unsigned Opc = LastCSPush->getOpcode();
1906
1907 if (!HasFP && NeedsDwarfCFI) {
1908 // Mark callee-saved push instruction.
1909 // Define the current CFA rule to use the provided offset.
1910 assert(StackSize);
1911 // Compared to push, push2 moves the stack pointer by one extra
1912 // register slot.
1913 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1914 StackOffset += stackGrowth;
1915 BuildCFI(MBB, MBBI, DL,
1916 MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
1917 MachineInstr::FrameSetup);
1918 StackOffset += stackGrowth;
1919 }
1920
1921 if (NeedsWinCFI) {
1922 HasWinCFI = true;
1923 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1924 .addImm(Reg)
1925 .setMIFlag(MachineInstr::FrameSetup);
1926 if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
1927 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1928 .addImm(LastCSPush->getOperand(1).getReg())
1929 .setMIFlag(MachineInstr::FrameSetup);
1930 }
1931 }
1932
1933 // Realign stack after we pushed callee-saved registers (so that we'll be
1934 // able to calculate their offsets from the frame pointer).
1935 // Don't do this for Win64, it needs to realign the stack after the prologue.
1936 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1937 !ArgBaseReg.isValid()) {
1938 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1939 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1940
1941 if (NeedsWinCFI) {
1942 HasWinCFI = true;
1943 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1944 .addImm(MaxAlign)
1945 .setMIFlag(MachineInstr::FrameSetup);
1946 }
1947 }
1948
1949 // If there is an SUB32ri of ESP immediately before this instruction, merge
1950 // the two. This can be the case when tail call elimination is enabled and
1951 // the callee has more arguments than the caller.
1952 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1953
1954 // Adjust stack pointer: ESP -= numbytes.
1955
1956 // Windows and cygwin/mingw require a prologue helper routine when allocating
1957 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1958 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1959 // stack and adjust the stack pointer in one go. The 64-bit version of
1960 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1961 // responsible for adjusting the stack pointer. Touching the stack at 4K
1962 // increments is necessary to ensure that the guard pages used by the OS
1963 // virtual memory manager are allocated in correct sequence.
1964 uint64_t AlignedNumBytes = NumBytes;
1965 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1966 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1967 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1968 assert(!X86FI->getUsesRedZone() &&
1969 "The Red Zone is not accounted for in stack probes");
1970
1971 // Check whether EAX is livein for this block.
1972 bool isEAXAlive = isEAXLiveIn(MBB);
1973
1974 if (isEAXAlive) {
1975 if (Is64Bit) {
1976 // Save RAX
1977 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1978 .addReg(X86::RAX, RegState::Kill)
1979 .setMIFlag(MachineInstr::FrameSetup);
1980 } else {
1981 // Save EAX
1982 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1983 .addReg(X86::EAX, RegState::Kill)
1984 .setMIFlag(MachineInstr::FrameSetup);
1985 }
1986 }
1987
1988 if (Is64Bit) {
1989 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1990 // Function prologue is responsible for adjusting the stack pointer.
1991 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1992 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1993 .addImm(Alloc)
1994 .setMIFlag(MachineInstr::FrameSetup);
1995 } else {
1996 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1997 // We'll also use 4 already allocated bytes for EAX.
1998 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1999 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2000 .setMIFlag(MachineInstr::FrameSetup);
2001 }
2002
2003 // Call __chkstk, __chkstk_ms, or __alloca.
2004 emitStackProbe(MF, MBB, MBBI, DL, true);
2005
2006 if (isEAXAlive) {
2007 // Restore RAX/EAX
2008 MachineInstr *MI;
2009 if (Is64Bit)
2010 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2011 StackPtr, false, NumBytes - 8);
2012 else
2013 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2014 StackPtr, false, NumBytes - 4);
2015 MI->setFlag(MachineInstr::FrameSetup);
2016 MBB.insert(MBBI, MI);
2017 }
2018 } else if (NumBytes) {
2019 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2020 }
2021
2022 if (NeedsWinCFI && NumBytes) {
2023 HasWinCFI = true;
2024 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2025 .addImm(NumBytes)
2026 .setMIFlag(MachineInstr::FrameSetup);
2027 }
2028
2029 int SEHFrameOffset = 0;
2030 unsigned SPOrEstablisher;
2031 if (IsFunclet) {
2032 if (IsClrFunclet) {
2033 // The establisher parameter passed to a CLR funclet is actually a pointer
2034 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2035 // to find the root function establisher frame by loading the PSPSym from
2036 // the intermediate frame.
2037 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2038 MachinePointerInfo NoInfo;
2039 MBB.addLiveIn(Establisher);
2040 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2041 Establisher, false, PSPSlotOffset)
2042 .addMemOperand(MF.getMachineMemOperand(
2043 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
2044 ;
2045 // Save the root establisher back into the current funclet's (mostly
2046 // empty) frame, in case a sub-funclet or the GC needs it.
2047 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2048 false, PSPSlotOffset)
2049 .addReg(Establisher)
2050 .addMemOperand(MF.getMachineMemOperand(
2051 NoInfo,
2052 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2053 SlotSize, Align(SlotSize)));
2054 }
2055 SPOrEstablisher = Establisher;
2056 } else {
2057 SPOrEstablisher = StackPtr;
2058 }
2059
2060 if (IsWin64Prologue && HasFP) {
2061 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2062 // this calculation on the incoming establisher, which holds the value of
2063 // RSP from the parent frame at the end of the prologue.
2064 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2065 if (SEHFrameOffset)
2066 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2067 SPOrEstablisher, false, SEHFrameOffset);
2068 else
2069 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2070 .addReg(SPOrEstablisher);
2071
2072 // If this is not a funclet, emit the CFI describing our frame pointer.
2073 if (NeedsWinCFI && !IsFunclet) {
2074 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2075 HasWinCFI = true;
2076 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2077 .addImm(FramePtr)
2078 .addImm(SEHFrameOffset)
2079 .setMIFlag(MachineInstr::FrameSetup);
2080 if (isAsynchronousEHPersonality(Personality))
2081 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2082 }
2083 } else if (IsFunclet && STI.is32Bit()) {
2084 // Reset EBP / ESI to something good for funclets.
2085 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2086 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2087 // into the registration node so that the runtime will restore it for us.
2088 if (!MBB.isCleanupFuncletEntry()) {
2089 assert(Personality == EHPersonality::MSVC_CXX);
2090 Register FrameReg;
2091 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2092 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2093 // ESP is the first field, so no extra displacement is needed.
2094 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2095 false, EHRegOffset)
2096 .addReg(X86::ESP);
2097 }
2098 }
2099
2100 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2101 const MachineInstr &FrameInstr = *MBBI;
2102 ++MBBI;
2103
2104 if (NeedsWinCFI) {
2105 int FI;
2106 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2107 if (X86::FR64RegClass.contains(Reg)) {
2108 int Offset;
2109 Register IgnoredFrameReg;
2110 if (IsWin64Prologue && IsFunclet)
2111 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2112 else
2113 Offset =
2114 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2115 SEHFrameOffset;
2116
2117 HasWinCFI = true;
2118 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2119 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2120 .addImm(Reg)
2121 .addImm(Offset)
2122 .setMIFlag(MachineInstr::FrameSetup);
2123 }
2124 }
2125 }
2126 }
2127
2128 if (NeedsWinCFI && HasWinCFI)
2129 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2130 .setMIFlag(MachineInstr::FrameSetup);
2131
2132 if (FnHasClrFunclet && !IsFunclet) {
2133 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2134 // immediately after the prolog) into the PSPSlot so that funclets
2135 // and the GC can recover it.
2136 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2137 auto PSPInfo = MachinePointerInfo::getFixedStack(
2138 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
2139 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2140 PSPSlotOffset)
2141 .addReg(StackPtr)
2142 .addMemOperand(MF.getMachineMemOperand(
2143 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
2144 SlotSize, Align(SlotSize)));
2145 }
2146
2147 // Realign stack after we spilled callee-saved registers (so that we'll be
2148 // able to calculate their offsets from the frame pointer).
2149 // Win64 requires aligning the stack after the prologue.
2150 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2151 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2152 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2153 }
2154
2155 // We already dealt with stack realignment and funclets above.
2156 if (IsFunclet && STI.is32Bit())
2157 return;
2158
2159 // If we need a base pointer, set it up here. It's whatever the value
2160 // of the stack pointer is at this point. Any variable size objects
2161 // will be allocated after this, so we can still use the base pointer
2162 // to reference locals.
2163 if (TRI->hasBasePointer(MF)) {
2164 // Update the base pointer with the current stack pointer.
2165 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2166 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2167 .addReg(SPOrEstablisher)
2168 .setMIFlag(MachineInstr::FrameSetup);
2169 if (X86FI->getRestoreBasePointer()) {
2170 // Stash value of base pointer. Saving RSP instead of EBP shortens
2171 // dependence chain. Used by SjLj EH.
2172 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2173 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2174 X86FI->getRestoreBasePointerOffset())
2175 .addReg(SPOrEstablisher)
2176 .setMIFlag(MachineInstr::FrameSetup);
2177 }
2178
2179 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2180 // Stash the value of the frame pointer relative to the base pointer for
2181 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2182 // it recovers the frame pointer from the base pointer rather than the
2183 // other way around.
2184 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2185 Register UsedReg;
2186 int Offset =
2187 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2188 .getFixed();
2189 assert(UsedReg == BasePtr);
2190 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2191 .addReg(FramePtr)
2192 .setMIFlag(MachineInstr::FrameSetup);
2193 }
2194 }
2195 if (ArgBaseReg.isValid()) {
2196 // Save argument base pointer.
2197 auto *MI = X86FI->getStackPtrSaveMI();
2198 int FI = MI->getOperand(1).getIndex();
2199 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2200 // movl %basereg, offset(%ebp)
2201 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2202 .addReg(ArgBaseReg)
2203 .setMIFlag(MachineInstr::FrameSetup);
2204 }
2205
2206 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2207 // Mark end of stack pointer adjustment.
2208 if (!HasFP && NumBytes) {
2209 // Define the current CFA rule to use the provided offset.
2210 assert(StackSize);
2211 BuildCFI(
2212 MBB, MBBI, DL,
2213 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2214 MachineInstr::FrameSetup);
2215 }
2216
2217 // Emit DWARF info specifying the offsets of the callee-saved registers.
2218 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2219 }
2220
2221 // An x86 interrupt handler cannot assume anything about the direction flag
2222 // (DF in the EFLAGS register). Clear the flag by emitting a "cld"
2223 // instruction in the prologue of each interrupt handler.
2224 //
2225 // Create the "cld" instruction only in these cases:
2226 // 1. The interrupt handler uses any of the "rep" instructions.
2227 // 2. The interrupt handler calls another function.
2228 // 3. The interrupt handler contains inline asm, whose effects are unknown.
2229 //
2230 // TODO: We should also emit cld if we detect a use of std, but as of now the
2231 // compiler does not even emit or define that instruction, so in practice
2232 // this would only happen with inline asm, which we cover anyway.
2233 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
2234 bool NeedsCLD = false;
2235
2236 for (const MachineBasicBlock &B : MF) {
2237 for (const MachineInstr &MI : B) {
2238 if (MI.isCall()) {
2239 NeedsCLD = true;
2240 break;
2241 }
2242
2243 if (isOpcodeRep(MI.getOpcode())) {
2244 NeedsCLD = true;
2245 break;
2246 }
2247
2248 if (MI.isInlineAsm()) {
2249 // TODO: Parse asm for rep instructions or call sites?
2250 // For now, let's play it safe and emit a cld instruction
2251 // just in case.
2252 NeedsCLD = true;
2253 break;
2254 }
2255 }
2256 }
2257
2258 if (NeedsCLD) {
2259 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2260 .setMIFlag(MachineInstr::FrameSetup);
2261 }
2262 }
2263
2264 // At this point we know if the function has WinCFI or not.
2265 MF.setHasWinCFI(HasWinCFI);
2266}
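// For reference, a minimal Win64 prologue assembled from the steps above,
// together with the unwind pseudos it interleaves (illustrative):
//   pushq %rbp            ; SEH_PushReg
//   subq  $48, %rsp       ; SEH_StackAlloc
//   leaq  48(%rsp), %rbp  ; SEH_SetFrame
//                         ; SEH_EndPrologue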
2267
2268 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2269 const MachineFunction &MF) const {
2270 // We can't use LEA instructions for adjusting the stack pointer if we don't
2271 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2272 // to deallocate the stack.
2273 // This means that we can use LEA for SP in two situations:
2274 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2275 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2276 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2277}
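// For example (illustrative): with a frame pointer the epilogue may restore
// the stack pointer via
//   leaq -8(%rbp), %rsp
// but a Win64 function without one must deallocate with
//   addq $40, %rsp
// since that is the only epilogue form the Windows unwinder recognizes there.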
2278
2279 static bool isFuncletReturnInstr(MachineInstr &MI) {
2280 switch (MI.getOpcode()) {
2281 case X86::CATCHRET:
2282 case X86::CLEANUPRET:
2283 return true;
2284 default:
2285 return false;
2286 }
2287 llvm_unreachable("impossible");
2288}
2289
2290// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2291// stack. It holds a pointer to the bottom of the root function frame. The
2292// establisher frame pointer passed to a nested funclet may point to the
2293// (mostly empty) frame of its parent funclet, but it will need to find
2294// the frame of the root function to access locals. To facilitate this,
2295// every funclet copies the pointer to the bottom of the root function
2296// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2297// same offset for the PSPSym in the root function frame that's used in the
2298// funclets' frames allows each funclet to dynamically accept any ancestor
2299// frame as its establisher argument (the runtime doesn't guarantee the
2300// immediate parent for some reason lost to history), and also allows the GC,
2301// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2302// frame with only a single offset reported for the entire method.
2303unsigned
2304X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2305 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2306 Register SPReg;
2307 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2308 /*IgnoreSPUpdates*/ true)
2309 .getFixed();
2310 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2311 return static_cast<unsigned>(Offset);
2312}
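// Illustrative layout (offsets assumed, not normative): if the root prologue
// ends with RSP = SP0 and stores Initial-SP at [SP0 + 0x20], every funclet
// keeps a copy at [its own SP + 0x20], so a single reported offset lets the
// runtime and the GC find the root frame from any funclet.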
2313
2314unsigned
2315X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2316 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2317 // This is the size of the pushed CSRs.
2318 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2319 // This is the size of callee saved XMMs.
2320 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2321 unsigned XMMSize =
2322 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2323 // This is the amount of stack a funclet needs to allocate.
2324 unsigned UsedSize;
2325 EHPersonality Personality =
2326 classifyEHPersonality(MF.getFunction().getPersonalityFn());
2327 if (Personality == EHPersonality::CoreCLR) {
2328 // CLR funclets need to hold enough space to include the PSPSym, at the
2329 // same offset from the stack pointer (immediately after the prolog) as it
2330 // resides at in the main function.
2331 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2332 } else {
2333 // Other funclets just need enough stack for outgoing call arguments.
2334 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2335 }
2336 // RBP is not included in the callee saved register block. After pushing RBP,
2337 // everything is 16 byte aligned. Everything we allocate before an outgoing
2338 // call must also be 16 byte aligned.
2339 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2340 // Subtract out the size of the callee saved registers. This is how much stack
2341 // each funclet will allocate.
2342 return FrameSizeMinusRBP + XMMSize - CSSize;
2343}
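// Worked example (illustrative numbers): CSSize = 24 and UsedSize = 40 give
// alignTo(24 + 40, 16) = 64, so with XMMSize = 0 the funclet allocates
// 64 - 24 = 40 bytes.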
2344
2345static bool isTailCallOpcode(unsigned Opc) {
2346 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2347 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2348 Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
2349}
2350
2351 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2352 MachineBasicBlock &MBB) const {
2353 const MachineFrameInfo &MFI = MF.getFrameInfo();
2354 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2355 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2356 MachineBasicBlock::iterator MBBI = Terminator;
2357 DebugLoc DL;
2358 if (MBBI != MBB.end())
2359 DL = MBBI->getDebugLoc();
2360 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2361 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2362 Register FramePtr = TRI->getFrameRegister(MF);
2363 Register MachineFramePtr =
2364 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2365
2366 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2367 bool NeedsWin64CFI =
2368 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2369 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2370
2371 // Get the number of bytes to allocate from the FrameInfo.
2372 uint64_t StackSize = MFI.getStackSize();
2373 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2374 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2375 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2376 bool HasFP = hasFP(MF);
2377 uint64_t NumBytes = 0;
2378
2379 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2380 !MF.getTarget().getTargetTriple().isOSWindows()) &&
2381 MF.needsFrameMoves();
2382
2383 Register ArgBaseReg;
2384 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2385 unsigned Opc = X86::LEA32r;
2386 Register StackReg = X86::ESP;
2387 ArgBaseReg = MI->getOperand(0).getReg();
2388 if (STI.is64Bit()) {
2389 Opc = X86::LEA64r;
2390 StackReg = X86::RSP;
2391 }
2392 // leal -4(%basereg), %esp
2393 // .cfi_def_cfa %esp, 4
2394 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2395 .addUse(ArgBaseReg)
2396 .addImm(1)
2397 .addUse(X86::NoRegister)
2398 .addImm(-(int64_t)SlotSize)
2399 .addUse(X86::NoRegister)
2400 .setMIFlag(MachineInstr::FrameDestroy);
2401 if (NeedsDwarfCFI) {
2402 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2403 BuildCFI(MBB, MBBI, DL,
2404 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2405 MachineInstr::FrameDestroy);
2406 --MBBI;
2407 }
2408 --MBBI;
2409 }
2410
2411 if (IsFunclet) {
2412 assert(HasFP && "EH funclets without FP not yet implemented");
2413 NumBytes = getWinEHFuncletFrameSize(MF);
2414 } else if (HasFP) {
2415 // Calculate required stack adjustment.
2416 uint64_t FrameSize = StackSize - SlotSize;
2417 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2418
2419 // Callee-saved registers were pushed onto the stack before the stack was
2420 // realigned.
2421 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2422 NumBytes = alignTo(FrameSize, MaxAlign);
2423 } else {
2424 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2425 }
2426 uint64_t SEHStackAllocAmt = NumBytes;
2427
2428 // AfterPop is the position to insert .cfi_restore.
2429 MachineBasicBlock::iterator AfterPop = MBBI;
2430 if (HasFP) {
2431 if (X86FI->hasSwiftAsyncContext()) {
2432 // Discard the context.
2433 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2434 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2435 }
2436 // Pop EBP.
2437 BuildMI(MBB, MBBI, DL,
2438 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
2439 MachineFramePtr)
2440 .setMIFlag(MachineInstr::FrameDestroy);
2441
2442 // We need to reset FP to its untagged state on return. Bit 60 is currently
2443 // used to show the presence of an extended frame.
2444 if (X86FI->hasSwiftAsyncContext()) {
2445 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2446 .addUse(MachineFramePtr)
2447 .addImm(60)
2448 .setMIFlag(MachineInstr::FrameDestroy);
2449 }
2450
2451 if (NeedsDwarfCFI) {
2452 if (!ArgBaseReg.isValid()) {
2453 unsigned DwarfStackPtr =
2454 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2455 BuildCFI(MBB, MBBI, DL,
2456 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2457 MachineInstr::FrameDestroy);
2458 }
2459 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2460 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2461 BuildCFI(MBB, AfterPop, DL,
2462 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2463 MachineInstr::FrameDestroy);
2464 --MBBI;
2465 --AfterPop;
2466 }
2467 --MBBI;
2468 }
2469 }
2470
2471 MachineBasicBlock::iterator FirstCSPop = MBBI;
2472 // Skip the callee-saved pop instructions.
2473 while (MBBI != MBB.begin()) {
2474 MachineBasicBlock::iterator PI = std::prev(MBBI);
2475 unsigned Opc = PI->getOpcode();
2476
2477 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2478 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2479 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2480 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2481 Opc != X86::POP2P && Opc != X86::LEA64r))
2482 break;
2483 FirstCSPop = PI;
2484 }
2485
2486 --MBBI;
2487 }
2488 if (ArgBaseReg.isValid()) {
2489 // Restore argument base pointer.
2490 auto *MI = X86FI->getStackPtrSaveMI();
2491 int FI = MI->getOperand(1).getIndex();
2492 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2493 // movl offset(%ebp), %basereg
2494 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2495 .setMIFlag(MachineInstr::FrameDestroy);
2496 }
2497 MBBI = FirstCSPop;
2498
2499 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2500 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2501
2502 if (MBBI != MBB.end())
2503 DL = MBBI->getDebugLoc();
2504 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2505 // instruction, merge the two instructions.
2506 if (NumBytes || MFI.hasVarSizedObjects())
2507 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2508
2509 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2510 // slot before popping them off. The same applies when the stack was
2511 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2512 // will not do realignment or dynamic stack allocation.
2513 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2514 !IsFunclet) {
2515 if (TRI->hasStackRealignment(MF))
2516 MBBI = FirstCSPop;
2517 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2518 uint64_t LEAAmount =
2519 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2520
2521 if (X86FI->hasSwiftAsyncContext())
2522 LEAAmount -= 16;
2523
2524 // There are only two legal forms of epilogue:
2525 // - add SEHAllocationSize, %rsp
2526 // - lea SEHAllocationSize(%FramePtr), %rsp
2527 //
2528 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2529 // However, we may use this sequence if we have a frame pointer because the
2530 // effects of the prologue can safely be undone.
2531 if (LEAAmount != 0) {
2532 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2533 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
2534 false, LEAAmount);
2535 --MBBI;
2536 } else {
2537 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2538 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
2539 --MBBI;
2540 }
2541 } else if (NumBytes) {
2542 // Adjust stack pointer back: ESP += numbytes.
2543 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2544 if (!HasFP && NeedsDwarfCFI) {
2545 // Define the current CFA rule to use the provided offset.
2546 BuildCFI(MBB, MBBI, DL,
2547 MCCFIInstruction::cfiDefCfaOffset(
2548 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2549 MachineInstr::FrameDestroy);
2550 }
2551 --MBBI;
2552 }
2553
2554 // Windows unwinder will not invoke function's exception handler if IP is
2555 // either in prologue or in epilogue. This behavior causes a problem when a
2556 // call immediately precedes an epilogue, because the return address points
2557 // into the epilogue. To cope with that, we insert an epilogue marker here,
2558 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2559 // final emitted code.
2560 if (NeedsWin64CFI && MF.hasWinCFI())
2561 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2562
2563 if (!HasFP && NeedsDwarfCFI) {
2564 MBBI = FirstCSPop;
2565 int64_t Offset = -CSSize - SlotSize;
2566 // Mark callee-saved pop instruction.
2567 // Define the current CFA rule to use the provided offset.
2568 while (MBBI != MBB.end()) {
2569 MachineBasicBlock::iterator PI = MBBI;
2570 unsigned Opc = PI->getOpcode();
2571 ++MBBI;
2572 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2573 Opc == X86::POP2 || Opc == X86::POP2P) {
2574 Offset += SlotSize;
2575 // Compared to pop, pop2 moves the stack pointer by one extra
2576 // register slot.
2577 if (Opc == X86::POP2 || Opc == X86::POP2P)
2578 Offset += SlotSize;
2579 BuildCFI(MBB, MBBI, DL,
2580 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
2581 MachineInstr::FrameDestroy);
2582 }
2583 }
2584 }
2585
2586 // Emit DWARF info specifying the restores of the callee-saved registers.
2587 // For epilogue with return inside or being other block without successor,
2588 // no need to generate .cfi_restore for callee-saved registers.
2589 if (NeedsDwarfCFI && !MBB.succ_empty())
2590 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2591
2592 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2593 // Add the return addr area delta back since we are not tail calling.
2594 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2595 assert(Offset >= 0 && "TCDelta should never be positive");
2596 if (Offset) {
2597 // Check for possible merge with preceding ADD instruction.
2598 Offset += mergeSPUpdates(MBB, Terminator, true);
2599 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2600 }
2601 }
2602
2603 // Emit tilerelease for AMX kernel.
2604 if (X86FI->hasVirtualTileReg())
2605 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2606}
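// A typical frame-pointer epilogue assembled from the steps above
// (illustrative):
//   leaq -8(%rbp), %rsp   ; or addq $NumBytes, %rsp when SP is known
//   popq %rbx
//   popq %rbp
//   retq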
2607
2608 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2609 int FI,
2610 Register &FrameReg) const {
2611 const MachineFrameInfo &MFI = MF.getFrameInfo();
2612
2613 bool IsFixed = MFI.isFixedObjectIndex(FI);
2614 // We can't calculate offset from frame pointer if the stack is realigned,
2615 // so enforce usage of stack/base pointer. The base pointer is used when we
2616 // have dynamic allocas in addition to dynamic realignment.
2617 if (TRI->hasBasePointer(MF))
2618 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2619 else if (TRI->hasStackRealignment(MF))
2620 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2621 else
2622 FrameReg = TRI->getFrameRegister(MF);
2623
2624 // Offset will hold the offset from the stack pointer at function entry to the
2625 // object.
2626 // We need to factor in additional offsets applied during the prologue to the
2627 // frame, base, and stack pointer depending on which is used.
2628 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2629 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2630 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2631 uint64_t StackSize = MFI.getStackSize();
2632 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2633 int64_t FPDelta = 0;
2634
2635 // In an x86 interrupt, remove the offset we added to account for the return
2636 // address from any stack object allocated in the caller's frame. Interrupts
2637 // do not have a standard return address. Fixed objects in the current frame,
2638 // such as SSE register spills, should not get this treatment.
2639 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2640 Offset >= 0) {
2641 Offset += getOffsetOfLocalArea();
2642 }
2643
2644 if (IsWin64Prologue) {
2645 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2646
2647 // Calculate required stack adjustment.
2648 uint64_t FrameSize = StackSize - SlotSize;
2649 // If required, include space for extra hidden slot for stashing base
2650 // pointer.
2651 if (X86FI->getRestoreBasePointer())
2652 FrameSize += SlotSize;
2653 uint64_t NumBytes = FrameSize - CSSize;
2654
2655 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2656 if (FI && FI == X86FI->getFAIndex())
2657 return StackOffset::getFixed(-SEHFrameOffset);
2658
2659 // FPDelta is the offset from the "traditional" FP location of the old base
2660 // pointer followed by return address and the location required by the
2661 // restricted Win64 prologue.
2662 // Add FPDelta to all offsets below that go through the frame pointer.
2663 FPDelta = FrameSize - SEHFrameOffset;
2664 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2665 "FPDelta isn't aligned per the Win64 ABI!");
2666 }
2667
2668 if (FrameReg == TRI->getFramePtr()) {
2669 // Skip saved EBP/RBP
2670 Offset += SlotSize;
2671
2672 // Account for restricted Windows prologue.
2673 Offset += FPDelta;
2674
2675 // Skip the RETADDR move area
2676 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2677 if (TailCallReturnAddrDelta < 0)
2678 Offset -= TailCallReturnAddrDelta;
2679
2679
2680 return StackOffset::getFixed(Offset + FPDelta);
2681 }
2682
2683 // FrameReg is either the stack pointer or a base pointer. But the base is
2684 // located at the end of the statically known StackSize so the distinction
2685 // doesn't really matter.
2686 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2687 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2688 return StackOffset::getFixed(Offset + StackSize);
2689}
2690
2691 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
2692 Register &FrameReg) const {
2693 const MachineFrameInfo &MFI = MF.getFrameInfo();
2694 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2695 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2696 const auto it = WinEHXMMSlotInfo.find(FI);
2697
2698 if (it == WinEHXMMSlotInfo.end())
2699 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2700
2701 FrameReg = TRI->getStackRegister();
2702 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2703 it->second;
2704}
2705
2706 StackOffset
2707 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
2708 Register &FrameReg,
2709 int Adjustment) const {
2710 const MachineFrameInfo &MFI = MF.getFrameInfo();
2711 FrameReg = TRI->getStackRegister();
2712 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2713 getOffsetOfLocalArea() + Adjustment);
2714}
2715
2716 StackOffset
2717 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
2718 int FI, Register &FrameReg,
2719 bool IgnoreSPUpdates) const {
2720
2721 const MachineFrameInfo &MFI = MF.getFrameInfo();
2722 // Does not include any dynamic realign.
2723 const uint64_t StackSize = MFI.getStackSize();
2724 // LLVM arranges the stack as follows:
2725 // ...
2726 // ARG2
2727 // ARG1
2728 // RETADDR
2729 // PUSH RBP <-- RBP points here
2730 // PUSH CSRs
2731 // ~~~~~~~ <-- possible stack realignment (non-win64)
2732 // ...
2733 // STACK OBJECTS
2734 // ... <-- RSP after prologue points here
2735 // ~~~~~~~ <-- possible stack realignment (win64)
2736 //
2737 // if (hasVarSizedObjects()):
2738 // ... <-- "base pointer" (ESI/RBX) points here
2739 // DYNAMIC ALLOCAS
2740 // ... <-- RSP points here
2741 //
2742 // Case 1: In the simple case of no stack realignment and no dynamic
2743 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2744 // with fixed offsets from RSP.
2745 //
2746 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2747 // stack objects are addressed with RBP and regular stack objects with RSP.
2748 //
2749 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2750 // to address stack arguments for outgoing calls and nothing else. The "base
2751 // pointer" points to local variables, and RBP points to fixed objects.
2752 //
2753 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2754 // answer we give is relative to the SP after the prologue, and not the
2755 // SP in the middle of the function.
2756
2757 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2758 !STI.isTargetWin64())
2759 return getFrameIndexReference(MF, FI, FrameReg);
2760
2761 // If !hasReservedCallFrame the function might have SP adjustment in the
2762 // body. So, even though the offset is statically known, it depends on where
2763 // we are in the function.
2764 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2765 return getFrameIndexReference(MF, FI, FrameReg);
2766
2767 // We don't handle tail calls, and shouldn't be seeing them either.
2769 "we don't handle this case!");
2770
2771 // This is how the math works out:
2772 //
2773 // %rsp grows (i.e. gets lower) left to right. Each box below is
2774 // one word (eight bytes). Obj0 is the stack slot we're trying to
2775 // get to.
2776 //
2777 // ----------------------------------
2778 // | BP | Obj0 | Obj1 | ... | ObjN |
2779 // ----------------------------------
2780 // ^ ^ ^ ^
2781 // A B C E
2782 //
2783 // A is the incoming stack pointer.
2784 // (B - A) is the local area offset (-8 for x86-64) [1]
2785 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2786 //
2787 // |(E - B)| is the StackSize (absolute value, positive). For a
2788 // stack that grows down, this works out to be (B - E). [3]
2789 //
2790 // E is also the value of %rsp after stack has been set up, and we
2791 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2792 // (C - E) == (C - A) - (B - A) + (B - E)
2793 // { Using [1], [2] and [3] above }
2794 // == getObjectOffset - LocalAreaOffset + StackSize
2795
2796 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2797}
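// Worked example (illustrative numbers): with LocalAreaOffset = -8,
// getObjectOffset(Obj0) = -16, and StackSize = 32:
//   (C - E) = (-16) - (-8) + 32 = 24, i.e. Obj0 lives at 24(%rsp).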
2798
2799 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2800 MachineFunction &MF, const TargetRegisterInfo *TRI,
2801 std::vector<CalleeSavedInfo> &CSI) const {
2802 MachineFrameInfo &MFI = MF.getFrameInfo();
2803 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2804
2805 unsigned CalleeSavedFrameSize = 0;
2806 unsigned XMMCalleeSavedFrameSize = 0;
2807 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2808 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2809
2810 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2811
2812 if (TailCallReturnAddrDelta < 0) {
2813 // create RETURNADDR area
2814 // arg
2815 // arg
2816 // RETADDR
2817 // { ...
2818 // RETADDR area
2819 // ...
2820 // }
2821 // [EBP]
2822 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2823 TailCallReturnAddrDelta - SlotSize, true);
2824 }
2825
2826 // Spill the BasePtr if it's used.
2827 if (this->TRI->hasBasePointer(MF)) {
2828 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2829 if (MF.hasEHFunclets()) {
2830 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
2831 X86FI->setHasSEHFramePtrSave(true);
2832 X86FI->setSEHFramePtrSaveIndex(FI);
2833 }
2834 }
2835
2836 if (hasFP(MF)) {
2837 // emitPrologue always spills frame register the first thing.
2838 SpillSlotOffset -= SlotSize;
2839 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2840
2841 // The async context lives directly before the frame pointer, and we
2842 // allocate a second slot to preserve stack alignment.
2843 if (X86FI->hasSwiftAsyncContext()) {
2844 SpillSlotOffset -= SlotSize;
2845 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2846 SpillSlotOffset -= SlotSize;
2847 }
2848
2849 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2850 // the frame register, we can delete it from CSI list and not have to worry
2851 // about avoiding it later.
2852 Register FPReg = TRI->getFrameRegister(MF);
2853 for (unsigned i = 0; i < CSI.size(); ++i) {
2854 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
2855 CSI.erase(CSI.begin() + i);
2856 break;
2857 }
2858 }
2859 }
2860
2861 // Strategy:
2862 // 1. Use push2 when
2863 // a) number of CSR > 1 if no need padding
2864 // b) number of CSR > 2 if need padding
2865 // 2. When the number of CSR push is odd
2866 // a. Start to use push2 from the 1st push if stack is 16B aligned.
2867 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
2868 // 3. When the number of CSR push is even, start to use push2 from the 1st
2869 // push and make the stack 16B aligned before the push
2870 unsigned NumRegsForPush2 = 0;
2871 if (STI.hasPush2Pop2()) {
2872 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
2873 return X86::GR64RegClass.contains(I.getReg());
2874 });
2875 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
2876 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
2877 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
2878 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
2879 if (X86FI->padForPush2Pop2()) {
2880 SpillSlotOffset -= SlotSize;
2881 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2882 }
2883 }
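// Worked example (illustrative): three 64-bit CSRs with a 16B-aligned
// SpillSlotOffset give NeedPadding = false, UsePush2Pop2 = true, and
// NumRegsForPush2 = alignDown(3, 2) = 2, i.e. one push2 pair followed by a
// single plain push.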
2884
2885 // Assign slots for GPRs. It increases frame size.
2886 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2887 Register Reg = I.getReg();
2888
2889 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2890 continue;
2891
2892 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
2893 // or an odd number of registers is already in the candidate list.
2894 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
2895 (SpillSlotOffset % 16 == 0 ||
2896 X86FI->getNumCandidatesForPush2Pop2() % 2))
2897 X86FI->addCandidateForPush2Pop2(Reg);
2898
2899 SpillSlotOffset -= SlotSize;
2900 CalleeSavedFrameSize += SlotSize;
2901
2902 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2903 I.setFrameIdx(SlotIndex);
2904 }
2905
2906 // Adjust the offset of spill slot as we know the accurate callee saved frame
2907 // size.
2908 if (X86FI->getRestoreBasePointer()) {
2909 SpillSlotOffset -= SlotSize;
2910 CalleeSavedFrameSize += SlotSize;
2911
2912 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2913 // TODO: saving the slot index is better?
2914 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2915 }
2916 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
2917 "Expect even candidates for push2/pop2");
2918 if (X86FI->getNumCandidatesForPush2Pop2())
2919 ++NumFunctionUsingPush2Pop2;
2920 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2921 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2922
2923 // Assign slots for XMMs.
2924 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2925 Register Reg = I.getReg();
2926 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2927 continue;
2928
2929 // If this is k-register make sure we lookup via the largest legal type.
2930 MVT VT = MVT::Other;
2931 if (X86::VK16RegClass.contains(Reg))
2932 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2933
2934 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2935 unsigned Size = TRI->getSpillSize(*RC);
2936 Align Alignment = TRI->getSpillAlign(*RC);
2937 // ensure alignment
2938 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2939 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2940
2941 // spill into slot
2942 SpillSlotOffset -= Size;
2943 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2944 I.setFrameIdx(SlotIndex);
2945 MFI.ensureMaxAlignment(Alignment);
2946
2947 // Save the start offset and size of XMM in stack frame for funclets.
2948 if (X86::VR128RegClass.contains(Reg)) {
2949 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2950 XMMCalleeSavedFrameSize += Size;
2951 }
2952 }
2953
2954 return true;
2955}
2956
2957 bool X86FrameLowering::spillCalleeSavedRegisters(
2958 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2959 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2960 DebugLoc DL = MBB.findDebugLoc(MI);
2961
2962 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2963 // for us, and there are no XMM CSRs on Win32.
2964 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2965 return true;
2966
2967 // Push GPRs. It increases frame size.
2968 const MachineFunction &MF = *MBB.getParent();
2969 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2970 if (X86FI->padForPush2Pop2())
2971 emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
2972
2973 // Update LiveIn of the basic block and decide whether we can add a kill flag
2974 // to the use.
2975 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
2976 const MachineRegisterInfo &MRI = MF.getRegInfo();
2977 // Do not set a kill flag on values that are also marked as live-in. This
2978 // happens with the @llvm.returnaddress intrinsic and with arguments
2979 // passed in callee saved registers.
2980 // Omitting the kill flags is conservatively correct even if the live-in
2981 // is not used after all.
2982 if (MRI.isLiveIn(Reg))
2983 return false;
2984 MBB.addLiveIn(Reg);
2985 // Check if any subregister is live-in
2986 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
2987 if (MRI.isLiveIn(*AReg))
2988 return false;
2989 return true;
2990 };
2991 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
2992 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
2993 };
2994
2995 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
2996 Register Reg = RI->getReg();
2997 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2998 continue;
2999
3000 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3001 Register Reg2 = (++RI)->getReg();
3002 BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
3003 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3004 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3005 .setMIFlag(MachineInstr::FrameSetup);
3006 } else {
3007 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3008 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3009 .setMIFlag(MachineInstr::FrameSetup);
3010 }
3011 }
3012
3013 if (X86FI->getRestoreBasePointer()) {
3014 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3015 Register BaseReg = this->TRI->getBaseRegister();
3016 BuildMI(MBB, MI, DL, TII.get(Opc))
3017 .addReg(BaseReg, getKillRegState(true))
3018 .setMIFlag(MachineInstr::FrameSetup);
3019 }
3020
3021 // Spill the XMM regs: X86 has no push/pop instructions for XMM registers,
3022 // so store them to their stack-frame slots instead.
3023 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3024 Register Reg = I.getReg();
3025 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3026 continue;
3027
3028 // If this is k-register make sure we lookup via the largest legal type.
3029 MVT VT = MVT::Other;
3030 if (X86::VK16RegClass.contains(Reg))
3031 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3032
3033 // Add the callee-saved register as live-in. It's killed at the spill.
3034 MBB.addLiveIn(Reg);
3035 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3036
3037 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
3038 Register());
3039 --MI;
3040 MI->setFlag(MachineInstr::FrameSetup);
3041 ++MI;
3042 }
3043
3044 return true;
3045}
3046
3047void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3048 MachineBasicBlock::iterator MBBI,
3049 MachineInstr *CatchRet) const {
3050 // SEH shouldn't use catchret.
3051 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
3052 MBB.getParent()->getFunction().getPersonalityFn())) &&
3053 "SEH should not use CATCHRET");
3054 const DebugLoc &DL = CatchRet->getDebugLoc();
3055 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3056
3057 // Fill EAX/RAX with the address of the target block.
3058 if (STI.is64Bit()) {
3059 // LEA64r CatchRetTarget(%rip), %rax
3060 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3061 .addReg(X86::RIP)
3062 .addImm(0)
3063 .addReg(0)
3064 .addMBB(CatchRetTarget)
3065 .addReg(0);
3066 } else {
3067 // MOV32ri $CatchRetTarget, %eax
3068 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3069 .addMBB(CatchRetTarget);
3070 }
3071
3072 // Record that we've taken the address of CatchRetTarget and no longer just
3073 // reference it in a terminator.
3074 CatchRetTarget->setMachineBlockAddressTaken();
3075}
3076
3077 bool X86FrameLowering::restoreCalleeSavedRegisters(
3078 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
3079 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
3080 if (CSI.empty())
3081 return false;
3082
3083 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3084 // Don't restore CSRs in 32-bit EH funclets. Matches
3085 // spillCalleeSavedRegisters.
3086 if (STI.is32Bit())
3087 return true;
3088 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3089 // funclets. emitEpilogue transforms these to normal jumps.
3090 if (MI->getOpcode() == X86::CATCHRET) {
3091 const Function &F = MBB.getParent()->getFunction();
3092 bool IsSEH = isAsynchronousEHPersonality(
3093 classifyEHPersonality(F.getPersonalityFn()));
3094 if (IsSEH)
3095 return true;
3096 }
3097 }
3098
3099 DebugLoc DL = MBB.findDebugLoc(MI);
3100
3101 // Reload XMMs from stack frame.
3102 for (const CalleeSavedInfo &I : CSI) {
3103 Register Reg = I.getReg();
3104 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3105 continue;
3106
3107 // If this is k-register make sure we lookup via the largest legal type.
3108 MVT VT = MVT::Other;
3109 if (X86::VK16RegClass.contains(Reg))
3110 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
3111
3112 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
3113 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
3114 Register());
3115 }
3116
3117 // Clear the stack slot for spill base pointer register.
3118 MachineFunction &MF = *MBB.getParent();
3119 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3120 if (X86FI->getRestoreBasePointer()) {
3121 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3122 Register BaseReg = this->TRI->getBaseRegister();
3123 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3124 .setMIFlag(MachineInstr::FrameDestroy);
3125 }
3126
3127 // POP GPRs.
3128 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3129 Register Reg = I->getReg();
3130 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3131 continue;
3132
3133 if (X86FI->isCandidateForPush2Pop2(Reg))
3134 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3135 .addReg((++I)->getReg(), RegState::Define)
3136 .setMIFlag(MachineInstr::FrameDestroy);
3137 else
3138 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3139 .setMIFlag(MachineInstr::FrameDestroy);
3140 }
3141 if (X86FI->padForPush2Pop2())
3142 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3143
3144 return true;
3145}
3146
3147 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
3148 BitVector &SavedRegs,
3149 RegScavenger *RS) const {
3150 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
3151
3152 // Spill the BasePtr if it's used.
3153 if (TRI->hasBasePointer(MF)) {
3154 Register BasePtr = TRI->getBaseRegister();
3155 if (STI.isTarget64BitILP32())
3156 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3157 SavedRegs.set(BasePtr);
3158 }
3159}
3160
3161static bool HasNestArgument(const MachineFunction *MF) {
3162 const Function &F = MF->getFunction();
3163 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3164 I++) {
3165 if (I->hasNestAttr() && !I->use_empty())
3166 return true;
3167 }
3168 return false;
3169}
3170
3171/// GetScratchRegister - Get a temp register for performing work in the
3172/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3173/// and the properties of the function either one or two registers will be
3174/// needed. Set primary to true for the first register, false for the second.
3175static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3176 const MachineFunction &MF, bool Primary) {
3177 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3178
3179 // Erlang stuff.
3180 if (CallingConvention == CallingConv::HiPE) {
3181 if (Is64Bit)
3182 return Primary ? X86::R14 : X86::R13;
3183 else
3184 return Primary ? X86::EBX : X86::EDI;
3185 }
3186
3187 if (Is64Bit) {
3188 if (IsLP64)
3189 return Primary ? X86::R11 : X86::R12;
3190 else
3191 return Primary ? X86::R11D : X86::R12D;
3192 }
3193
3194 bool IsNested = HasNestArgument(&MF);
3195
3196 if (CallingConvention == CallingConv::X86_FastCall ||
3197 CallingConvention == CallingConv::Fast ||
3198 CallingConvention == CallingConv::Tail) {
3199 if (IsNested)
3200 report_fatal_error("Segmented stacks does not support fastcall with "
3201 "nested function.");
3202 return Primary ? X86::EAX : X86::ECX;
3203 }
3204 if (IsNested)
3205 return Primary ? X86::EDX : X86::EAX;
3206 return Primary ? X86::ECX : X86::EAX;
3207}
3208
3209// The stack limit in the TCB is set to this many bytes above the actual stack
3210// limit.
3211 static const uint64_t kSplitStackAvailable = 256;
3212
3213 void X86FrameLowering::adjustForSegmentedStacks(
3214 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3215 MachineFrameInfo &MFI = MF.getFrameInfo();
3216 uint64_t StackSize;
3217 unsigned TlsReg, TlsOffset;
3218 DebugLoc DL;
3219
3220 // To support shrink-wrapping we would need to insert the new blocks
3221 // at the right place and update the branches to PrologueMBB.
3222 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3223
3224 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3225 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3226 "Scratch register is live-in");
3227
3228 if (MF.getFunction().isVarArg())
3229 report_fatal_error("Segmented stacks do not support vararg functions.");
3230 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3231 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3232 !STI.isTargetDragonFly())
3233 report_fatal_error("Segmented stacks not supported on this platform.");
3234
3235 // Eventually StackSize will be calculated by a link-time pass, which will
3236 // also decide whether checking code needs to be injected into this particular
3237 // prologue.
3238 StackSize = MFI.getStackSize();
3239
3240 if (!MFI.needsSplitStackProlog())
3241 return;
3242
3243 MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
3244 MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
3245 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3246 bool IsNested = false;
3247
3248 // We need to know if the function has a nest argument only in 64 bit mode.
3249 if (Is64Bit)
3250 IsNested = HasNestArgument(&MF);
3251
3252 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3253 // allocMBB needs to be the last (terminating) instruction.
3254
3255 for (const auto &LI : PrologueMBB.liveins()) {
3256 allocMBB->addLiveIn(LI);
3257 checkMBB->addLiveIn(LI);
3258 }
3259
3260 if (IsNested)
3261 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3262
3263 MF.push_front(allocMBB);
3264 MF.push_front(checkMBB);
3265
3266 // When the frame size is less than 256 we just compare the stack
3267 // boundary directly to the value of the stack pointer, per gcc.
3268 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3269
3270 // Read the limit of the current stacklet from the stack_guard location.
3271 if (Is64Bit) {
3272 if (STI.isTargetLinux()) {
3273 TlsReg = X86::FS;
3274 TlsOffset = IsLP64 ? 0x70 : 0x40;
3275 } else if (STI.isTargetDarwin()) {
3276 TlsReg = X86::GS;
3277 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3278 } else if (STI.isTargetWin64()) {
3279 TlsReg = X86::GS;
3280 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3281 } else if (STI.isTargetFreeBSD()) {
3282 TlsReg = X86::FS;
3283 TlsOffset = 0x18;
3284 } else if (STI.isTargetDragonFly()) {
3285 TlsReg = X86::FS;
3286 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3287 } else {
3288 report_fatal_error("Segmented stacks not supported on this platform.");
3289 }
3290
3291 if (CompareStackPointer)
3292 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3293 else
3294 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3295 ScratchReg)
3296 .addReg(X86::RSP)
3297 .addImm(1)
3298 .addReg(0)
3299 .addImm(-StackSize)
3300 .addReg(0);
3301
3302 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3303 .addReg(ScratchReg)
3304 .addReg(0)
3305 .addImm(1)
3306 .addReg(0)
3307 .addImm(TlsOffset)
3308 .addReg(TlsReg);
3309 } else {
3310 if (STI.isTargetLinux()) {
3311 TlsReg = X86::GS;
3312 TlsOffset = 0x30;
3313 } else if (STI.isTargetDarwin()) {
3314 TlsReg = X86::GS;
3315 TlsOffset = 0x48 + 90 * 4;
3316 } else if (STI.isTargetWin32()) {
3317 TlsReg = X86::FS;
3318 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3319 } else if (STI.isTargetDragonFly()) {
3320 TlsReg = X86::FS;
3321 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3322 } else if (STI.isTargetFreeBSD()) {
3323 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3324 } else {
3325 report_fatal_error("Segmented stacks not supported on this platform.");
3326 }
3327
3328 if (CompareStackPointer)
3329 ScratchReg = X86::ESP;
3330 else
3331 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3332 .addReg(X86::ESP)
3333 .addImm(1)
3334 .addReg(0)
3335 .addImm(-StackSize)
3336 .addReg(0);
3337
3338 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3339 STI.isTargetDragonFly()) {
3340 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3341 .addReg(ScratchReg)
3342 .addReg(0)
3343 .addImm(0)
3344 .addReg(0)
3345 .addImm(TlsOffset)
3346 .addReg(TlsReg);
3347 } else if (STI.isTargetDarwin()) {
3348
3349 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3350 unsigned ScratchReg2;
3351 bool SaveScratch2;
3352 if (CompareStackPointer) {
3353 // The primary scratch register is available for holding the TLS offset.
3354 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3355 SaveScratch2 = false;
3356 } else {
3357 // Need to use a second register to hold the TLS offset
3358 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3359
3360 // Unfortunately, with fastcc the second scratch register may hold an
3361 // argument.
3362 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3363 }
3364
3365 // If Scratch2 is live-in then it needs to be saved.
3366 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3367 "Scratch register is live-in and not saved");
3368
3369 if (SaveScratch2)
3370 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3371 .addReg(ScratchReg2, RegState::Kill);
3372
3373 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3374 .addImm(TlsOffset);
3375 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3376 .addReg(ScratchReg)
3377 .addReg(ScratchReg2)
3378 .addImm(1)
3379 .addReg(0)
3380 .addImm(0)
3381 .addReg(TlsReg);
3382
3383 if (SaveScratch2)
3384 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3385 }
3386 }
3387
3388 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3389 // It jumps to normal execution of the function body.
3390 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3391 .addMBB(&PrologueMBB)
3392 .addImm(X86::COND_A);
3393
3394 // On 32 bit we first push the arguments size and then the frame size. On 64
3395 // bit, we pass the stack frame size in r10 and the argument size in r11.
3396 if (Is64Bit) {
3397 // Functions with nested arguments use R10, so it needs to be saved across
3398 // the call to _morestack
3399
3400 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3401 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3402 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3403 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3404
3405 if (IsNested)
3406 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3407
3408 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3409 .addImm(StackSize);
3410 BuildMI(allocMBB, DL,
3411 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
3412 Reg11)
3413 .addImm(X86FI->getArgumentStackSize());
3414 } else {
3415 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3416 .addImm(X86FI->getArgumentStackSize());
3417 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3418 }
3419
3420 // __morestack is in libgcc
3421 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
3422 // Under the large code model, we cannot assume that __morestack lives
3423 // within 2^31 bytes of the call site, so we cannot use pc-relative
3424 // addressing. We cannot perform the call via a temporary register,
3425 // as the rax register may be used to store the static chain, and all
3426 // other suitable registers may be either callee-save or used for
3427 // parameter passing. We cannot use the stack at this point either
3428 // because __morestack manipulates the stack directly.
3429 //
3430 // To avoid these issues, perform an indirect call via a read-only memory
3431 // location containing the address.
3432 //
3433 // This solution is not perfect, as it assumes that the .rodata section
3434 // is laid out within 2^31 bytes of each function body, but this seems
3435 // to be sufficient for JIT.
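// Concretely, the CALL64m built below encodes an indirect call of the form
// (sketch): callq *__morestack_addr(%rip)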
3436 // FIXME: Add retpoline support and remove the error here.
3437 if (STI.useIndirectThunkCalls())
3438 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3439 "code model and thunks not yet implemented.");
3440 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3441 .addReg(X86::RIP)
3442 .addImm(0)
3443 .addReg(0)
3444 .addExternalSymbol("__morestack_addr")
3445 .addReg(0);
3446 } else {
3447 if (Is64Bit)
3448 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3449 .addExternalSymbol("__morestack");
3450 else
3451 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3452 .addExternalSymbol("__morestack");
3453 }
3454
3455 if (IsNested)
3456 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3457 else
3458 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3459
3460 allocMBB->addSuccessor(&PrologueMBB);
3461
3462 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3463 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3464
3465#ifdef EXPENSIVE_CHECKS
3466 MF.verify();
3467#endif
3468}
3469
3470/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3471/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3472/// to fields it needs, through a named metadata node "hipe.literals" containing
3473/// name-value pairs.
3474static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3475 const StringRef LiteralName) {
3476 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3477 MDNode *Node = HiPELiteralsMD->getOperand(i);
3478 if (Node->getNumOperands() != 2)
3479 continue;
3480 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3481 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3482 if (!NodeName || !NodeVal)
3483 continue;
3484 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3485 if (ValConst && NodeName->getString() == LiteralName) {
3486 return ValConst->getZExtValue();
3487 }
3488 }
3489
3490 report_fatal_error("HiPE literal " + LiteralName +
3491 " required but not provided");
3492}
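// For reference, a minimal sketch of the metadata this helper consumes, as
// it might appear in the input IR (the literal values are illustrative, not
// normative):
//
//   !hipe.literals = !{!0, !1, !2}
//   !0 = !{!"P_NSP_LIMIT", i32 96}
//   !1 = !{!"X86_LEAF_WORDS", i32 24}
//   !2 = !{!"AMD64_LEAF_WORDS", i32 24}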
3493
3494// Return true if there are no non-ehpad successors to MBB and there are no
3495 // non-meta instructions between MBBI and MBB.end().
3496 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
3497 MachineBasicBlock::const_iterator MBBI) {
3498 return llvm::all_of(
3499 MBB.successors(),
3500 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3501 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3502 return MI.isMetaInstruction();
3503 });
3504}
3505
3506/// Erlang programs may need a special prologue to handle the stack size they
3507 /// might need at runtime. That is because Erlang/OTP does not implement a C
3508 /// stack but uses a custom hybrid stack/heap architecture.
3509/// (for more information see Eric Stenman's Ph.D. thesis:
3510/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3511///
3512/// CheckStack:
3513/// temp0 = sp - MaxStack
3514/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3515/// OldStart:
3516/// ...
3517/// IncStack:
3518/// call inc_stack # doubles the stack space
3519/// temp0 = sp - MaxStack
3520 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3521 void X86FrameLowering::adjustForHiPEPrologue(
3522 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3523 MachineFrameInfo &MFI = MF.getFrameInfo();
3524 DebugLoc DL;
3525
3526 // To support shrink-wrapping we would need to insert the new blocks
3527 // at the right place and update the branches to PrologueMBB.
3528 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3529
3530 // HiPE-specific values
3531 NamedMDNode *HiPELiteralsMD =
3532 MF.getMMI().getModule()->getNamedMetadata("hipe.literals");
3533 if (!HiPELiteralsMD)
3534 report_fatal_error(
3535 "Can't generate HiPE prologue without runtime parameters");
3536 const unsigned HipeLeafWords = getHiPELiteral(
3537 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3538 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3539 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3540 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3541 ? MF.getFunction().arg_size() - CCRegisteredArgs
3542 : 0;
3543 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
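// Worked example (illustrative): on x86-64 (SlotSize == 8, six
// register-passed arguments) a caller taking 9 arguments has
// CallerStkArity == 3, so a 40-byte frame gives
// MaxStack = 40 + 3 * 8 + 8 = 72 before the per-callee adjustment below.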
3544
3545 assert(STI.isTargetLinux() &&
3546 "HiPE prologue is only supported on Linux operating systems.");
3547
3548 // Compute the largest caller's frame that is needed to fit the callees'
3549 // frames. This 'MaxStack' is computed from:
3550 //
3551 // a) the fixed frame size, which is the space needed for all spilled temps,
3552 // b) outgoing on-stack parameter areas, and
3553 // c) the minimum stack space this function needs to make available for the
3554 // functions it calls (a tunable ABI property).
3555 if (MFI.hasCalls()) {
3556 unsigned MoreStackForCalls = 0;
3557
3558 for (auto &MBB : MF) {
3559 for (auto &MI : MBB) {
3560 if (!MI.isCall())
3561 continue;
3562
3563 // Get callee operand.
3564 const MachineOperand &MO = MI.getOperand(0);
3565
3566 // Only take account of global function calls (no closures etc.).
3567 if (!MO.isGlobal())
3568 continue;
3569
3570 const Function *F = dyn_cast<Function>(MO.getGlobal());
3571 if (!F)
3572 continue;
3573
3574 // Do not update 'MaxStack' for primitive and built-in functions
3575 // (encoded with names that contain "erlang." or "bif_", or that contain
3576 // neither a "." (as an ordinary <Module>.<Function>.<Arity> name would)
3577 // nor an "_" (as the BIF "suspend_0" does)), as they are executed on
3578 // another stack.
3579 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3580 F->getName().find_first_of("._") == StringRef::npos)
3581 continue;
3582
3583 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3584 ? F->arg_size() - CCRegisteredArgs
3585 : 0;
3586 if (HipeLeafWords - 1 > CalleeStkArity)
3587 MoreStackForCalls =
3588 std::max(MoreStackForCalls,
3589 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3590 }
3591 }
3592 MaxStack += MoreStackForCalls;
3593 }
3594
3595 // If the stack frame needed is larger than the guaranteed amount, then
3596 // runtime checks and calls to the "inc_stack_0" BIF are inserted in the assembly prologue.
3597 if (MaxStack > Guaranteed) {
3598 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3599 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3600
3601 for (const auto &LI : PrologueMBB.liveins()) {
3602 stackCheckMBB->addLiveIn(LI);
3603 incStackMBB->addLiveIn(LI);
3604 }
3605
3606 MF.push_front(incStackMBB);
3607 MF.push_front(stackCheckMBB);
3608
3609 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3610 unsigned LEAop, CMPop, CALLop;
3611 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3612 if (Is64Bit) {
3613 SPReg = X86::RSP;
3614 PReg = X86::RBP;
3615 LEAop = X86::LEA64r;
3616 CMPop = X86::CMP64rm;
3617 CALLop = X86::CALL64pcrel32;
3618 } else {
3619 SPReg = X86::ESP;
3620 PReg = X86::EBP;
3621 LEAop = X86::LEA32r;
3622 CMPop = X86::CMP32rm;
3623 CALLop = X86::CALLpcrel32;
3624 }
3625
3626 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3627 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3628 "HiPE prologue scratch register is live-in");
3629
3630 // Create new MBB for StackCheck:
3631 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3632 false, -MaxStack);
3633 // SPLimitOffset is in a fixed heap location (pointed by BP).
3634 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3635 PReg, false, SPLimitOffset);
3636 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3637 .addMBB(&PrologueMBB)
3638 .addImm(X86::COND_AE);
3639
3640 // Create new MBB for IncStack:
3641 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3642 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3643 false, -MaxStack);
3644 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3645 PReg, false, SPLimitOffset);
3646 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3647 .addMBB(incStackMBB)
3648 .addImm(X86::COND_LE);
3649
3650 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3651 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3652 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3653 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3654 }
3655#ifdef EXPENSIVE_CHECKS
3656 MF.verify();
3657#endif
3658}
3659
3660 bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3661 MachineBasicBlock::iterator MBBI,
3662 const DebugLoc &DL,
3663 int Offset) const {
3664 if (Offset <= 0)
3665 return false;
3666
3667 if (Offset % SlotSize)
3668 return false;
3669
3670 int NumPops = Offset / SlotSize;
3671 // This is only worth it if we have at most 2 pops.
3672 if (NumPops != 1 && NumPops != 2)
3673 return false;
3674
3675 // Handle only the trivial case where the adjustment directly follows
3676 // a call. This is the most common one, anyway.
3677 if (MBBI == MBB.begin())
3678 return false;
3679 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3680 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3681 return false;
3682
3683 unsigned Regs[2];
3684 unsigned FoundRegs = 0;
3685
3686 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3687 const MachineOperand &RegMask = Prev->getOperand(1);
3688
3689 auto &RegClass =
3690 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3691 // Try to find up to NumPops free registers.
3692 for (auto Candidate : RegClass) {
3693 // Poor man's liveness:
3694 // Since we're immediately after a call, any register that is clobbered
3695 // by the call and not defined by it can be considered dead.
3696 if (!RegMask.clobbersPhysReg(Candidate))
3697 continue;
3698
3699 // Don't clobber reserved registers
3700 if (MRI.isReserved(Candidate))
3701 continue;
3702
3703 bool IsDef = false;
3704 for (const MachineOperand &MO : Prev->implicit_operands()) {
3705 if (MO.isReg() && MO.isDef() &&
3706 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3707 IsDef = true;
3708 break;
3709 }
3710 }
3711
3712 if (IsDef)
3713 continue;
3714
3715 Regs[FoundRegs++] = Candidate;
3716 if (FoundRegs == (unsigned)NumPops)
3717 break;
3718 }
3719
3720 if (FoundRegs == 0)
3721 return false;
3722
3723 // If we found only one free register, but need two, reuse the same one twice.
3724 while (FoundRegs < (unsigned)NumPops)
3725 Regs[FoundRegs++] = Regs[0];
3726
3727 for (int i = 0; i < NumPops; ++i)
3728 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3729 Regs[i]);
3730
3731 return true;
3732}
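// An illustrative before/after (the registers are whatever dead candidates
// the scan above finds, not fixed): the 32-bit adjustment
//   calll _callee
//   addl  $8, %esp
// becomes
//   calll _callee
//   popl  %ecx
//   popl  %edx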
3733
3734 MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3735 MachineFunction &MF, MachineBasicBlock &MBB,
3736 MachineBasicBlock::iterator I) const {
3737 bool reserveCallFrame = hasReservedCallFrame(MF);
3738 unsigned Opcode = I->getOpcode();
3739 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3740 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3741 uint64_t Amount = TII.getFrameSize(*I);
3742 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3743 I = MBB.erase(I);
3744 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3745
3746 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3747 // typically because the function is marked noreturn (abort, throw,
3748 // assert_fail, etc).
3749 if (isDestroy && blockEndIsUnreachable(MBB, I))
3750 return I;
3751
3752 if (!reserveCallFrame) {
3753 // If the stack pointer can be changed after prologue, turn the
3754 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3755 // adjcallstackdown instruction into 'add ESP, <amt>'
3756
3757 // We need to keep the stack aligned properly. To do this, we round the
3758 // amount of space needed for the outgoing arguments up to the next
3759 // alignment boundary.
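// E.g. (sketch) a surviving ADJCALLSTACKDOWN64 20, 0, 0 with a 16-byte
// stack alignment is rounded up below and lowered to "subq $32, %rsp",
// while the matching frame-destroy becomes "addq $32, %rsp".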
3760 Amount = alignTo(Amount, getStackAlign());
3761
3762 const Function &F = MF.getFunction();
3763 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3764 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3765
3766 // If we have any exception handlers in this function, and we adjust
3767 // the SP before calls, we may need to indicate this to the unwinder
3768 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3769 // Amount == 0, because the preceding function may have set a non-0
3770 // GNU_ARGS_SIZE.
3771 // TODO: We don't need to reset this between subsequent functions,
3772 // if it didn't change.
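// In DWARF terms the BuildCFI below emits a DW_CFA_GNU_args_size opcode,
// i.e. (sketch) ".cfi_escape 0x2e, <Amount>" with a ULEB128-encoded amount.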
3773 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3774
3775 if (HasDwarfEHHandlers && !isDestroy &&
3776 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3777 BuildCFI(MBB, InsertPos, DL,
3778 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3779
3780 if (Amount == 0)
3781 return I;
3782
3783 // Factor out the amount that gets handled inside the sequence
3784 // (Pushes of argument for frame setup, callee pops for frame destroy)
3785 Amount -= InternalAmt;
3786
3787 // TODO: This is needed only if we require precise CFA.
3788 // If this is a callee-pop calling convention, emit a CFA adjust for
3789 // the amount the callee popped.
3790 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3791 BuildCFI(MBB, InsertPos, DL,
3792 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3793
3794 // Add Amount to SP to destroy a frame, or subtract to setup.
3795 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3796
3797 if (StackAdjustment) {
3798 // Merge with any previous or following adjustment instruction. Note: the
3799 // instructions merged here do not have CFI, so their stack
3800 // adjustments do not feed into CfaAdjustment.
3801 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3802 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3803
3804 if (StackAdjustment) {
3805 if (!(F.hasMinSize() &&
3806 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3807 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3808 /*InEpilogue=*/false);
3809 }
3810 }
3811
3812 if (DwarfCFI && !hasFP(MF)) {
3813 // If we don't have FP, but need to generate unwind information,
3814 // we need to set the correct CFA offset after the stack adjustment.
3815 // How much we adjust the CFA offset depends on whether we're emitting
3816 // CFI only for EH purposes or for debugging. EH only requires the CFA
3817 // offset to be correct at each call site, while for debugging we want
3818 // it to be more precise.
3819
3820 int64_t CfaAdjustment = -StackAdjustment;
3821 // TODO: When not using precise CFA, we also need to adjust for the
3822 // InternalAmt here.
3823 if (CfaAdjustment) {
3824 BuildCFI(
3825 MBB, InsertPos, DL,
3826 MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
3827 }
3828 }
3829
3830 return I;
3831 }
3832
3833 if (InternalAmt) {
3834 MachineBasicBlock::iterator CI = I;
3835 MachineBasicBlock::iterator B = MBB.begin();
3836 while (CI != B && !std::prev(CI)->isCall())
3837 --CI;
3838 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3839 }
3840
3841 return I;
3842}
3843
3844 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3845 assert(MBB.getParent() && "Block is not attached to a function!");
3846 const MachineFunction &MF = *MBB.getParent();
3847 if (!MBB.isLiveIn(X86::EFLAGS))
3848 return true;
3849
3850 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3851 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3852 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3853 const X86TargetLowering &TLI = *STI.getTargetLowering();
3854 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3855 return false;
3856
3858 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3859}
3860
3861 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3862 assert(MBB.getParent() && "Block is not attached to a function!");
3863
3864 // Win64 has strict requirements for epilogues, and we are not taking a
3865 // chance at messing with them.
3866 // I.e., unless this block is already an exit block, we can't use
3867 // it as an epilogue.
3868 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3869 return false;
3870
3871 // Swift async context epilogue has a BTR instruction that clobbers parts of
3872 // EFLAGS.
3873 const MachineFunction &MF = *MBB.getParent();
3874 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3875 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3876
3877 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3878 return true;
3879
3880 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3881 // clobbers EFLAGS. Check that we do not need to preserve it;
3882 // otherwise, conservatively assume it is not
3883 // safe to insert the epilogue here.
3884 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3885 }
3886
3887 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3888 // If we may need to emit frameless compact unwind information, give
3889 // up as this is currently broken: PR25614.
3890 bool CompactUnwind =
3891 MF.getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3892 nullptr;
3893 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3894 !CompactUnwind) &&
3895 // The lowering of segmented stack and HiPE only support entry
3896 // blocks as prologue blocks: PR26107. This limitation may be
3897 // lifted if we fix:
3898 // - adjustForSegmentedStacks
3899 // - adjustForHiPEPrologue
3900 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3901 !MF.shouldSplitStack();
3902}
3903
3904 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3905 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3906 const DebugLoc &DL, bool RestoreSP) const {
3907 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3908 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3909 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3910 "restoring EBP/ESI on non-32-bit target");
3911
3912 MachineFunction &MF = *MBB.getParent();
3913 Register FramePtr = TRI->getFrameRegister(MF);
3914 Register BasePtr = TRI->getBaseRegister();
3915 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3916 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3917 MachineFrameInfo &MFI = MF.getFrameInfo();
3918
3919 // FIXME: Don't set FrameSetup flag in catchret case.
3920
3921 int FI = FuncInfo.EHRegNodeFrameIndex;
3922 int EHRegSize = MFI.getObjectSize(FI);
3923
3924 if (RestoreSP) {
3925 // MOV32rm -EHRegSize(%ebp), %esp
3926 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3927 X86::EBP, true, -EHRegSize)
3928 .setMIFlag(MachineInstr::FrameSetup);
3929 }
3930
3931 Register UsedReg;
3932 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3933 int EndOffset = -EHRegOffset - EHRegSize;
3934 FuncInfo.EHRegNodeEndOffset = EndOffset;
3935
3936 if (UsedReg == FramePtr) {
3937 // ADD $offset, %ebp
3938 unsigned ADDri = getADDriOpcode(false);
3939 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3940 .addReg(FramePtr)
3941 .addImm(EndOffset)
3942 .setMIFlag(MachineInstr::FrameSetup)
3943 ->getOperand(3)
3944 .setIsDead();
3945 assert(EndOffset >= 0 &&
3946 "end of registration object above normal EBP position!");
3947 } else if (UsedReg == BasePtr) {
3948 // LEA offset(%ebp), %esi
3949 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3950 FramePtr, false, EndOffset)
3951 .setMIFlag(MachineInstr::FrameSetup);
3952 // MOV32rm SavedEBPOffset(%esi), %ebp
3953 assert(X86FI->getHasSEHFramePtrSave());
3954 int Offset =
3955 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3956 .getFixed();
3957 assert(UsedReg == BasePtr);
3958 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3959 UsedReg, true, Offset)
3960 .setMIFlag(MachineInstr::FrameSetup);
3961 } else {
3962 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3963 }
3964 return MBBI;
3965}
3966
3967 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3968 return TRI->getSlotSize();
3969}
3970
3971 Register
3972 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3973 return StackPtr;
3974}
3975
3976 TargetFrameLowering::DwarfFrameBase
3977 X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
3978 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
3979 Register FrameRegister = RI->getFrameRegister(MF);
3980 if (getInitialCFARegister(MF) == FrameRegister &&
3981 MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
3982 DwarfFrameBase FrameBase;
3983 FrameBase.Kind = DwarfFrameBase::CFA;
3984 FrameBase.Location.Offset =
3985 -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
3986 return FrameBase;
3987 }
3988
3989 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
3990}
3991
3992namespace {
3993// Struct used by orderFrameObjects to help sort the stack objects.
3994struct X86FrameSortingObject {
3995 bool IsValid = false; // true if we care about this Object.
3996 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3997 unsigned ObjectSize = 0; // Size of Object in bytes.
3998 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3999 unsigned ObjectNumUses = 0; // Object static number of uses.
4000};
4001
4002// The comparison function we use for std::sort to order our local
4003// stack symbols. The current algorithm is to use an estimated
4004// "density". This takes into consideration the size and number of
4005// uses each object has in order to roughly minimize code size.
4006// So, for example, an object of size 16B that is referenced 5 times
4007// will get higher priority than 4 4B objects referenced 1 time each.
4008// It's not perfect and we may be able to squeeze a few more bytes out of
4009 // it (for example: 0(esp) requires fewer bytes, symbols allocated at the
4010// fringe end can have special consideration, given their size is less
4011// important, etc.), but the algorithmic complexity grows too much to be
4012// worth the extra gains we get. This gets us pretty close.
4013// The final order leaves us with objects with highest priority going
4014// at the end of our list.
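// Worked example (illustrative): for A = {NumUses: 5, Size: 16} and
// B = {NumUses: 1, Size: 4}, the comparator below computes
// DensityAScaled = 5 * 4 = 20 and DensityBScaled = 1 * 16 = 16, so B
// sorts before A and the denser object A lands later, at higher priority.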
4015struct X86FrameSortingComparator {
4016 inline bool operator()(const X86FrameSortingObject &A,
4017 const X86FrameSortingObject &B) const {
4018 uint64_t DensityAScaled, DensityBScaled;
4019
4020 // For consistency in our comparison, all invalid objects are placed
4021 // at the end. This also allows us to stop walking when we hit the
4022 // first invalid item after it's all sorted.
4023 if (!A.IsValid)
4024 return false;
4025 if (!B.IsValid)
4026 return true;
4027
4028 // The density is calculated by doing :
4029 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4030 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4031 // Since this approach may cause inconsistencies in
4032 // the floating point <, >, == comparisons, depending on the floating
4033 // point model with which the compiler was built, we're going
4034 // to scale both sides by multiplying with
4035 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4036 // the division and, with it, the need for any floating point
4037 // arithmetic.
4038 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4039 static_cast<uint64_t>(B.ObjectSize);
4040 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4041 static_cast<uint64_t>(A.ObjectSize);
4042
4043 // If the two densities are equal, prioritize highest alignment
4044 // objects. This allows for similar alignment objects
4045 // to be packed together (given the same density).
4046 // There's room for improvement here, also, since we can pack
4047 // similar alignment (different density) objects next to each
4048 // other to save padding. This will also require further
4049 // complexity/iterations, and the overall gain isn't worth it,
4050 // in general. Something to keep in mind, though.
4051 if (DensityAScaled == DensityBScaled)
4052 return A.ObjectAlignment < B.ObjectAlignment;
4053
4054 return DensityAScaled < DensityBScaled;
4055 }
4056};
4057} // namespace
4058
4059// Order the symbols in the local stack.
4060// We want to place the local stack objects in some sort of sensible order.
4061// The heuristic we use is to try and pack them according to static number
4062// of uses and size of object in order to minimize code size.
4064 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4065 const MachineFrameInfo &MFI = MF.getFrameInfo();
4066
4067 // Don't waste time if there's nothing to do.
4068 if (ObjectsToAllocate.empty())
4069 return;
4070
4071 // Create an array of all MFI objects. We won't need all of these
4072 // objects, but we're going to create a full array of them to make
4073 // it easier to index into when we're counting "uses" down below.
4074 // We want to be able to easily/cheaply access an object by simply
4075 // indexing into it, instead of having to search for it every time.
4076 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4077
4078 // Walk the objects we care about and mark them as such in our working
4079 // struct.
4080 for (auto &Obj : ObjectsToAllocate) {
4081 SortingObjects[Obj].IsValid = true;
4082 SortingObjects[Obj].ObjectIndex = Obj;
4083 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4084 // Set the size.
4085 int ObjectSize = MFI.getObjectSize(Obj);
4086 if (ObjectSize == 0)
4087 // Variable size. Just use 4.
4088 SortingObjects[Obj].ObjectSize = 4;
4089 else
4090 SortingObjects[Obj].ObjectSize = ObjectSize;
4091 }
4092
4093 // Count the number of uses for each object.
4094 for (auto &MBB : MF) {
4095 for (auto &MI : MBB) {
4096 if (MI.isDebugInstr())
4097 continue;
4098 for (const MachineOperand &MO : MI.operands()) {
4099 // Check to see if it's a local stack symbol.
4100 if (!MO.isFI())
4101 continue;
4102 int Index = MO.getIndex();
4103 // Check to see if it falls within our range, and is tagged
4104 // to require ordering.
4105 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4106 SortingObjects[Index].IsValid)
4107 SortingObjects[Index].ObjectNumUses++;
4108 }
4109 }
4110 }
4111
4112 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
4113 // info).
4114 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4115
4116 // Now modify the original list to represent the final order that
4117 // we want. The order will depend on whether we're going to access them
4118 // from the stack pointer or the frame pointer. For SP, the list should
4119 // end with the objects we want at the smaller offsets. For FP, the
4120 // order should be flipped.
4121 int i = 0;
4122 for (auto &Obj : SortingObjects) {
4123 // All invalid items are sorted at the end, so it's safe to stop.
4124 if (!Obj.IsValid)
4125 break;
4126 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4127 }
4128
4129 // Flip it if we're accessing off of the FP.
4130 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4131 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4132}
4133
4134unsigned
4136 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4137 unsigned Offset = 16;
4138 // RBP is immediately pushed.
4139 Offset += SlotSize;
4140 // All callee-saved registers are then pushed.
4141 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4142 // Every funclet allocates enough stack space for the largest outgoing call.
4143 Offset += getWinEHFuncletFrameSize(MF);
4144 return Offset;
4145}
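// Worked example (illustrative): with three callee-saved registers pushed
// (24 bytes) and a 40-byte funclet frame, the offset computed above is
// 16 + 8 + 24 + 40 = 88.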
4146
4148 MachineFunction &MF, RegScavenger *RS) const {
4149 // Mark the function as not having WinCFI. We will set it back to true in
4150 // emitPrologue if it gets called and emits CFI.
4151 MF.setHasWinCFI(false);
4152
4153 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4154 // aligned. The format doesn't support misaligned stack adjustments.
4155 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
4156 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
4157
4158 // If this function isn't doing Win64-style C++ EH, we don't need to do
4159 // anything.
4160 if (STI.is64Bit() && MF.hasEHFunclets() &&
4161 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
4162 EHPersonality::MSVC_CXX) {
4163 adjustFrameForMsvcCxxEh(MF);
4164 }
4165}
4166
4167void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4168 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4169 // relative to RSP after the prologue. Find the offset of the last fixed
4170 // object, so that we can allocate a slot immediately following it. If there
4171 // were no fixed objects, use offset -SlotSize, which is immediately after the
4172 // return address. Fixed objects have negative frame indices.
4173 MachineFrameInfo &MFI = MF.getFrameInfo();
4174 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4175 int64_t MinFixedObjOffset = -SlotSize;
4176 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4177 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4178
4179 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4180 for (WinEHHandlerType &H : TBME.HandlerArray) {
4181 int FrameIndex = H.CatchObj.FrameIndex;
4182 if (FrameIndex != INT_MAX) {
4183 // Ensure alignment.
4184 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4185 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4186 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4187 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4188 }
4189 }
4190 }
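// E.g. (illustrative) with MinFixedObjOffset == -20 and an 8-byte-aligned,
// 16-byte catch object, the loop above computes -20 - (20 % 8) == -24 and
// then -24 - 16 == -40 as the object's frame offset.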
4191
4192 // Ensure alignment.
4193 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4194 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4195 int UnwindHelpFI =
4196 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4197 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4198
4199 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4200 // other frame setup instructions.
4201 MachineBasicBlock &MBB = MF.front();
4202 auto MBBI = MBB.begin();
4203 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4204 ++MBBI;
4205
4206 DebugLoc DL = MBB.findDebugLoc(MBBI);
4207 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4208 UnwindHelpFI)
4209 .addImm(-2);
4210}
4211
4212 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
4213 MachineFunction &MF, RegScavenger *RS) const {
4214 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4215
4216 if (STI.is32Bit() && MF.hasEHFunclets())
4217 restoreWinEHStackPointersInParent(MF);
4218 // We have emitted the prolog and epilog, so the stack-pointer-saving
4219 // instruction is no longer needed.
4220 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4221 MI->eraseFromParent();
4222 X86FI->setStackPtrSaveMI(nullptr);
4223 }
4224}
4225
4226 void X86FrameLowering::restoreWinEHStackPointersInParent(
4227 MachineFunction &MF) const {
4228 // 32-bit functions have to restore stack pointers when control is transferred
4229 // back to the parent function. These blocks are identified as eh pads that
4230 // are not funclet entries.
4231 bool IsSEH = isAsynchronousEHPersonality(
4232 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4233 for (MachineBasicBlock &MBB : MF) {
4234 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4235 if (NeedsRestore)
4236 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4237 /*RestoreSP=*/IsSEH);
4238 }
4239}
unsigned const MachineRegisterInfo * MRI
static bool isFuncletReturnInstr(const MachineInstr &MI)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const uint64_t kSplitStackAvailable
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Given that RA is a live value
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
static const unsigned FramePtr
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
BitVector & reset()
Definition: BitVector.h:392
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:850
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
size_t arg_size() const
Definition: Function.h:846
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:655
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:213
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:548
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition: MCDwarf.h:616
OpType getOperation() const
Definition: MCDwarf.h:658
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:564
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition: MCDwarf.h:653
const MCObjectFileInfo * getObjectFileInfo() const
Definition: MCContext.h:450
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:448
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
Metadata node.
Definition: Metadata.h:1067
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:607
Machine Value Type.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool callsUnwindInit() const
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
MachineModuleInfo & getMMI() const
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:547
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:473
unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
const MCContext & getContext() const
const Module * getModule() const
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:260
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition: Module.cpp:575
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
A tuple of MDNodes.
Definition: Metadata.h:1729
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1379
unsigned getNumOperands() const
Definition: Metadata.cpp:1375
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
static constexpr size_t npos
Definition: StringRef.h:52
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
const Triple & getTargetTriple() const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:608
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:542
Value wrapper in the Metadata hierarchy.
Definition: Metadata.h:450
Value * getValue() const
Definition: Metadata.h:490
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return initial CFA register value i.e.
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
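A sketch of a typical call, as it would appear inside an X86FrameLowering member (MBB, MBBI, and DL assumed from context); MCCFIInstruction::cfiDefCfaOffset is the standard factory for the directive shown:

  // After pushing %rbp on x86-64, the CFA sits 16 bytes above the new %rsp.
  BuildCFI(MBB, MBBI, DL,
           llvm::MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, /*Offset=*/16),
           llvm::MachineInstr::FrameSetup);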
int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const
Check the instruction before/after the passed instruction.
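A sketch of the intended pattern, written as it would appear inside an X86FrameLowering member (sign handling is assumed; the prologue/epilogue emitters in this file are the authoritative users): fold a neighbouring add/sub of the stack pointer into the adjustment about to be emitted, so only one SP update remains.

  // Absorb an adjacent SP adjustment before MBBI into NumBytes, then emit one update.
  NumBytes += mergeSPUpdates(MBB, MBBI, /*doMergeWithPrevious=*/true);
  emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);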
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex operands are eliminated, but after the frame is finalized.
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will be the preferred value for FrameReg.
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns true if it isn't possible / profitable to do so by issuing a series of store instructions.
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e.g. by pushes, or reserve stack space).
Definition: X86InstrInfo.h:197
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
X86MachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private X86 target-specific information for each MachineFunction.
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
void addCandidateForPush2Pop2(Register Reg)
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
bool hasBasePointer(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const
findDeadCallerSavedReg - Return a caller-saved register that isn't live when it reaches the "return" instruction.
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
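Taken together, these X86RegisterInfo accessors describe the frame-related registers of the current function. A minimal sketch, assuming TRI is a const X86RegisterInfo * and MF the current MachineFunction:

  llvm::Register SP = TRI->getStackRegister();  // %rsp or %esp
  llvm::Register FP = TRI->getFramePtr();       // %rbp or %ebp
  unsigned SlotSize = TRI->getSlotSize();       // 8 on 64-bit targets, 4 on 32-bit
  llvm::Register BP = TRI->hasBasePointer(MF)   // only valid when a base pointer
                          ? TRI->getBaseRegister()
                          : llvm::Register();   // is actually needed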
bool isOSWindows() const
Definition: X86Subtarget.h:335
const X86TargetLowering * getTargetLowering() const override
Definition: X86Subtarget.h:125
bool isTargetDragonFly() const
Definition: X86Subtarget.h:295
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:313
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:180
bool isTargetDarwin() const
Definition: X86Subtarget.h:293
bool isTargetWin64() const
Definition: X86Subtarget.h:337
bool isTarget64BitLP64() const
Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
Definition: X86Subtarget.h:185
bool swiftAsyncContextIsDynamicallySet() const
Return whether FrameLowering should always set the "extended frame present" bit in FP, or set it based on a symbol in the runtime.
Definition: X86Subtarget.h:399
bool isTargetWindowsCoreCLR() const
Definition: X86Subtarget.h:317
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:129
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:350
bool isTargetFreeBSD() const
Definition: X86Subtarget.h:294
bool isTargetNaCl64() const
Definition: X86Subtarget.h:309
bool isTargetWin32() const
Definition: X86Subtarget.h:339
bool useIndirectThunkCalls() const
Definition: X86Subtarget.h:232
bool isTargetLinux() const
Definition: X86Subtarget.h:303
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override
Returns the name of the symbol used to emit stack probes or the empty string if not applicable.
bool hasStackProbeSymbol(const MachineFunction &MF) const override
Returns true if stack probing through a function call is requested.
unsigned getStackProbeSize(const MachineFunction &MF) const
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:199
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always be performed.
Definition: CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for the symbol name from the current code location.
Definition: X86BaseInfo.h:401
CallingConvention
Definition: Dwarf.h:421
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
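A quick sketch of these range-based wrappers (the values are illustrative only):

  llvm::SmallVector<int, 8> FrameIdxs = {3, 0, 2};
  llvm::stable_sort(FrameIdxs);  // preserves the relative order of equal keys
  bool AllValid = llvm::all_of(FrameIdxs, [](int FI) { return FI >= 0; });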
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
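A sketch of the push/mov pair a frame-pointer prologue uses, built with BuildMI (TII, MBB, MBBI, and DL assumed from the surrounding pass):

  BuildMI(MBB, MBBI, DL, TII.get(llvm::X86::PUSH64r))
      .addReg(llvm::X86::RBP, llvm::RegState::Kill)
      .setMIFlag(llvm::MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII.get(llvm::X86::MOV64rr), llvm::X86::RBP)
      .addReg(llvm::X86::RSP)
      .setMIFlag(llvm::MachineInstr::FrameSetup);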
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
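For example (register names shown for orientation; Size is in bits):

  llvm::MCRegister R = llvm::getX86SubSuperRegister(llvm::X86::EAX, 64); // %rax
  llvm::MCRegister H = llvm::getX86SubSuperRegister(llvm::X86::AX, 8,
                                                    /*High=*/true);      // %ah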
@ DwarfCFI
DWARF-like instruction based exceptions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the stack frame of the current function.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset], i.e., one with no scale or index, but with a displacement. An example is: DWORD PTR [EAX + 4].
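A sketch combining this helper with addFrameReference above (opcodes, registers, and FI are illustrative; TII, MBB, MBBI, and DL assumed from context):

  // Reload %rax from a frame index, then store it at [%rsp + 8].
  addFrameReference(
      BuildMI(MBB, MBBI, DL, TII.get(llvm::X86::MOV64rm), llvm::X86::RAX), FI);
  addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(llvm::X86::MOV64mr)),
               llvm::X86::RSP, /*isKill=*/false, /*Offset=*/8)
      .addReg(llvm::X86::RAX);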
@ Always
Always set the bit.
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand. If so, return a description of it; otherwise return Unknown.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1930
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
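A sketch of both encoders writing into an in-memory stream; the byte values noted in the comments are the standard LEB128 encodings of the chosen inputs:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/Support/LEB128.h"
  #include "llvm/Support/raw_ostream.h"

  llvm::SmallString<16> Buf;
  llvm::raw_svector_ostream OS(Buf);
  llvm::encodeULEB128(624485u, OS); // emits 0xE5 0x8E 0x26
  llvm::encodeSLEB128(-2, OS);      // emits 0x7E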
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:428
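A quick sketch of the three alignment helpers indexed here, with the results noted:

  llvm::Align A(16);
  uint64_t Up   = llvm::alignTo(40, A);    // 48: next multiple of 16
  uint64_t Down = llvm::alignDown(40, 16); // 32: previous multiple of 16
  bool OK       = llvm::isAligned(A, Up);  // true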
static bool recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-ins for an MBB. Returns true if any changes were made.
Definition: LivePhysRegs.h:198
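Because it reports whether anything changed, the usual pattern (a sketch; MF assumed to be the MachineFunction whose blocks were just rewritten) iterates to a fixed point:

  bool AnyChange;
  do {
    AnyChange = false;
    for (llvm::MachineBasicBlock &B : llvm::reverse(MF))
      AnyChange |= llvm::recomputeLiveIns(B);
  } while (AnyChange);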
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
union llvm::TargetFrameLowering::DwarfFrameBase::(anonymous union) Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
Definition: WinEHFuncInfo.h:97
SmallVector< WinEHHandlerType, 1 > HandlerArray
Definition: WinEHFuncInfo.h:76