1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/MC/MCAsmInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/LEB128.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45
46using namespace llvm;
47
48X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
49 MaybeAlign StackAlignOverride)
50 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
51 STI.is64Bit() ? -8 : -4),
52 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
53 // Cache a bunch of frame-related predicates for this subtarget.
54 SlotSize = TRI->getSlotSize();
55 Is64Bit = STI.is64Bit();
56 IsLP64 = STI.isTarget64BitLP64();
57 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
58 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
59 StackPtr = TRI->getStackRegister();
60}
61
62bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
63 return !MF.getFrameInfo().hasVarSizedObjects() &&
64 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
65 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
66}
67
68/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
69/// call frame pseudos can be simplified. Having a FP, as in the default
70/// implementation, is not sufficient here since we can't always use it.
71/// Use a more nuanced condition.
72bool
73X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
74 return hasReservedCallFrame(MF) ||
75 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
76 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
77 TRI->hasBasePointer(MF);
78}
79
80// needsFrameIndexResolution - Do we need to perform FI resolution for
81// this function. Normally, this is required only when the function
82// has any stack objects. However, FI resolution actually has another job,
83// not apparent from the title - it resolves callframesetup/destroy
84// that were not simplified earlier.
85// So, this is required for x86 functions that have push sequences even
86// when there are no stack objects.
87bool
88X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
89 return MF.getFrameInfo().hasStackObjects() ||
90 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
91}
92
93/// hasFP - Return true if the specified function should have a dedicated frame
94/// pointer register. This is true if the function has variable sized allocas
95/// or if frame pointer elimination is disabled.
96bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
97 const MachineFrameInfo &MFI = MF.getFrameInfo();
98 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
99 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
100 MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
101 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
102 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
103 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
104 MFI.hasStackMap() || MFI.hasPatchPoint() ||
105 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
106}
107
108static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
109 if (IsLP64) {
110 if (isInt<8>(Imm))
111 return X86::SUB64ri8;
112 return X86::SUB64ri32;
113 } else {
114 if (isInt<8>(Imm))
115 return X86::SUB32ri8;
116 return X86::SUB32ri;
117 }
118}
119
120static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
121 if (IsLP64) {
122 if (isInt<8>(Imm))
123 return X86::ADD64ri8;
124 return X86::ADD64ri32;
125 } else {
126 if (isInt<8>(Imm))
127 return X86::ADD32ri8;
128 return X86::ADD32ri;
129 }
130}
131
132static unsigned getSUBrrOpcode(bool IsLP64) {
133 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
134}
135
136static unsigned getADDrrOpcode(bool IsLP64) {
137 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
138}
139
140static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
141 if (IsLP64) {
142 if (isInt<8>(Imm))
143 return X86::AND64ri8;
144 return X86::AND64ri32;
145 }
146 if (isInt<8>(Imm))
147 return X86::AND32ri8;
148 return X86::AND32ri;
149}
150
151static unsigned getLEArOpcode(bool IsLP64) {
152 return IsLP64 ? X86::LEA64r : X86::LEA32r;
153}
154
155static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
156 if (Use64BitReg) {
157 if (isUInt<32>(Imm))
158 return X86::MOV32ri64;
159 if (isInt<32>(Imm))
160 return X86::MOV64ri32;
161 return X86::MOV64ri;
162 }
163 return X86::MOV32ri;
164}
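// Illustrative examples of the opcode selection above (not part of the
// original source): on LP64, getSUBriOpcode(true, 8) picks SUB64ri8 because
// the immediate fits in a signed byte, while getSUBriOpcode(true, 4096)
// falls back to SUB64ri32; the same offsets on 32-bit map to SUB32ri8 and
// SUB32ri. Likewise getMOVriOpcode(true, 0x7fffffff) returns MOV32ri64 (a
// zero-extending move), getMOVriOpcode(true, -8) returns MOV64ri32 (a
// sign-extended 32-bit immediate), and only a constant that needs all 64
// bits, such as 0x123456789, selects MOV64ri.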
165
166static bool isEAXLiveIn(MachineBasicBlock &MBB) {
167 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
168 unsigned Reg = RegMask.PhysReg;
169
170 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
171 Reg == X86::AH || Reg == X86::AL)
172 return true;
173 }
174
175 return false;
176}
177
178/// Check if the flags need to be preserved before the terminators.
179/// This would be the case, if the eflags is live-in of the region
180/// composed by the terminators or live-out of that region, without
181/// being defined by a terminator.
182static bool
183flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
184 for (const MachineInstr &MI : MBB.terminators()) {
185 bool BreakNext = false;
186 for (const MachineOperand &MO : MI.operands()) {
187 if (!MO.isReg())
188 continue;
189 Register Reg = MO.getReg();
190 if (Reg != X86::EFLAGS)
191 continue;
192
193 // This terminator needs an eflags that is not defined
194 // by a previous terminator:
195 // EFLAGS is live-in of the region composed by the terminators.
196 if (!MO.isDef())
197 return true;
198 // This terminator defines the eflags, i.e., we don't need to preserve it.
199 // However, we still need to check this specific terminator does not
200 // read a live-in value.
201 BreakNext = true;
202 }
203 // We found a definition of the eflags, no need to preserve them.
204 if (BreakNext)
205 return false;
206 }
207
208 // None of the terminators use or define the eflags.
209 // Check if they are live-out, that would imply we need to preserve them.
210 for (const MachineBasicBlock *Succ : MBB.successors())
211 if (Succ->isLiveIn(X86::EFLAGS))
212 return true;
213
214 return false;
215}
216
217/// emitSPUpdate - Emit a series of instructions to increment / decrement the
218/// stack pointer by a constant value.
219void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
220 MachineBasicBlock::iterator &MBBI,
221 const DebugLoc &DL,
222 int64_t NumBytes, bool InEpilogue) const {
223 bool isSub = NumBytes < 0;
224 uint64_t Offset = isSub ? -NumBytes : NumBytes;
225 MachineInstr::MIFlag Flag =
226 isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
227
228 uint64_t Chunk = (1LL << 31) - 1;
229
230 MachineFunction &MF = *MBB.getParent();
231 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
232 const X86TargetLowering &TLI = *STI.getTargetLowering();
233 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
234
235 // It's ok to not take into account large chunks when probing, as the
236 // allocation is split in smaller chunks anyway.
237 if (EmitInlineStackProbe && !InEpilogue) {
238
239 // This pseudo-instruction is going to be expanded, potentially using a
240 // loop, by inlineStackProbe().
241 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
242 return;
243 } else if (Offset > Chunk) {
244 // Rather than emit a long series of instructions for large offsets,
245 // load the offset into a register and do one sub/add
246 unsigned Reg = 0;
247 unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
248
249 if (isSub && !isEAXLiveIn(MBB))
250 Reg = Rax;
251 else
252 Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
253
254 unsigned AddSubRROpc =
255 isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
256 if (Reg) {
257 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
258 .addImm(Offset)
259 .setMIFlag(Flag);
260 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
261 .addReg(StackPtr)
262 .addReg(Reg);
263 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
264 return;
265 } else if (Offset > 8 * Chunk) {
266 // If we would need more than 8 add or sub instructions (a >16GB stack
267 // frame), it's worth spilling RAX to materialize this immediate.
268 // pushq %rax
269 // movabsq +-$Offset+-SlotSize, %rax
270 // addq %rsp, %rax
271 // xchg %rax, (%rsp)
272 // movq (%rsp), %rsp
273 assert(Is64Bit && "can't have 32-bit 16GB stack frame");
274 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
275 .addReg(Rax, RegState::Kill)
276 .setMIFlag(Flag);
277 // Subtract is not commutative, so negate the offset and always use add.
278 // Subtract 8 less and add 8 more to account for the PUSH we just did.
279 if (isSub)
280 Offset = -(Offset - SlotSize);
281 else
282 Offset = Offset + SlotSize;
283 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
284 .addImm(Offset)
285 .setMIFlag(Flag);
286 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
287 .addReg(Rax)
288 .addReg(StackPtr);
289 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
290 // Exchange the new SP in RAX with the top of the stack.
291 addRegOffset(
292 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
293 StackPtr, false, 0);
294 // Load new SP from the top of the stack into RSP.
295 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
296 StackPtr, false, 0);
297 return;
298 }
299 }
300
301 while (Offset) {
302 uint64_t ThisVal = std::min(Offset, Chunk);
303 if (ThisVal == SlotSize) {
304 // Use push / pop for slot sized adjustments as a size optimization. We
305 // need to find a dead register when using pop.
306 unsigned Reg = isSub
307 ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
308 : TRI->findDeadCallerSavedReg(MBB, MBBI);
309 if (Reg) {
310 unsigned Opc = isSub
311 ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
312 : (Is64Bit ? X86::POP64r : X86::POP32r);
313 BuildMI(MBB, MBBI, DL, TII.get(Opc))
314 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
315 .setMIFlag(Flag);
316 Offset -= ThisVal;
317 continue;
318 }
319 }
320
321 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
322 .setMIFlag(Flag);
323
324 Offset -= ThisVal;
325 }
326}
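// A rough sketch of what emitSPUpdate produces for a few typical requests
// (illustrative only; the exact output depends on register liveness and the
// subtarget): adjusting by one slot (-8 on x86-64) with RAX dead emits a
// single "pushq %rax" instead of "subq $8, %rsp"; a mid-sized allocation
// such as -4096 becomes one SUB (or the STACKALLOC_W_PROBING pseudo when the
// prologue uses inline stack probing); an offset larger than 2GiB is first
// materialized into a scratch register and applied with a single
// register-register SUB/ADD.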
327
328MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
329 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
330 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
331 assert(Offset != 0 && "zero offset stack adjustment requested");
332
333 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
334 // is tricky.
335 bool UseLEA;
336 if (!InEpilogue) {
337 // Check if inserting the prologue at the beginning
338 // of MBB would require using LEA operations.
339 // We need to use LEA operations if EFLAGS is live in, because
340 // it means an instruction will read it before it gets defined.
341 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
342 } else {
343 // If we can use LEA for SP but we shouldn't, check that none
344 // of the terminators uses the eflags. Otherwise we will insert
345 // an ADD that will redefine the eflags and break the condition.
346 // Alternatively, we could move the ADD, but this may not be possible
347 // and is an optimization anyway.
348 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
349 if (UseLEA && !STI.useLeaForSP())
350 UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
351 // If that assert breaks, that means we do not do the right thing
352 // in canUseAsEpilogue.
353 assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
354 "We shouldn't have allowed this insertion point");
355 }
356
357 MachineInstrBuilder MI;
358 if (UseLEA) {
359 MI = addRegOffset(BuildMI(MBB, MBBI, DL,
360 TII.get(getLEArOpcode(Uses64BitFramePtr)),
361 StackPtr),
362 StackPtr, false, Offset);
363 } else {
364 bool IsSub = Offset < 0;
365 uint64_t AbsOffset = IsSub ? -Offset : Offset;
366 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
367 : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
368 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
369 .addReg(StackPtr)
370 .addImm(AbsOffset);
371 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
372 }
373 return MI;
374}
375
376int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
377 MachineBasicBlock::iterator &MBBI,
378 bool doMergeWithPrevious) const {
379 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
380 (!doMergeWithPrevious && MBBI == MBB.end()))
381 return 0;
382
383 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
384
386 // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
387 // instruction, and that there are no DBG_VALUE or other instructions between
388 // ADD/SUB/LEA and its corresponding CFI instruction.
389 /* TODO: Add support for the case where there are multiple CFI instructions
390 below the ADD/SUB/LEA, e.g.:
391 ...
392 add
393 cfi_def_cfa_offset
394 cfi_offset
395 ...
396 */
397 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
398 PI = std::prev(PI);
399
400 unsigned Opc = PI->getOpcode();
401 int Offset = 0;
402
403 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
404 Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
405 PI->getOperand(0).getReg() == StackPtr){
406 assert(PI->getOperand(1).getReg() == StackPtr);
407 Offset = PI->getOperand(2).getImm();
408 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
409 PI->getOperand(0).getReg() == StackPtr &&
410 PI->getOperand(1).getReg() == StackPtr &&
411 PI->getOperand(2).getImm() == 1 &&
412 PI->getOperand(3).getReg() == X86::NoRegister &&
413 PI->getOperand(5).getReg() == X86::NoRegister) {
414 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
415 Offset = PI->getOperand(4).getImm();
416 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
417 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
418 PI->getOperand(0).getReg() == StackPtr) {
419 assert(PI->getOperand(1).getReg() == StackPtr);
420 Offset = -PI->getOperand(2).getImm();
421 } else
422 return 0;
423
424 PI = MBB.erase(PI);
425 if (PI != MBB.end() && PI->isCFIInstruction()) {
426 auto CIs = MBB.getParent()->getFrameInstructions();
427 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
428 if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
429 CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
430 PI = MBB.erase(PI);
431 }
432 if (!doMergeWithPrevious)
433 MBBI = skipDebugInstructionsForward(PI, MBB.end());
434
435 return Offset;
436}
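// Example of the merge above (illustrative): if the instruction just before
// the insertion point is "subq $32, %rsp" (a SUB64ri8 whose destination is
// the stack pointer), mergeSPUpdates erases it and returns -32; callers such
// as emitPrologue then do "NumBytes -= mergeSPUpdates(...)", folding those
// 32 bytes into their own single stack adjustment.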
437
438void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
439 MachineBasicBlock::iterator MBBI,
440 const DebugLoc &DL,
441 const MCCFIInstruction &CFIInst,
442 MachineInstr::MIFlag Flag) const {
443 MachineFunction &MF = *MBB.getParent();
444 unsigned CFIIndex = MF.addFrameInst(CFIInst);
445 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
446 .addCFIIndex(CFIIndex)
447 .setMIFlag(Flag);
448}
449
450/// Emits Dwarf Info specifying offsets of callee saved registers and
451/// frame pointer. This is called only when basic block sections are enabled.
452void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
453 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
454 MachineFunction &MF = *MBB.getParent();
455 if (!hasFP(MF)) {
456 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
457 return;
458 }
459 const MachineModuleInfo &MMI = MF.getMMI();
460 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
461 const Register FramePtr = TRI->getFrameRegister(MF);
462 const Register MachineFramePtr =
463 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
464 : FramePtr;
465 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
466 // Offset = space for return address + size of the frame pointer itself.
467 unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
468 BuildCFI(MBB, MBBI, DebugLoc{},
469 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
470 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
471}
472
473void X86FrameLowering::emitCalleeSavedFrameMoves(
474 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
475 const DebugLoc &DL, bool IsPrologue) const {
476 MachineFunction &MF = *MBB.getParent();
477 MachineFrameInfo &MFI = MF.getFrameInfo();
478 MachineModuleInfo &MMI = MF.getMMI();
479 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
480 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
481
482 // Add callee saved registers to move list.
483 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
484
485 // Calculate offsets.
486 for (const CalleeSavedInfo &I : CSI) {
487 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
488 Register Reg = I.getReg();
489 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
490
491 if (IsPrologue) {
492 if (X86FI->getStackPtrSaveMI()) {
493 // +2*SlotSize because there is return address and ebp at the bottom
494 // of the stack.
495 // | retaddr |
496 // | ebp |
497 // | |<--ebp
498 Offset += 2 * SlotSize;
499 SmallString<64> CfaExpr;
500 CfaExpr.push_back(dwarf::DW_CFA_expression);
501 uint8_t buffer[16];
502 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
503 CfaExpr.push_back(2);
505 const Register MachineFramePtr =
508 : FramePtr;
509 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
510 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
511 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
513 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
515 } else {
517 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
518 }
519 } else {
521 MCCFIInstruction::createRestore(nullptr, DwarfReg));
522 }
523 }
524 if (auto *MI = X86FI->getStackPtrSaveMI()) {
525 int FI = MI->getOperand(1).getIndex();
526 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
527 SmallString<64> CfaExpr;
529 const Register MachineFramePtr =
532 : FramePtr;
533 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
534 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
535 uint8_t buffer[16];
536 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
537 CfaExpr.push_back(dwarf::DW_OP_deref);
538
539 SmallString<64> DefCfaExpr;
540 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
541 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
542 DefCfaExpr.append(CfaExpr.str());
543 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
545 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
547 }
548}
549
550void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
551 MachineBasicBlock &MBB) const {
552 const MachineFunction &MF = *MBB.getParent();
553
554 // Insertion point.
556
557 // Fake a debug loc.
558 DebugLoc DL;
559 if (MBBI != MBB.end())
560 DL = MBBI->getDebugLoc();
561
562 // Zero out FP stack if referenced. Do this outside of the loop below so that
563 // it's done only once.
564 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
565 for (MCRegister Reg : RegsToZero.set_bits()) {
566 if (!X86::RFP80RegClass.contains(Reg))
567 continue;
568
569 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
570 for (unsigned i = 0; i != NumFPRegs; ++i)
571 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
572
573 for (unsigned i = 0; i != NumFPRegs; ++i)
574 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
575 break;
576 }
577
578 // For GPRs, we only care to clear out the 32-bit register.
579 BitVector GPRsToZero(TRI->getNumRegs());
580 for (MCRegister Reg : RegsToZero.set_bits())
581 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
582 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
583 RegsToZero.reset(Reg);
584 }
585
586 for (MCRegister Reg : GPRsToZero.set_bits())
587 BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
589 .addReg(Reg, RegState::Undef);
590
591 // Zero out registers.
592 for (MCRegister Reg : RegsToZero.set_bits()) {
593 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
594 // FIXME: Ignore MMX registers?
595 continue;
596
597 unsigned XorOp;
598 if (X86::VR128RegClass.contains(Reg)) {
599 // XMM#
600 if (!ST.hasSSE1())
601 continue;
602 XorOp = X86::PXORrr;
603 } else if (X86::VR256RegClass.contains(Reg)) {
604 // YMM#
605 if (!ST.hasAVX())
606 continue;
607 XorOp = X86::VPXORrr;
608 } else if (X86::VR512RegClass.contains(Reg)) {
609 // ZMM#
610 if (!ST.hasAVX512())
611 continue;
612 XorOp = X86::VPXORYrr;
613 } else if (X86::VK1RegClass.contains(Reg) ||
614 X86::VK2RegClass.contains(Reg) ||
615 X86::VK4RegClass.contains(Reg) ||
616 X86::VK8RegClass.contains(Reg) ||
617 X86::VK16RegClass.contains(Reg)) {
618 if (!ST.hasVLX())
619 continue;
620 XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
621 } else {
622 continue;
623 }
624
625 BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
627 .addReg(Reg, RegState::Undef);
628 }
629}
630
633 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
634 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
635 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
636 if (STI.isTargetWindowsCoreCLR()) {
637 if (InProlog) {
638 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
639 .addImm(0 /* no explicit stack size */);
640 } else {
641 emitStackProbeInline(MF, MBB, MBBI, DL, false);
642 }
643 } else {
644 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
645 }
646}
647
648bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
649 return STI.isOSWindows() && !STI.isTargetWin64();
650}
651
652void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
653 MachineBasicBlock &PrologMBB) const {
654 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
655 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
656 });
657 if (Where != PrologMBB.end()) {
658 DebugLoc DL = PrologMBB.findDebugLoc(Where);
659 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
660 Where->eraseFromParent();
661 }
662}
663
664void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
665 MachineBasicBlock &MBB,
666 MachineBasicBlock::iterator MBBI,
667 const DebugLoc &DL,
668 bool InProlog) const {
669 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
670 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
671 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
672 else
673 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
674}
675
676void X86FrameLowering::emitStackProbeInlineGeneric(
678 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
679 MachineInstr &AllocWithProbe = *MBBI;
680 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
681
684 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
685 "different expansion expected for CoreCLR 64 bit");
686
687 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
688 uint64_t ProbeChunk = StackProbeSize * 8;
689
690 uint64_t MaxAlign =
691 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
692
693 // Synthesize a loop or unroll it, depending on the number of iterations.
694 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
695 // between the unaligned rsp and current rsp.
696 if (Offset > ProbeChunk) {
697 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
698 MaxAlign % StackProbeSize);
699 } else {
700 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
701 MaxAlign % StackProbeSize);
702 }
703}
704
705void X86FrameLowering::emitStackProbeInlineGenericBlock(
708 uint64_t AlignOffset) const {
709
710 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
711 const bool HasFP = hasFP(MF);
714 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
715 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
716
717 uint64_t CurrentOffset = 0;
718
719 assert(AlignOffset < StackProbeSize);
720
721 // If the offset does not fit within a single page, allocate and probe the first page now.
722 if (StackProbeSize < Offset + AlignOffset) {
723
724 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
725 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
727 if (!HasFP && NeedsDwarfCFI) {
728 BuildCFI(
729 MBB, MBBI, DL,
730 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
731 }
732
733 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
735 StackPtr, false, 0)
736 .addImm(0)
738 NumFrameExtraProbe++;
739 CurrentOffset = StackProbeSize - AlignOffset;
740 }
741
742 // For the next N - 1 pages, just probe. I tried to take advantage of
743 // natural probes but it implies much more logic and there were very few
744 // interesting natural probes to interleave.
745 while (CurrentOffset + StackProbeSize < Offset) {
746 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
748
749 if (!HasFP && NeedsDwarfCFI) {
750 BuildCFI(
751 MBB, MBBI, DL,
752 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
753 }
754 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
756 StackPtr, false, 0)
757 .addImm(0)
759 NumFrameExtraProbe++;
760 CurrentOffset += StackProbeSize;
761 }
762
763 // No need to probe the tail, it is smaller than a Page.
764 uint64_t ChunkSize = Offset - CurrentOffset;
765 if (ChunkSize == SlotSize) {
766 // Use push for slot sized adjustments as a size optimization,
767 // like emitSPUpdate does when not probing.
768 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
769 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
770 BuildMI(MBB, MBBI, DL, TII.get(Opc))
773 } else {
774 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
776 }
777 // No need to adjust Dwarf CFA offset here, the last position of the stack has
778 // been defined
779}
780
781void X86FrameLowering::emitStackProbeInlineGenericLoop(
784 uint64_t AlignOffset) const {
785 assert(Offset && "null offset");
786
787 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
789 "Inline stack probe loop will clobber live EFLAGS.");
790
791 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
792 const bool HasFP = hasFP(MF);
795 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
796 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
797
798 if (AlignOffset) {
799 if (AlignOffset < StackProbeSize) {
800 // Perform a first smaller allocation followed by a probe.
801 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
803
804 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
806 StackPtr, false, 0)
807 .addImm(0)
809 NumFrameExtraProbe++;
810 Offset -= AlignOffset;
811 }
812 }
813
814 // Synthesize a loop
815 NumFrameLoopProbe++;
816 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
817
818 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
819 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
820
822 MF.insert(MBBIter, testMBB);
823 MF.insert(MBBIter, tailMBB);
824
825 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
826 : Is64Bit ? X86::R11D
827 : X86::EAX;
828
829 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
832
833 // save loop bound
834 {
835 const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
836 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
837 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
838 .addReg(FinalStackProbed)
839 .addImm(BoundOffset)
841
842 // while in the loop, use loop-invariant reg for CFI,
843 // instead of the stack pointer, which changes during the loop
844 if (!HasFP && NeedsDwarfCFI) {
845 // x32 uses the same DWARF register numbers as x86-64,
846 // so there isn't a register number for r11d, we must use r11 instead
847 const Register DwarfFinalStackProbed =
849 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
850 : FinalStackProbed;
851
854 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
856 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
857 }
858 }
859
860 // allocate a page
861 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
862 /*InEpilogue=*/false)
864
865 // touch the page
866 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
868 StackPtr, false, 0)
869 .addImm(0)
871
872 // cmp with stack pointer bound
873 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
875 .addReg(FinalStackProbed)
877
878 // jump
879 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
880 .addMBB(testMBB)
883 testMBB->addSuccessor(testMBB);
884 testMBB->addSuccessor(tailMBB);
885
886 // BB management
887 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
889 MBB.addSuccessor(testMBB);
890
891 // handle tail
892 const uint64_t TailOffset = Offset % StackProbeSize;
893 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
894 if (TailOffset) {
895 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
896 /*InEpilogue=*/false)
898 }
899
900 // after the loop, switch back to stack pointer for CFI
901 if (!HasFP && NeedsDwarfCFI) {
902 // x32 uses the same DWARF register numbers as x86-64,
903 // so there isn't a register number for esp, we must use rsp instead
904 const Register DwarfStackPtr =
908
909 BuildCFI(*tailMBB, TailMBBIter, DL,
911 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
912 }
913
914 // Update Live In information
915 recomputeLiveIns(*testMBB);
916 recomputeLiveIns(*tailMBB);
917}
918
919void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
921 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
923 assert(STI.is64Bit() && "different expansion needed for 32 bit");
924 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
926 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
927
928 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
930 "Inline stack probe loop will clobber live EFLAGS.");
931
932 // RAX contains the number of bytes of desired stack adjustment.
933 // The handling here assumes this value has already been updated so as to
934 // maintain stack alignment.
935 //
936 // We need to exit with RSP modified by this amount and execute suitable
937 // page touches to notify the OS that we're growing the stack responsibly.
938 // All stack probing must be done without modifying RSP.
939 //
940 // MBB:
941 // SizeReg = RAX;
942 // ZeroReg = 0
943 // CopyReg = RSP
944 // Flags, TestReg = CopyReg - SizeReg
945 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
946 // LimitReg = gs magic thread env access
947 // if FinalReg >= LimitReg goto ContinueMBB
948 // RoundBB:
949 // RoundReg = page address of FinalReg
950 // LoopMBB:
951 // LoopReg = PHI(LimitReg,ProbeReg)
952 // ProbeReg = LoopReg - PageSize
953 // [ProbeReg] = 0
954 // if (ProbeReg > RoundReg) goto LoopMBB
955 // ContinueMBB:
956 // RSP = RSP - RAX
957 // [rest of original MBB]
958
959 // Set up the new basic blocks
960 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
961 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
962 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
963
964 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
965 MF.insert(MBBIter, RoundMBB);
966 MF.insert(MBBIter, LoopMBB);
967 MF.insert(MBBIter, ContinueMBB);
968
969 // Split MBB and move the tail portion down to ContinueMBB.
970 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
971 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
973
974 // Some useful constants
975 const int64_t ThreadEnvironmentStackLimit = 0x10;
976 const int64_t PageSize = 0x1000;
977 const int64_t PageMask = ~(PageSize - 1);
978
979 // Registers we need. For the normal case we use virtual
980 // registers. For the prolog expansion we use RAX, RCX and RDX.
982 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
983 const Register SizeReg = InProlog ? X86::RAX
984 : MRI.createVirtualRegister(RegClass),
985 ZeroReg = InProlog ? X86::RCX
986 : MRI.createVirtualRegister(RegClass),
987 CopyReg = InProlog ? X86::RDX
988 : MRI.createVirtualRegister(RegClass),
989 TestReg = InProlog ? X86::RDX
990 : MRI.createVirtualRegister(RegClass),
991 FinalReg = InProlog ? X86::RDX
992 : MRI.createVirtualRegister(RegClass),
993 RoundedReg = InProlog ? X86::RDX
994 : MRI.createVirtualRegister(RegClass),
995 LimitReg = InProlog ? X86::RCX
996 : MRI.createVirtualRegister(RegClass),
997 JoinReg = InProlog ? X86::RCX
998 : MRI.createVirtualRegister(RegClass),
999 ProbeReg = InProlog ? X86::RCX
1000 : MRI.createVirtualRegister(RegClass);
1001
1002 // SP-relative offsets where we can save RCX and RDX.
1003 int64_t RCXShadowSlot = 0;
1004 int64_t RDXShadowSlot = 0;
1005
1006 // If inlining in the prolog, save RCX and RDX.
1007 if (InProlog) {
1008 // Compute the offsets. We need to account for things already
1009 // pushed onto the stack at this point: return address, frame
1010 // pointer (if used), and callee saves.
1012 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
1013 const bool HasFP = hasFP(MF);
1014
1015 // Check if we need to spill RCX and/or RDX.
1016 // Here we assume that no earlier prologue instruction changes RCX and/or
1017 // RDX, so checking the block live-ins is enough.
1018 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
1019 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
1020 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1021 // Assign the initial slot to both registers, then change RDX's slot if both
1022 // need to be spilled.
1023 if (IsRCXLiveIn)
1024 RCXShadowSlot = InitSlot;
1025 if (IsRDXLiveIn)
1026 RDXShadowSlot = InitSlot;
1027 if (IsRDXLiveIn && IsRCXLiveIn)
1028 RDXShadowSlot += 8;
1029 // Emit the saves if needed.
1030 if (IsRCXLiveIn)
1031 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1032 RCXShadowSlot)
1033 .addReg(X86::RCX);
1034 if (IsRDXLiveIn)
1035 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1036 RDXShadowSlot)
1037 .addReg(X86::RDX);
1038 } else {
1039 // Not in the prolog. Copy RAX to a virtual reg.
1040 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1041 }
1042
1043 // Add code to MBB to check for overflow and set the new target stack pointer
1044 // to zero if so.
1045 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1046 .addReg(ZeroReg, RegState::Undef)
1047 .addReg(ZeroReg, RegState::Undef);
1048 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1049 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1050 .addReg(CopyReg)
1051 .addReg(SizeReg);
1052 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1053 .addReg(TestReg)
1054 .addReg(ZeroReg)
1056
1057 // FinalReg now holds final stack pointer value, or zero if
1058 // allocation would overflow. Compare against the current stack
1059 // limit from the thread environment block. Note this limit is the
1060 // lowest touched page on the stack, not the point at which the OS
1061 // will cause an overflow exception, so this is just an optimization
1062 // to avoid unnecessarily touching pages that are below the current
1063 // SP but already committed to the stack by the OS.
1064 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1065 .addReg(0)
1066 .addImm(1)
1067 .addReg(0)
1068 .addImm(ThreadEnvironmentStackLimit)
1069 .addReg(X86::GS);
1070 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1071 // Jump if the desired stack pointer is at or above the stack limit.
1072 BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
1073
1074 // Add code to roundMBB to round the final stack pointer to a page boundary.
1075 RoundMBB->addLiveIn(FinalReg);
1076 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1077 .addReg(FinalReg)
1078 .addImm(PageMask);
1079 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1080
1081 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1082 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1083 // and probe until we reach RoundedReg.
1084 if (!InProlog) {
1085 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1086 .addReg(LimitReg)
1087 .addMBB(RoundMBB)
1088 .addReg(ProbeReg)
1089 .addMBB(LoopMBB);
1090 }
1091
1092 LoopMBB->addLiveIn(JoinReg);
1093 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1094 false, -PageSize);
1095
1096 // Probe by storing a byte onto the stack.
1097 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1098 .addReg(ProbeReg)
1099 .addImm(1)
1100 .addReg(0)
1101 .addImm(0)
1102 .addReg(0)
1103 .addImm(0);
1104
1105 LoopMBB->addLiveIn(RoundedReg);
1106 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1107 .addReg(RoundedReg)
1108 .addReg(ProbeReg);
1109 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
1110
1111 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1112
1113 // If in prolog, restore RDX and RCX.
1114 if (InProlog) {
1115 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1116 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1117 TII.get(X86::MOV64rm), X86::RCX),
1118 X86::RSP, false, RCXShadowSlot);
1119 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1120 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1121 TII.get(X86::MOV64rm), X86::RDX),
1122 X86::RSP, false, RDXShadowSlot);
1123 }
1124
1125 // Now that the probing is done, add code to continueMBB to update
1126 // the stack pointer for real.
1127 ContinueMBB->addLiveIn(SizeReg);
1128 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1129 .addReg(X86::RSP)
1130 .addReg(SizeReg);
1131
1132 // Add the control flow edges we need.
1133 MBB.addSuccessor(ContinueMBB);
1134 MBB.addSuccessor(RoundMBB);
1135 RoundMBB->addSuccessor(LoopMBB);
1136 LoopMBB->addSuccessor(ContinueMBB);
1137 LoopMBB->addSuccessor(LoopMBB);
1138
1139 // Mark all the instructions added to the prolog as frame setup.
1140 if (InProlog) {
1141 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1142 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1143 }
1144 for (MachineInstr &MI : *RoundMBB) {
1146 }
1147 for (MachineInstr &MI : *LoopMBB) {
1149 }
1150 for (MachineInstr &MI :
1151 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1153 }
1154 }
1155}
1156
1157void X86FrameLowering::emitStackProbeCall(
1159 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1160 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1161 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1162
1163 // FIXME: Add indirect thunk support and remove this.
1164 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1165 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1166 "code model and indirect thunks not yet implemented.");
1167
1168 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1170 "Stack probe calls will clobber live EFLAGS.");
1171
1172 unsigned CallOp;
1173 if (Is64Bit)
1174 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1175 else
1176 CallOp = X86::CALLpcrel32;
1177
1179
1181 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1182
1183 // All current stack probes take AX and SP as input, clobber flags, and
1184 // preserve all registers. x86_64 probes leave RSP unmodified.
1186 // For the large code model, we have to call through a register. Use R11,
1187 // as it is scratch in all supported calling conventions.
1188 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1190 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1191 } else {
1192 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1194 }
1195
1196 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1197 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1203
1204 MachineInstr *ModInst = CI;
1205 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1206 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1207 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1208 // themselves. They also do not clobber %rax so we can reuse it when
1209 // adjusting %rsp.
1210 // All other platforms do not specify a particular ABI for the stack probe
1211 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1212 ModInst =
1214 .addReg(SP)
1215 .addReg(AX);
1216 }
1217
1218 // DebugInfo variable locations -- if there's an instruction number for the
1219 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1220 // modifies SP.
1221 if (InstrNum) {
1222 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1223 // Label destination operand of the subtract.
1224 MF.makeDebugValueSubstitution(*InstrNum,
1225 {ModInst->getDebugInstrNum(), 0});
1226 } else {
1227 // Label the call. The operand number is the penultimate operand, zero
1228 // based.
1229 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1231 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1232 }
1233 }
1234
1235 if (InProlog) {
1236 // Apply the frame setup flag to all inserted instrs.
1237 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1238 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1239 }
1240}
1241
1242static unsigned calculateSetFPREG(uint64_t SPAdjust) {
1243 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
1244 // and might require smaller successive adjustments.
1245 const uint64_t Win64MaxSEHOffset = 128;
1246 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
1247 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
1248 return SEHFrameOffset & -16;
1249}
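// Worked examples for calculateSetFPREG (illustrative): SPAdjust = 40 gives
// min(40, 128) = 40, rounded down to 32 by the & -16; SPAdjust = 200 is
// clamped to 128, which is already 16-byte aligned and stays 128.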
1250
1251// If we're forcing a stack realignment we can't rely on just the frame
1252// info, we need to know the ABI stack alignment as well in case we
1253// have a call out. Otherwise just make sure we have some alignment - we'll
1254// go with the minimum SlotSize.
1255uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1256 const MachineFrameInfo &MFI = MF.getFrameInfo();
1257 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1258 Align StackAlign = getStackAlign();
1259 if (MF.getFunction().hasFnAttribute("stackrealign")) {
1260 if (MFI.hasCalls())
1261 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1262 else if (MaxAlign < SlotSize)
1263 MaxAlign = Align(SlotSize);
1264 }
1265 return MaxAlign.value();
1266}
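// Example (illustrative, assuming the usual 16-byte ABI stack alignment and
// 8-byte slots on x86-64): a function carrying the "stackrealign" attribute
// whose frame only asks for 8-byte alignment gets MaxAlign raised to 16 if
// it makes calls; without calls the alignment is only raised to SlotSize, so
// a request of 4 would become 8.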
1267
1268void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1270 const DebugLoc &DL, unsigned Reg,
1271 uint64_t MaxAlign) const {
1272 uint64_t Val = -MaxAlign;
1273 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1274
1275 MachineFunction &MF = *MBB.getParent();
1277 const X86TargetLowering &TLI = *STI.getTargetLowering();
1278 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1279 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1280
1281 // We want to make sure that (in the worst case) fewer than StackProbeSize
1282 // bytes remain unprobed after the AND. This assumption is used in
1283 // emitStackProbeInlineGeneric.
1284 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1285 {
1286 NumFrameLoopProbe++;
1287 MachineBasicBlock *entryMBB =
1289 MachineBasicBlock *headMBB =
1291 MachineBasicBlock *bodyMBB =
1293 MachineBasicBlock *footMBB =
1295
1297 MF.insert(MBBIter, entryMBB);
1298 MF.insert(MBBIter, headMBB);
1299 MF.insert(MBBIter, bodyMBB);
1300 MF.insert(MBBIter, footMBB);
1301 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1302 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1303 : Is64Bit ? X86::R11D
1304 : X86::EAX;
1305
1306 // Setup entry block
1307 {
1308
1309 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1310 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1313 MachineInstr *MI =
1314 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1315 .addReg(FinalStackProbed)
1316 .addImm(Val)
1318
1319 // The EFLAGS implicit def is dead.
1320 MI->getOperand(3).setIsDead();
1321
1322 BuildMI(entryMBB, DL,
1323 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1324 .addReg(FinalStackProbed)
1327 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1328 .addMBB(&MBB)
1331 entryMBB->addSuccessor(headMBB);
1332 entryMBB->addSuccessor(&MBB);
1333 }
1334
1335 // Loop entry block
1336
1337 {
1338 const unsigned SUBOpc =
1339 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
1340 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1342 .addImm(StackProbeSize)
1344
1345 BuildMI(headMBB, DL,
1346 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1348 .addReg(FinalStackProbed)
1350
1351 // jump to the footer if StackPtr < FinalStackProbed
1352 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1353 .addMBB(footMBB)
1356
1357 headMBB->addSuccessor(bodyMBB);
1358 headMBB->addSuccessor(footMBB);
1359 }
1360
1361 // setup loop body
1362 {
1363 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1365 StackPtr, false, 0)
1366 .addImm(0)
1368
1369 const unsigned SUBOpc =
1370 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
1371 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1373 .addImm(StackProbeSize)
1375
1376 // cmp with stack pointer bound
1377 BuildMI(bodyMBB, DL,
1378 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1379 .addReg(FinalStackProbed)
1382
1383 // jump back while FinalStackProbed < StackPtr
1384 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1385 .addMBB(bodyMBB)
1388 bodyMBB->addSuccessor(bodyMBB);
1389 bodyMBB->addSuccessor(footMBB);
1390 }
1391
1392 // setup loop footer
1393 {
1394 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1395 .addReg(FinalStackProbed)
1397 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1399 StackPtr, false, 0)
1400 .addImm(0)
1402 footMBB->addSuccessor(&MBB);
1403 }
1404
1405 recomputeLiveIns(*headMBB);
1406 recomputeLiveIns(*bodyMBB);
1407 recomputeLiveIns(*footMBB);
1409 }
1410 } else {
1411 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1412 .addReg(Reg)
1413 .addImm(Val)
1415
1416 // The EFLAGS implicit def is dead.
1417 MI->getOperand(3).setIsDead();
1418 }
1419}
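// Example of the realignment AND above (illustrative): MaxAlign = 64 yields
// Val = -64, which fits in a signed byte, so on LP64 this emits
// "andq $-64, %rsp" (AND64ri8). With MaxAlign = 4096 and inline stack
// probing enabled (default probe size 4096), the probed-realignment loop
// above is emitted instead of a bare AND, so no page is skipped without a
// probe.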
1420
1422 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1423 // clobbered by any interrupt handler.
1424 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1425 "MF used frame lowering for wrong subtarget");
1426 const Function &Fn = MF.getFunction();
1427 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
1428 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1429}
1430
1431/// Return true if we need to use the restricted Windows x64 prologue and
1432/// epilogue code patterns that can be described with WinCFI (.seh_*
1433/// directives).
1434bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1435 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1436}
1437
1438bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1439 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1440}
1441
1442/// emitPrologue - Push callee-saved registers onto the stack, which
1443/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
1444/// space for local variables. Also emit labels used by the exception handler to
1445/// generate the exception handling frames.
1446
1447/*
1448 Here's a gist of what gets emitted:
1449
1450 ; Establish frame pointer, if needed
1451 [if needs FP]
1452 push %rbp
1453 .cfi_def_cfa_offset 16
1454 .cfi_offset %rbp, -16
1455 .seh_pushreg %rbp
1456 mov %rsp, %rbp
1457 .cfi_def_cfa_register %rbp
1458
1459 ; Spill general-purpose registers
1460 [for all callee-saved GPRs]
1461 pushq %<reg>
1462 [if not needs FP]
1463 .cfi_def_cfa_offset (offset from RETADDR)
1464 .seh_pushreg %<reg>
1465
1466 ; If the required stack alignment > default stack alignment
1467 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1468 ; of unknown size in the stack frame.
1469 [if stack needs re-alignment]
1470 and $MASK, %rsp
1471
1472 ; Allocate space for locals
1473 [if target is Windows and allocated space > 4096 bytes]
1474 ; Windows needs special care for allocations larger
1475 ; than one page.
1476 mov $NNN, %rax
1477 call ___chkstk_ms/___chkstk
1478 sub %rax, %rsp
1479 [else]
1480 sub $NNN, %rsp
1481
1482 [if needs FP]
1483 .seh_stackalloc (size of XMM spill slots)
1484 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1485 [else]
1486 .seh_stackalloc NNN
1487
1488 ; Spill XMMs
1489 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1490 ; they may get spilled on any platform, if the current function
1491 ; calls @llvm.eh.unwind.init
1492 [if needs FP]
1493 [for all callee-saved XMM registers]
1494 movaps %<xmm reg>, -MMM(%rbp)
1495 [for all callee-saved XMM registers]
1496 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1497 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1498 [else]
1499 [for all callee-saved XMM registers]
1500 movaps %<xmm reg>, KKK(%rsp)
1501 [for all callee-saved XMM registers]
1502 .seh_savexmm %<xmm reg>, KKK
1503
1504 .seh_endprologue
1505
1506 [if needs base pointer]
1507 mov %rsp, %rbx
1508 [if needs to restore base pointer]
1509 mov %rsp, -MMM(%rbp)
1510
1511 ; Emit CFI info
1512 [if needs FP]
1513 [for all callee-saved registers]
1514 .cfi_offset %<reg>, (offset from %rbp)
1515 [else]
1516 .cfi_def_cfa_offset (offset from RETADDR)
1517 [for all callee-saved registers]
1518 .cfi_offset %<reg>, (offset from %rsp)
1519
1520 Notes:
1521 - .seh directives are emitted only for Windows 64 ABI
1522 - .cv_fpo directives are emitted on win32 when emitting CodeView
1523 - .cfi directives are emitted for all other ABIs
1524 - for 32-bit code, substitute %e?? registers for %r??
1525*/
1526
1528 MachineBasicBlock &MBB) const {
1529 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1530 "MF used frame lowering for wrong subtarget");
1532 MachineFrameInfo &MFI = MF.getFrameInfo();
1533 const Function &Fn = MF.getFunction();
1534 MachineModuleInfo &MMI = MF.getMMI();
1536 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1537 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1538 bool IsFunclet = MBB.isEHFuncletEntry();
1540 if (Fn.hasPersonalityFn())
1541 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1542 bool FnHasClrFunclet =
1543 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1544 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1545 bool HasFP = hasFP(MF);
1546 bool IsWin64Prologue = isWin64Prologue(MF);
1547 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1548 // FIXME: Emit FPO data for EH funclets.
1549 bool NeedsWinFPO =
1550 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
1551 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1552 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1554 const Register MachineFramePtr =
1557 Register BasePtr = TRI->getBaseRegister();
1558 bool HasWinCFI = false;
1559
1560 // Debug location must be unknown since the first debug location is used
1561 // to determine the end of the prologue.
1562 DebugLoc DL;
1563 Register ArgBaseReg;
1564
1565 // Emit extra prolog for argument stack slot reference.
1566 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1567 // MI is the LEA instruction created in X86ArgumentStackSlotPass.
1568 // Create an extra prologue for stack realignment.
1569 ArgBaseReg = MI->getOperand(0).getReg();
1570 // leal 4(%esp), %basereg
1571 // .cfi_def_cfa %basereg, 0
1572 // andl $-128, %esp
1573 // pushl -4(%basereg)
1574 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1575 ArgBaseReg)
1577 .addImm(1)
1578 .addUse(X86::NoRegister)
1580 .addUse(X86::NoRegister)
1582 if (NeedsDwarfCFI) {
1583 // .cfi_def_cfa %basereg, 0
1584 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1585 BuildCFI(MBB, MBBI, DL,
1586 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1588 }
1589 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1590 int64_t Offset = -(int64_t)SlotSize;
1591 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm: X86::PUSH32rmm))
1592 .addReg(ArgBaseReg)
1593 .addImm(1)
1594 .addReg(X86::NoRegister)
1595 .addImm(Offset)
1596 .addReg(X86::NoRegister)
1598 }
1599
1600 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1601 // tail call.
1602 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1603 if (TailCallArgReserveSize && IsWin64Prologue)
1604 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1605
1606 const bool EmitStackProbeCall =
1608 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1609
1610 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1614 // The special symbol below is absolute and has a *value* suitable to be
1615 // combined with the frame pointer directly.
1616 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1617 .addUse(MachineFramePtr)
1618 .addUse(X86::RIP)
1619 .addImm(1)
1620 .addUse(X86::NoRegister)
1621 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1623 .addUse(X86::NoRegister);
1624 break;
1625 }
1626 [[fallthrough]];
1627
1629 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1630 .addUse(MachineFramePtr)
1631 .addImm(60)
1633 break;
1634
1636 break;
1637 }
1638 }
1639
1640 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1641 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1642 // stack alignment.
1644 Fn.arg_size() == 2) {
1645 StackSize += 8;
1646 MFI.setStackSize(StackSize);
1647 emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
1648 }
1649
1650 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1651 // function, and use up to 128 bytes of stack space, don't have a frame
1652 // pointer, calls, or dynamic alloca then we do not need to adjust the
1653 // stack pointer (we fit in the Red Zone). We also check that we don't
1654 // push and pop from the stack.
1655 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1656 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1657 !MFI.adjustsStack() && // No calls.
1658 !EmitStackProbeCall && // No stack probes.
1659 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1660 !MF.shouldSplitStack()) { // Regular stack
1661 uint64_t MinSize =
1663 if (HasFP) MinSize += SlotSize;
1664 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1665 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1666 MFI.setStackSize(StackSize);
1667 }
1668
1669 // Insert stack pointer adjustment for later moving of return addr. Only
1670 // applies to tail call optimized functions where the callee argument stack
1671 // size is bigger than the caller's.
1672 if (TailCallArgReserveSize != 0) {
1673 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1674 /*InEpilogue=*/false)
1676 }
1677
1678 // Mapping for machine moves:
1679 //
1680 // DST: VirtualFP AND
1681 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1682 // ELSE => DW_CFA_def_cfa
1683 //
1684 // SRC: VirtualFP AND
1685 // DST: Register => DW_CFA_def_cfa_register
1686 //
1687 // ELSE
1688 // OFFSET < 0 => DW_CFA_offset_extended_sf
1689 // REG < 64 => DW_CFA_offset + Reg
1690 // ELSE => DW_CFA_offset_extended
1691
1692 uint64_t NumBytes = 0;
1693 int stackGrowth = -SlotSize;
1694
1695 // Find the funclet establisher parameter
1696 Register Establisher = X86::NoRegister;
1697 if (IsClrFunclet)
1698 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1699 else if (IsFunclet)
1700 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1701
1702 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1703 // Immediately spill establisher into the home slot.
1704 // The runtime cares about this.
1705 // MOV64mr %rdx, 16(%rsp)
1706 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1707 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1708 .addReg(Establisher)
1710 MBB.addLiveIn(Establisher);
1711 }
1712
1713 if (HasFP) {
1714 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1715
1716 // Calculate required stack adjustment.
1717 uint64_t FrameSize = StackSize - SlotSize;
1718 NumBytes = FrameSize -
1719 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1720
1721 // Callee-saved registers are pushed on stack before the stack is realigned.
1722 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1723 NumBytes = alignTo(NumBytes, MaxAlign);
1724
1725 // Save EBP/RBP into the appropriate stack slot.
1726 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
1727 .addReg(MachineFramePtr, RegState::Kill)
1729
1730 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1731 // Mark the place where EBP/RBP was saved.
1732 // Define the current CFA rule to use the provided offset.
1733 assert(StackSize);
1734 BuildCFI(MBB, MBBI, DL,
1736 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1738
1739 // Change the rule for the FramePtr to be an "offset" rule.
1740 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1741 BuildCFI(MBB, MBBI, DL,
1742 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1743 2 * stackGrowth -
1744 (int)TailCallArgReserveSize),
1746 }
1747
1748 if (NeedsWinCFI) {
1749 HasWinCFI = true;
1750 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1753 }
1754
1755 if (!IsFunclet) {
1756 if (X86FI->hasSwiftAsyncContext()) {
1757 const auto &Attrs = MF.getFunction().getAttributes();
1758
1759 // Before we update the live frame pointer we have to ensure there's a
1760 // valid (or null) asynchronous context in its slot just before FP in
1761 // the frame record, so store it now.
1762 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1763 // We have an initial context in r14, store it just before the frame
1764 // pointer.
1765 MBB.addLiveIn(X86::R14);
1766 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1767 .addReg(X86::R14)
1769 } else {
1770 // No initial context, store null so that there's no pointer that
1771 // could be misused.
1772 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
1773 .addImm(0)
1775 }
1776
1777 if (NeedsWinCFI) {
1778 HasWinCFI = true;
1779 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1780 .addImm(X86::R14)
1782 }
1783
1784 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1785 .addUse(X86::RSP)
1786 .addImm(1)
1787 .addUse(X86::NoRegister)
1788 .addImm(8)
1789 .addUse(X86::NoRegister)
1791 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
1792 .addUse(X86::RSP)
1793 .addImm(8)
1795 }
1796
1797 if (!IsWin64Prologue && !IsFunclet) {
1798 // Update EBP with the new base value.
1799 if (!X86FI->hasSwiftAsyncContext())
1800 BuildMI(MBB, MBBI, DL,
1801 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1802 FramePtr)
1805
1806 if (NeedsDwarfCFI) {
1807 if (ArgBaseReg.isValid()) {
1808 SmallString<64> CfaExpr;
1809 CfaExpr.push_back(dwarf::DW_CFA_expression);
1810 uint8_t buffer[16];
1811 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1812 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1813 CfaExpr.push_back(2);
1814 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1815 CfaExpr.push_back(0);
1816 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1817 BuildCFI(MBB, MBBI, DL,
1818 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1820 } else {
1821 // Mark effective beginning of when frame pointer becomes valid.
1822 // Define the current CFA to use the EBP/RBP register.
1823 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1824 BuildCFI(
1825 MBB, MBBI, DL,
1826 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1828 }
1829 }
1830
1831 if (NeedsWinFPO) {
1832 // .cv_fpo_setframe $FramePtr
1833 HasWinCFI = true;
1834 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1836 .addImm(0)
1838 }
1839 }
1840 }
1841 } else {
1842 assert(!IsFunclet && "funclets without FPs not yet implemented");
1843 NumBytes = StackSize -
1844 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1845 }
1846
1847 // Update the offset adjustment, which is mainly used by codeview to translate
1848 // from ESP to VFRAME relative local variable offsets.
1849 if (!IsFunclet) {
1850 if (HasFP && TRI->hasStackRealignment(MF))
1851 MFI.setOffsetAdjustment(-NumBytes);
1852 else
1853 MFI.setOffsetAdjustment(-StackSize);
1854 }
1855
1856 // For EH funclets, only allocate enough space for outgoing calls. Save the
1857 // NumBytes value that we would've used for the parent frame.
1858 unsigned ParentFrameNumBytes = NumBytes;
1859 if (IsFunclet)
1860 NumBytes = getWinEHFuncletFrameSize(MF);
1861
1862 // Skip the callee-saved push instructions.
1863 bool PushedRegs = false;
1864 int StackOffset = 2 * stackGrowth;
1865
1866 while (MBBI != MBB.end() &&
1867 MBBI->getFlag(MachineInstr::FrameSetup) &&
1868 (MBBI->getOpcode() == X86::PUSH32r ||
1869 MBBI->getOpcode() == X86::PUSH64r)) {
1870 PushedRegs = true;
1871 Register Reg = MBBI->getOperand(0).getReg();
1872 ++MBBI;
1873
1874 if (!HasFP && NeedsDwarfCFI) {
1875 // Mark callee-saved push instruction.
1876 // Define the current CFA rule to use the provided offset.
1877 assert(StackSize);
1878 BuildCFI(MBB, MBBI, DL,
1881 StackOffset += stackGrowth;
1882 }
1883
1884 if (NeedsWinCFI) {
1885 HasWinCFI = true;
1886 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1887 .addImm(Reg)
1889 }
1890 }
1891
1892 // Realign stack after we pushed callee-saved registers (so that we'll be
1893 // able to calculate their offsets from the frame pointer).
1894 // Don't do this for Win64, it needs to realign the stack after the prologue.
1895 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
1896 !ArgBaseReg.isValid()) {
1897 assert(HasFP && "There should be a frame pointer if stack is realigned.");
1898 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1899
1900 if (NeedsWinCFI) {
1901 HasWinCFI = true;
1902 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1903 .addImm(MaxAlign)
1905 }
1906 }
1907
1908 // If there is an SUB32ri of ESP immediately before this instruction, merge
1909 // the two. This can be the case when tail call elimination is enabled and
1910 // the callee has more arguments than the caller.
1911 NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1912
1913 // Adjust stack pointer: ESP -= numbytes.
1914
1915 // Windows and cygwin/mingw require a prologue helper routine when allocating
1916 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1917 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1918 // stack and adjust the stack pointer in one go. The 64-bit version of
1919 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1920 // responsible for adjusting the stack pointer. Touching the stack at 4K
1921 // increments is necessary to ensure that the guard pages used by the OS
1922 // virtual memory manager are allocated in correct sequence.
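// As a rough illustration only (made-up allocation size, not literally what is
// emitted below), the probed Win64 allocation boils down to:
//   mov eax, 0x5000     ; requested bytes in RAX/EAX
//   call __chkstk       ; 64-bit __chkstk only probes; it does not move RSP
//   sub rsp, rax        ; the prologue performs the actual adjustment
// whereas the 32-bit __chkstk/__alloca helpers both probe and adjust ESP.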
1923 uint64_t AlignedNumBytes = NumBytes;
1924 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1925 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1926 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1927 assert(!X86FI->getUsesRedZone() &&
1928 "The Red Zone is not accounted for in stack probes");
1929
1930 // Check whether EAX is livein for this block.
1931 bool isEAXAlive = isEAXLiveIn(MBB);
1932
1933 if (isEAXAlive) {
1934 if (Is64Bit) {
1935 // Save RAX
1936 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1937 .addReg(X86::RAX, RegState::Kill)
1939 } else {
1940 // Save EAX
1941 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1942 .addReg(X86::EAX, RegState::Kill)
1944 }
1945 }
1946
1947 if (Is64Bit) {
1948 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1949 // Function prologue is responsible for adjusting the stack pointer.
1950 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1951 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1952 .addImm(Alloc)
1954 } else {
1955 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1956 // We'll also use 4 already allocated bytes for EAX.
1957 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1958 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1960 }
1961
1962 // Call __chkstk, __chkstk_ms, or __alloca.
1963 emitStackProbe(MF, MBB, MBBI, DL, true);
1964
1965 if (isEAXAlive) {
1966 // Restore RAX/EAX
1967 MachineInstr *MI;
1968 if (Is64Bit)
1969 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
1970 StackPtr, false, NumBytes - 8);
1971 else
1972 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
1973 StackPtr, false, NumBytes - 4);
1974 MI->setFlag(MachineInstr::FrameSetup);
1975 MBB.insert(MBBI, MI);
1976 }
1977 } else if (NumBytes) {
1978 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
1979 }
1980
1981 if (NeedsWinCFI && NumBytes) {
1982 HasWinCFI = true;
1983 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
1984 .addImm(NumBytes)
1986 }
1987
1988 int SEHFrameOffset = 0;
1989 unsigned SPOrEstablisher;
1990 if (IsFunclet) {
1991 if (IsClrFunclet) {
1992 // The establisher parameter passed to a CLR funclet is actually a pointer
1993 // to the (mostly empty) frame of its nearest enclosing funclet; we have
1994 // to find the root function establisher frame by loading the PSPSym from
1995 // the intermediate frame.
1996 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1997 MachinePointerInfo NoInfo;
1998 MBB.addLiveIn(Establisher);
1999 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2000 Establisher, false, PSPSlotOffset)
2003 ;
2004 // Save the root establisher back into the current funclet's (mostly
2005 // empty) frame, in case a sub-funclet or the GC needs it.
2006 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2007 false, PSPSlotOffset)
2008 .addReg(Establisher)
2010 NoInfo,
2013 }
2014 SPOrEstablisher = Establisher;
2015 } else {
2016 SPOrEstablisher = StackPtr;
2017 }
2018
2019 if (IsWin64Prologue && HasFP) {
2020 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2021 // this calculation on the incoming establisher, which holds the value of
2022 // RSP from the parent frame at the end of the prologue.
2023 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2024 if (SEHFrameOffset)
2025 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2026 SPOrEstablisher, false, SEHFrameOffset);
2027 else
2028 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2029 .addReg(SPOrEstablisher);
2030
2031 // If this is not a funclet, emit the CFI describing our frame pointer.
2032 if (NeedsWinCFI && !IsFunclet) {
2033 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2034 HasWinCFI = true;
2035 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2037 .addImm(SEHFrameOffset)
2039 if (isAsynchronousEHPersonality(Personality))
2040 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2041 }
2042 } else if (IsFunclet && STI.is32Bit()) {
2043 // Reset EBP / ESI to something good for funclets.
2044 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
2045 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2046 // into the registration node so that the runtime will restore it for us.
2047 if (!MBB.isCleanupFuncletEntry()) {
2048 assert(Personality == EHPersonality::MSVC_CXX);
2049 Register FrameReg;
2050 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
2051 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2052 // ESP is the first field, so no extra displacement is needed.
2053 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2054 false, EHRegOffset)
2055 .addReg(X86::ESP);
2056 }
2057 }
2058
2059 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2060 const MachineInstr &FrameInstr = *MBBI;
2061 ++MBBI;
2062
2063 if (NeedsWinCFI) {
2064 int FI;
2065 if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2066 if (X86::FR64RegClass.contains(Reg)) {
2067 int Offset;
2068 Register IgnoredFrameReg;
2069 if (IsWin64Prologue && IsFunclet)
2070 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2071 else
2072 Offset =
2073 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2074 SEHFrameOffset;
2075
2076 HasWinCFI = true;
2077 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2078 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2079 .addImm(Reg)
2080 .addImm(Offset)
2082 }
2083 }
2084 }
2085 }
2086
2087 if (NeedsWinCFI && HasWinCFI)
2088 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2090
2091 if (FnHasClrFunclet && !IsFunclet) {
2092 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2093 // immediately after the prolog) into the PSPSlot so that funclets
2094 // and the GC can recover it.
2095 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2096 auto PSPInfo = MachinePointerInfo::getFixedStack(
2098 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2099 PSPSlotOffset)
2104 }
2105
2106 // Realign stack after we spilled callee-saved registers (so that we'll be
2107 // able to calculate their offsets from the frame pointer).
2108 // Win64 requires aligning the stack after the prologue.
2109 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2110 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2111 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2112 }
2113
2114 // We already dealt with stack realignment and funclets above.
2115 if (IsFunclet && STI.is32Bit())
2116 return;
2117
2118 // If we need a base pointer, set it up here. It's whatever the value
2119 // of the stack pointer is at this point. Any variable size objects
2120 // will be allocated after this, so we can still use the base pointer
2121 // to reference locals.
2122 if (TRI->hasBasePointer(MF)) {
2123 // Update the base pointer with the current stack pointer.
2124 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2125 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2126 .addReg(SPOrEstablisher)
2128 if (X86FI->getRestoreBasePointer()) {
2129 // Stash value of base pointer. Saving RSP instead of EBP shortens
2130 // dependence chain. Used by SjLj EH.
2131 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2132 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
2133 FramePtr, true, X86FI->getRestoreBasePointerOffset())
2134 .addReg(SPOrEstablisher)
2136 }
2137
2138 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2139 // Stash the value of the frame pointer relative to the base pointer for
2140 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2141 // it recovers the frame pointer from the base pointer rather than the
2142 // other way around.
2143 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2144 Register UsedReg;
2145 int Offset =
2146 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2147 .getFixed();
2148 assert(UsedReg == BasePtr);
2149 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2152 }
2153 }
2154 if (ArgBaseReg.isValid()) {
2155 // Save argument base pointer.
2156 auto *MI = X86FI->getStackPtrSaveMI();
2157 int FI = MI->getOperand(1).getIndex();
2158 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2159 // movl %basereg, offset(%ebp)
2160 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2161 .addReg(ArgBaseReg)
2163 }
2164
2165 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2166 // Mark end of stack pointer adjustment.
2167 if (!HasFP && NumBytes) {
2168 // Define the current CFA rule to use the provided offset.
2169 assert(StackSize);
2170 BuildCFI(
2171 MBB, MBBI, DL,
2172 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2174 }
2175
2176 // Emit DWARF info specifying the offsets of the callee-saved registers.
2177 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
2178 }
2179
2180 // X86 Interrupt handling function cannot assume anything about the direction
2181 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2182 // in each prologue of interrupt handler function.
2183 //
2184 // FIXME: Create "cld" instruction only in these cases:
2185 // 1. The interrupt handling function uses any of the "rep" instructions.
2186 // 2. Interrupt handling function calls another function.
2187 //
2188 if (Fn.getCallingConv() == CallingConv::X86_INTR)
2189 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2190 .setMIFlag(MachineInstr::FrameSetup);
2191
2192 // At this point we know if the function has WinCFI or not.
2193 MF.setHasWinCFI(HasWinCFI);
2194}
2195
2196 bool X86FrameLowering::canUseLEAForSPInEpilogue(
2197 const MachineFunction &MF) const {
2198 // We can't use LEA instructions for adjusting the stack pointer if we don't
2199 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2200 // to deallocate the stack.
2201 // This means that we can use LEA for SP in two situations:
2202 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2203 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2204 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
2205}
2206
2208 switch (MI.getOpcode()) {
2209 case X86::CATCHRET:
2210 case X86::CLEANUPRET:
2211 return true;
2212 default:
2213 return false;
2214 }
2215 llvm_unreachable("impossible");
2216}
2217
2218// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2219// stack. It holds a pointer to the bottom of the root function frame. The
2220// establisher frame pointer passed to a nested funclet may point to the
2221// (mostly empty) frame of its parent funclet, but it will need to find
2222// the frame of the root function to access locals. To facilitate this,
2223// every funclet copies the pointer to the bottom of the root function
2224// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2225// same offset for the PSPSym in the root function frame that's used in the
2226// funclets' frames allows each funclet to dynamically accept any ancestor
2227// frame as its establisher argument (the runtime doesn't guarantee the
2228// immediate parent for some reason lost to history), and also allows the GC,
2229// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2230// frame with only a single offset reported for the entire method.
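// A rough sketch of the resulting code; registers and offsets are illustrative
// (the establisher argument is shown in RDX, as in the CLR funclet case):
//   ; root function prologue:
//   mov [rsp + PSPSlotOffset], rsp    ; publish Initial-SP for funclets/GC
//   ; nested funclet prologue:
//   mov rdx, [rdx + PSPSlotOffset]    ; follow the ancestor frame to the root
//   mov [rsp + PSPSlotOffset], rdx    ; republish for sub-funclets and the GC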
2231unsigned
2232X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2233 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2234 Register SPReg;
2235 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2236 /*IgnoreSPUpdates*/ true)
2237 .getFixed();
2238 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2239 return static_cast<unsigned>(Offset);
2240}
2241
2242unsigned
2243X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2245 // This is the size of the pushed CSRs.
2246 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2247 // This is the size of callee saved XMMs.
2248 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2249 unsigned XMMSize = WinEHXMMSlotInfo.size() *
2250 TRI->getSpillSize(X86::VR128RegClass);
2251 // This is the amount of stack a funclet needs to allocate.
2252 unsigned UsedSize;
2253 EHPersonality Personality =
2255 if (Personality == EHPersonality::CoreCLR) {
2256 // CLR funclets need to hold enough space to include the PSPSym, at the
2257 // same offset from the stack pointer (immediately after the prolog) as it
2258 // resides at in the main function.
2259 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2260 } else {
2261 // Other funclets just need enough stack for outgoing call arguments.
2262 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2263 }
2264 // RBP is not included in the callee saved register block. After pushing RBP,
2265 // everything is 16 byte aligned. Everything we allocate before an outgoing
2266 // call must also be 16 byte aligned.
2267 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2268 // Subtract out the size of the callee saved registers. This is how much stack
2269 // each funclet will allocate.
2270 return FrameSizeMinusRBP + XMMSize - CSSize;
2271}
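// Worked example with hypothetical numbers: CSSize = 24 (three pushed GPRs),
// UsedSize = 40 (outgoing call arguments), XMMSize = 0. Then
// alignTo(24 + 40, 16) = 64, so the funclet allocates 64 + 0 - 24 = 40 bytes.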
2272
2273static bool isTailCallOpcode(unsigned Opc) {
2274 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
2275 Opc == X86::TCRETURNmi ||
2276 Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
2277 Opc == X86::TCRETURNmi64;
2278}
2279
2280 void X86FrameLowering::emitEpilogue(MachineFunction &MF,
2281 MachineBasicBlock &MBB) const {
2282 const MachineFrameInfo &MFI = MF.getFrameInfo();
2283 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2284 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2285 MachineBasicBlock::iterator MBBI = Terminator;
2286 DebugLoc DL;
2287 if (MBBI != MBB.end())
2288 DL = MBBI->getDebugLoc();
2289 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
2290 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2292 Register MachineFramePtr =
2293 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2294
2295 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2296 bool NeedsWin64CFI =
2297 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2298 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2299
2300 // Get the number of bytes to allocate from the FrameInfo.
2301 uint64_t StackSize = MFI.getStackSize();
2302 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2303 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2304 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2305 bool HasFP = hasFP(MF);
2306 uint64_t NumBytes = 0;
2307
2308 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2310 MF.needsFrameMoves();
2311
2312 Register ArgBaseReg;
2313 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2314 unsigned Opc = X86::LEA32r;
2315 Register StackReg = X86::ESP;
2316 ArgBaseReg = MI->getOperand(0).getReg();
2317 if (STI.is64Bit()) {
2318 Opc = X86::LEA64r;
2319 StackReg = X86::RSP;
2320 }
2321 // leal -4(%basereg), %esp
2322 // .cfi_def_cfa %esp, 4
2323 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2324 .addUse(ArgBaseReg)
2325 .addImm(1)
2326 .addUse(X86::NoRegister)
2327 .addImm(-(int64_t)SlotSize)
2328 .addUse(X86::NoRegister)
2330 if (NeedsDwarfCFI) {
2331 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2332 BuildCFI(MBB, MBBI, DL,
2333 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2335 --MBBI;
2336 }
2337 --MBBI;
2338 }
2339
2340 if (IsFunclet) {
2341 assert(HasFP && "EH funclets without FP not yet implemented");
2342 NumBytes = getWinEHFuncletFrameSize(MF);
2343 } else if (HasFP) {
2344 // Calculate required stack adjustment.
2345 uint64_t FrameSize = StackSize - SlotSize;
2346 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2347
2348 // Callee-saved registers were pushed on stack before the stack was
2349 // realigned.
2350 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2351 NumBytes = alignTo(FrameSize, MaxAlign);
2352 } else {
2353 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2354 }
2355 uint64_t SEHStackAllocAmt = NumBytes;
2356
2357 // AfterPop is the position to insert .cfi_restore.
2358 MachineBasicBlock::iterator AfterPop = MBBI;
2359 if (HasFP) {
2360 if (X86FI->hasSwiftAsyncContext()) {
2361 // Discard the context.
2362 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
2363 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
2364 }
2365 // Pop EBP.
2366 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
2367 MachineFramePtr)
2369
2370 // We need to reset FP to its untagged state on return. Bit 60 is currently
2371 // used to show the presence of an extended frame.
2372 if (X86FI->hasSwiftAsyncContext()) {
2373 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
2374 MachineFramePtr)
2375 .addUse(MachineFramePtr)
2376 .addImm(60)
2378 }
2379
2380 if (NeedsDwarfCFI) {
2381 if (!ArgBaseReg.isValid()) {
2382 unsigned DwarfStackPtr =
2383 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2384 BuildCFI(MBB, MBBI, DL,
2385 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2387 }
2388 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2389 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2390 BuildCFI(MBB, AfterPop, DL,
2391 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2393 --MBBI;
2394 --AfterPop;
2395 }
2396 --MBBI;
2397 }
2398 }
2399
2400 MachineBasicBlock::iterator FirstCSPop = MBBI;
2401 // Skip the callee-saved pop instructions.
2402 while (MBBI != MBB.begin()) {
2403 MachineBasicBlock::iterator PI = std::prev(MBBI);
2404 unsigned Opc = PI->getOpcode();
2405
2406 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2407 if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2408 (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2409 (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
2410 (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
2411 break;
2412 FirstCSPop = PI;
2413 }
2414
2415 --MBBI;
2416 }
2417 if (ArgBaseReg.isValid()) {
2418 // Restore argument base pointer.
2419 auto *MI = X86FI->getStackPtrSaveMI();
2420 int FI = MI->getOperand(1).getIndex();
2421 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2422 // movl offset(%ebp), %basereg
2423 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2425 }
2426 MBBI = FirstCSPop;
2427
2428 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2429 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2430
2431 if (MBBI != MBB.end())
2432 DL = MBBI->getDebugLoc();
2433 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2434 // instruction, merge the two instructions.
2435 if (NumBytes || MFI.hasVarSizedObjects())
2436 NumBytes += mergeSPUpdates(MBB, MBBI, true);
2437
2438 // If dynamic allocas are used, reset ESP to point to the last callee-saved
2439 // slot before popping the callee-saved registers off. The same applies when
2440 // the stack was realigned. Don't do this for funclet epilogues, since
2441 // funclets do no realignment or dynamic stack allocation.
2442 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2443 !IsFunclet) {
2444 if (TRI->hasStackRealignment(MF))
2445 MBBI = FirstCSPop;
2446 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2447 uint64_t LEAAmount =
2448 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2449
2450 if (X86FI->hasSwiftAsyncContext())
2451 LEAAmount -= 16;
2452
2453 // There are only two legal forms of epilogue:
2454 // - add SEHAllocationSize, %rsp
2455 // - lea SEHAllocationSize(%FramePtr), %rsp
2456 //
2457 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2458 // However, we may use this sequence if we have a frame pointer because the
2459 // effects of the prologue can safely be undone.
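// For example (illustrative numbers): with SEHStackAllocAmt = 0x140 and
// SEHFrameOffset = 0x80, LEAAmount = 0xc0 and the epilogue emits
//   lea 0xc0(%rbp), %rsp
// instead of restoring RSP from RBP directly.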
2460 if (LEAAmount != 0) {
2461 unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
2462 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
2463 FramePtr, false, LEAAmount);
2464 --MBBI;
2465 } else {
2466 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2467 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
2468 .addReg(FramePtr);
2469 --MBBI;
2470 }
2471 } else if (NumBytes) {
2472 // Adjust stack pointer back: ESP += numbytes.
2473 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2474 if (!HasFP && NeedsDwarfCFI) {
2475 // Define the current CFA rule to use the provided offset.
2476 BuildCFI(MBB, MBBI, DL,
2478 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2480 }
2481 --MBBI;
2482 }
2483
2484 // Windows unwinder will not invoke function's exception handler if IP is
2485 // either in prologue or in epilogue. This behavior causes a problem when a
2486 // call immediately precedes an epilogue, because the return address points
2487 // into the epilogue. To cope with that, we insert an epilogue marker here,
2488 // then replace it with a 'nop' if it ends up immediately after a CALL in the
2489 // final emitted code.
2490 if (NeedsWin64CFI && MF.hasWinCFI())
2491 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
2492
2493 if (!HasFP && NeedsDwarfCFI) {
2494 MBBI = FirstCSPop;
2495 int64_t Offset = -CSSize - SlotSize;
2496 // Mark callee-saved pop instruction.
2497 // Define the current CFA rule to use the provided offset.
2498 while (MBBI != MBB.end()) {
2499 MachineBasicBlock::iterator PI = MBBI;
2500 unsigned Opc = PI->getOpcode();
2501 ++MBBI;
2502 if (Opc == X86::POP32r || Opc == X86::POP64r) {
2503 Offset += SlotSize;
2504 BuildCFI(MBB, MBBI, DL,
2507 }
2508 }
2509 }
2510
2511 // Emit DWARF info specifying the restores of the callee-saved registers.
2512 // For epilogue with return inside or being other block without successor,
2513 // no need to generate .cfi_restore for callee-saved registers.
2514 if (NeedsDwarfCFI && !MBB.succ_empty())
2515 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2516
2517 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2518 // Add the return addr area delta back since we are not tail calling.
2519 int Offset = -1 * X86FI->getTCReturnAddrDelta();
2520 assert(Offset >= 0 && "TCDelta should never be positive");
2521 if (Offset) {
2522 // Check for possible merge with preceding ADD instruction.
2523 Offset += mergeSPUpdates(MBB, Terminator, true);
2524 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2525 }
2526 }
2527
2528 // Emit tilerelease for AMX kernel.
2529 if (X86FI->hasVirtualTileReg())
2530 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2531}
2532
2533 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
2534 int FI,
2535 Register &FrameReg) const {
2536 const MachineFrameInfo &MFI = MF.getFrameInfo();
2537
2538 bool IsFixed = MFI.isFixedObjectIndex(FI);
2539 // We can't calculate offset from frame pointer if the stack is realigned,
2540 // so enforce usage of stack/base pointer. The base pointer is used when we
2541 // have dynamic allocas in addition to dynamic realignment.
2542 if (TRI->hasBasePointer(MF))
2543 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2544 else if (TRI->hasStackRealignment(MF))
2545 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2546 else
2547 FrameReg = TRI->getFrameRegister(MF);
2548
2549 // Offset will hold the offset from the stack pointer at function entry to the
2550 // object.
2551 // We need to factor in additional offsets applied during the prologue to the
2552 // frame, base, and stack pointer depending on which is used.
2553 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2554 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2555 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2556 uint64_t StackSize = MFI.getStackSize();
2557 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2558 int64_t FPDelta = 0;
2559
2560 // In an x86 interrupt, remove the offset we added to account for the return
2561 // address from any stack object allocated in the caller's frame. Interrupts
2562 // do not have a standard return address. Fixed objects in the current frame,
2563 // such as SSE register spills, should not get this treatment.
2564 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
2565 Offset >= 0) {
2566 Offset += getOffsetOfLocalArea();
2567 }
2568
2569 if (IsWin64Prologue) {
2570 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2571
2572 // Calculate required stack adjustment.
2573 uint64_t FrameSize = StackSize - SlotSize;
2574 // If required, include space for extra hidden slot for stashing base pointer.
2575 if (X86FI->getRestoreBasePointer())
2576 FrameSize += SlotSize;
2577 uint64_t NumBytes = FrameSize - CSSize;
2578
2579 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
2580 if (FI && FI == X86FI->getFAIndex())
2581 return StackOffset::getFixed(-SEHFrameOffset);
2582
2583 // FPDelta is the offset from the "traditional" FP location of the old base
2584 // pointer followed by return address and the location required by the
2585 // restricted Win64 prologue.
2586 // Add FPDelta to all offsets below that go through the frame pointer.
2587 FPDelta = FrameSize - SEHFrameOffset;
2588 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2589 "FPDelta isn't aligned per the Win64 ABI!");
2590 }
2591
2592 if (FrameReg == TRI->getFramePtr()) {
2593 // Skip saved EBP/RBP
2594 Offset += SlotSize;
2595
2596 // Account for restricted Windows prologue.
2597 Offset += FPDelta;
2598
2599 // Skip the RETADDR move area
2600 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2601 if (TailCallReturnAddrDelta < 0)
2602 Offset -= TailCallReturnAddrDelta;
2603
2604 return StackOffset::getFixed(Offset);
2605 }
2606
2607 // FrameReg is either the stack pointer or a base pointer. But the base is
2608 // located at the end of the statically known StackSize so the distinction
2609 // doesn't really matter.
2610 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2611 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2612 return StackOffset::getFixed(Offset + StackSize);
2613}
2614
2616 Register &FrameReg) const {
2617 const MachineFrameInfo &MFI = MF.getFrameInfo();
2619 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2620 const auto it = WinEHXMMSlotInfo.find(FI);
2621
2622 if (it == WinEHXMMSlotInfo.end())
2623 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2624
2625 FrameReg = TRI->getStackRegister();
2627 it->second;
2628}
2629
2632 Register &FrameReg,
2633 int Adjustment) const {
2634 const MachineFrameInfo &MFI = MF.getFrameInfo();
2635 FrameReg = TRI->getStackRegister();
2636 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2637 getOffsetOfLocalArea() + Adjustment);
2638}
2639
2642 int FI, Register &FrameReg,
2643 bool IgnoreSPUpdates) const {
2644
2645 const MachineFrameInfo &MFI = MF.getFrameInfo();
2646 // Does not include any dynamic realign.
2647 const uint64_t StackSize = MFI.getStackSize();
2648 // LLVM arranges the stack as follows:
2649 // ...
2650 // ARG2
2651 // ARG1
2652 // RETADDR
2653 // PUSH RBP <-- RBP points here
2654 // PUSH CSRs
2655 // ~~~~~~~ <-- possible stack realignment (non-win64)
2656 // ...
2657 // STACK OBJECTS
2658 // ... <-- RSP after prologue points here
2659 // ~~~~~~~ <-- possible stack realignment (win64)
2660 //
2661 // if (hasVarSizedObjects()):
2662 // ... <-- "base pointer" (ESI/RBX) points here
2663 // DYNAMIC ALLOCAS
2664 // ... <-- RSP points here
2665 //
2666 // Case 1: In the simple case of no stack realignment and no dynamic
2667 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2668 // with fixed offsets from RSP.
2669 //
2670 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2671 // stack objects are addressed with RBP and regular stack objects with RSP.
2672 //
2673 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2674 // to address stack arguments for outgoing calls and nothing else. The "base
2675 // pointer" points to local variables, and RBP points to fixed objects.
2676 //
2677 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2678 // answer we give is relative to the SP after the prologue, and not the
2679 // SP in the middle of the function.
2680
2681 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2682 !STI.isTargetWin64())
2683 return getFrameIndexReference(MF, FI, FrameReg);
2684
2685 // If !hasReservedCallFrame, the function might have SP adjustments in the
2686 // body. So even though the offset is statically known, it depends on where
2687 // we are in the function.
2688 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2689 return getFrameIndexReference(MF, FI, FrameReg);
2690
2691 // We don't handle tail calls, and shouldn't be seeing them either.
2693 "we don't handle this case!");
2694
2695 // This is how the math works out:
2696 //
2697 // %rsp grows (i.e. gets lower) left to right. Each box below is
2698 // one word (eight bytes). Obj0 is the stack slot we're trying to
2699 // get to.
2700 //
2701 // ----------------------------------
2702 // | BP | Obj0 | Obj1 | ... | ObjN |
2703 // ----------------------------------
2704 // ^ ^ ^ ^
2705 // A B C E
2706 //
2707 // A is the incoming stack pointer.
2708 // (B - A) is the local area offset (-8 for x86-64) [1]
2709 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2710 //
2711 // |(E - B)| is the StackSize (absolute value, positive). For a
2712 // stack that grows down, this works out to be (B - E). [3]
2713 //
2714 // E is also the value of %rsp after stack has been set up, and we
2715 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2716 // (C - E) == (C - A) - (B - A) + (B - E)
2717 // { Using [1], [2] and [3] above }
2718 // == getObjectOffset - LocalAreaOffset + StackSize
2719
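// As a concrete (made-up) x86-64 example: getObjectOffset(Obj0) = -16,
// LocalAreaOffset = -8, StackSize = 40, so the offset from the post-prologue
// RSP is -16 - (-8) + 40 = 32.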
2720 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2721}
2722
2725 std::vector<CalleeSavedInfo> &CSI) const {
2726 MachineFrameInfo &MFI = MF.getFrameInfo();
2728
2729 unsigned CalleeSavedFrameSize = 0;
2730 unsigned XMMCalleeSavedFrameSize = 0;
2731 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2732 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2733
2734 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2735
2736 if (TailCallReturnAddrDelta < 0) {
2737 // create RETURNADDR area
2738 // arg
2739 // arg
2740 // RETADDR
2741 // { ...
2742 // RETADDR area
2743 // ...
2744 // }
2745 // [EBP]
2746 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2747 TailCallReturnAddrDelta - SlotSize, true);
2748 }
2749
2750 // Spill the BasePtr if it's used.
2751 if (this->TRI->hasBasePointer(MF)) {
2752 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
2753 if (MF.hasEHFunclets()) {
2755 X86FI->setHasSEHFramePtrSave(true);
2756 X86FI->setSEHFramePtrSaveIndex(FI);
2757 }
2758 }
2759
2760 if (hasFP(MF)) {
2761 // emitPrologue always spills frame register the first thing.
2762 SpillSlotOffset -= SlotSize;
2763 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2764
2765 // The async context lives directly before the frame pointer, and we
2766 // allocate a second slot to preserve stack alignment.
2767 if (X86FI->hasSwiftAsyncContext()) {
2768 SpillSlotOffset -= SlotSize;
2769 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2770 SpillSlotOffset -= SlotSize;
2771 }
2772
2773 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2774 // the frame register, we can delete it from CSI list and not have to worry
2775 // about avoiding it later.
2776 Register FPReg = TRI->getFrameRegister(MF);
2777 for (unsigned i = 0; i < CSI.size(); ++i) {
2778 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
2779 CSI.erase(CSI.begin() + i);
2780 break;
2781 }
2782 }
2783 }
2784
2785 // Assign slots for GPRs. It increases frame size.
2786 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2787 Register Reg = I.getReg();
2788
2789 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2790 continue;
2791
2792 SpillSlotOffset -= SlotSize;
2793 CalleeSavedFrameSize += SlotSize;
2794
2795 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2796 I.setFrameIdx(SlotIndex);
2797 }
2798
2799 // Adjust the spill slot offset now that we know the exact callee-saved
2800 // frame size.
2801 if (X86FI->getRestoreBasePointer()) {
2802 SpillSlotOffset -= SlotSize;
2803 CalleeSavedFrameSize += SlotSize;
2804
2805 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2806 // TODO: saving the slot index is better?
2807 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
2808 }
2809 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2810 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2811
2812 // Assign slots for XMMs.
2813 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
2814 Register Reg = I.getReg();
2815 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2816 continue;
2817
2818 // If this is k-register make sure we lookup via the largest legal type.
2819 MVT VT = MVT::Other;
2820 if (X86::VK16RegClass.contains(Reg))
2821 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2822
2823 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2824 unsigned Size = TRI->getSpillSize(*RC);
2825 Align Alignment = TRI->getSpillAlign(*RC);
2826 // ensure alignment
2827 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2828 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
2829
2830 // spill into slot
2831 SpillSlotOffset -= Size;
2832 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2833 I.setFrameIdx(SlotIndex);
2834 MFI.ensureMaxAlignment(Alignment);
2835
2836 // Save the start offset and size of XMM in stack frame for funclets.
2837 if (X86::VR128RegClass.contains(Reg)) {
2838 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2839 XMMCalleeSavedFrameSize += Size;
2840 }
2841 }
2842
2843 return true;
2844}
2845
2850
2851 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2852 // for us, and there are no XMM CSRs on Win32.
2853 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2854 return true;
2855
2856 // Push GPRs. It increases frame size.
2857 const MachineFunction &MF = *MBB.getParent();
2858 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2859 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2860 Register Reg = I.getReg();
2861
2862 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2863 continue;
2864
2865 const MachineRegisterInfo &MRI = MF.getRegInfo();
2866 bool isLiveIn = MRI.isLiveIn(Reg);
2867 if (!isLiveIn)
2868 MBB.addLiveIn(Reg);
2869
2870 // Decide whether we can add a kill flag to the use.
2871 bool CanKill = !isLiveIn;
2872 // Check if any subregister is live-in
2873 if (CanKill) {
2874 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
2875 if (MRI.isLiveIn(*AReg)) {
2876 CanKill = false;
2877 break;
2878 }
2879 }
2880 }
2881
2882 // Do not set a kill flag on values that are also marked as live-in. This
2883 // happens with the @llvm.returnaddress intrinsic and with arguments
2884 // passed in callee saved registers.
2885 // Omitting the kill flags is conservatively correct even if the live-in
2886 // is not used after all.
2887 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
2889 }
2890
2892 if (X86FI->getRestoreBasePointer()) {
2893 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2894 Register BaseReg = this->TRI->getBaseRegister();
2895 BuildMI(MBB, MI, DL, TII.get(Opc))
2896 .addReg(BaseReg, getKillRegState(true))
2898 }
2899
2900 // Spill the XMM registers. X86 has no push/pop instructions for XMM
2901 // registers, so spill them to the stack frame instead.
2902 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
2903 Register Reg = I.getReg();
2904 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2905 continue;
2906
2907 // If this is k-register make sure we lookup via the largest legal type.
2908 MVT VT = MVT::Other;
2909 if (X86::VK16RegClass.contains(Reg))
2910 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2911
2912 // Add the callee-saved register as live-in. It's killed at the spill.
2913 MBB.addLiveIn(Reg);
2914 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2915
2916 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
2917 Register());
2918 --MI;
2919 MI->setFlag(MachineInstr::FrameSetup);
2920 ++MI;
2921 }
2922
2923 return true;
2924}
2925
2926void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
2928 MachineInstr *CatchRet) const {
2929 // SEH shouldn't use catchret.
2932 "SEH should not use CATCHRET");
2933 const DebugLoc &DL = CatchRet->getDebugLoc();
2934 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
2935
2936 // Fill EAX/RAX with the address of the target block.
2937 if (STI.is64Bit()) {
2938 // LEA64r CatchRetTarget(%rip), %rax
2939 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
2940 .addReg(X86::RIP)
2941 .addImm(0)
2942 .addReg(0)
2943 .addMBB(CatchRetTarget)
2944 .addReg(0);
2945 } else {
2946 // MOV32ri $CatchRetTarget, %eax
2947 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2948 .addMBB(CatchRetTarget);
2949 }
2950
2951 // Record that we've taken the address of CatchRetTarget and no longer just
2952 // reference it in a terminator.
2953 CatchRetTarget->setMachineBlockAddressTaken();
2954}
2955
2959 if (CSI.empty())
2960 return false;
2961
2962 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
2963 // Don't restore CSRs in 32-bit EH funclets. Matches
2964 // spillCalleeSavedRegisters.
2965 if (STI.is32Bit())
2966 return true;
2967 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
2968 // funclets. emitEpilogue transforms these to normal jumps.
2969 if (MI->getOpcode() == X86::CATCHRET) {
2970 const Function &F = MBB.getParent()->getFunction();
2971 bool IsSEH = isAsynchronousEHPersonality(
2972 classifyEHPersonality(F.getPersonalityFn()));
2973 if (IsSEH)
2974 return true;
2975 }
2976 }
2977
2979
2980 // Reload XMMs from stack frame.
2981 for (const CalleeSavedInfo &I : CSI) {
2982 Register Reg = I.getReg();
2983 if (X86::GR64RegClass.contains(Reg) ||
2984 X86::GR32RegClass.contains(Reg))
2985 continue;
2986
2987 // If this is k-register make sure we lookup via the largest legal type.
2988 MVT VT = MVT::Other;
2989 if (X86::VK16RegClass.contains(Reg))
2990 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2991
2992 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2993 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
2994 Register());
2995 }
2996
2997 // Clear the stack slot for spill base pointer register.
2998 MachineFunction &MF = *MBB.getParent();
3000 if (X86FI->getRestoreBasePointer()) {
3001 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3002 Register BaseReg = this->TRI->getBaseRegister();
3003 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3005 }
3006
3007 // POP GPRs.
3008 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3009 for (const CalleeSavedInfo &I : CSI) {
3010 Register Reg = I.getReg();
3011 if (!X86::GR64RegClass.contains(Reg) &&
3012 !X86::GR32RegClass.contains(Reg))
3013 continue;
3014
3015 BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
3017 }
3018 return true;
3019}
3020
3022 BitVector &SavedRegs,
3023 RegScavenger *RS) const {
3025
3026 // Spill the BasePtr if it's used.
3027 if (TRI->hasBasePointer(MF)){
3028 Register BasePtr = TRI->getBaseRegister();
3029 if (STI.isTarget64BitILP32())
3030 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3031 SavedRegs.set(BasePtr);
3032 }
3033}
3034
3035static bool
3037 const Function &F = MF->getFunction();
3038 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
3039 I != E; I++) {
3040 if (I->hasNestAttr() && !I->use_empty())
3041 return true;
3042 }
3043 return false;
3044}
3045
3046/// GetScratchRegister - Get a temp register for performing work in the
3047/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3048/// and the properties of the function either one or two registers will be
3049/// needed. Set primary to true for the first register, false for the second.
3050static unsigned
3051GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
3053
3054 // Erlang stuff.
3056 if (Is64Bit)
3057 return Primary ? X86::R14 : X86::R13;
3058 else
3059 return Primary ? X86::EBX : X86::EDI;
3060 }
3061
3062 if (Is64Bit) {
3063 if (IsLP64)
3064 return Primary ? X86::R11 : X86::R12;
3065 else
3066 return Primary ? X86::R11D : X86::R12D;
3067 }
3068
3069 bool IsNested = HasNestArgument(&MF);
3070
3074 if (IsNested)
3075 report_fatal_error("Segmented stacks do not support fastcall with "
3076 "nested functions.");
3077 return Primary ? X86::EAX : X86::ECX;
3078 }
3079 if (IsNested)
3080 return Primary ? X86::EDX : X86::EAX;
3081 return Primary ? X86::ECX : X86::EAX;
3082}
3083
3084// The stack limit in the TCB is set to this many bytes above the actual stack
3085// limit.
3086 static const uint64_t kSplitStackAvailable = 256;
3087
3088 void X86FrameLowering::adjustForSegmentedStacks(
3089 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3090 MachineFrameInfo &MFI = MF.getFrameInfo();
3091 uint64_t StackSize;
3092 unsigned TlsReg, TlsOffset;
3093 DebugLoc DL;
3094
3095 // To support shrink-wrapping we would need to insert the new blocks
3096 // at the right place and update the branches to PrologueMBB.
3097 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3098
3099 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3100 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3101 "Scratch register is live-in");
3102
3103 if (MF.getFunction().isVarArg())
3104 report_fatal_error("Segmented stacks do not support vararg functions.");
3105 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3108 report_fatal_error("Segmented stacks not supported on this platform.");
3109
3110 // Eventually StackSize will be calculated by a link-time pass; which will
3111 // also decide whether checking code needs to be injected into this particular
3112 // prologue.
3113 StackSize = MFI.getStackSize();
3114
3115 if (!MFI.needsSplitStackProlog())
3116 return;
3117
3121 bool IsNested = false;
3122
3123 // We need to know if the function has a nest argument only in 64 bit mode.
3124 if (Is64Bit)
3125 IsNested = HasNestArgument(&MF);
3126
3127 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3128 // allocMBB needs to be last (terminating) instruction.
3129
3130 for (const auto &LI : PrologueMBB.liveins()) {
3131 allocMBB->addLiveIn(LI);
3132 checkMBB->addLiveIn(LI);
3133 }
3134
3135 if (IsNested)
3136 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3137
3138 MF.push_front(allocMBB);
3139 MF.push_front(checkMBB);
3140
3141 // When the frame size is less than 256 we just compare the stack
3142 // boundary directly to the value of the stack pointer, per gcc.
3143 bool CompareStackPointer = StackSize < kSplitStackAvailable;
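// For illustration only (Linux x86-64 TLS offset shown, AT&T syntax, scratch
// register assumed to be %r11), the check block then amounts to roughly:
//   lea -<StackSize>(%rsp), %r11   ; omitted when the frame is under 256 bytes
//   cmp %fs:0x70, %r11             ; compare against the stacklet limit
//   ja  <function body>            ; enough room, skip the __morestack call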
3144
3145 // Read the limit off the current stacklet off the stack_guard location.
3146 if (Is64Bit) {
3147 if (STI.isTargetLinux()) {
3148 TlsReg = X86::FS;
3149 TlsOffset = IsLP64 ? 0x70 : 0x40;
3150 } else if (STI.isTargetDarwin()) {
3151 TlsReg = X86::GS;
3152 TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
3153 } else if (STI.isTargetWin64()) {
3154 TlsReg = X86::GS;
3155 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3156 } else if (STI.isTargetFreeBSD()) {
3157 TlsReg = X86::FS;
3158 TlsOffset = 0x18;
3159 } else if (STI.isTargetDragonFly()) {
3160 TlsReg = X86::FS;
3161 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3162 } else {
3163 report_fatal_error("Segmented stacks not supported on this platform.");
3164 }
3165
3166 if (CompareStackPointer)
3167 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3168 else
3169 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
3170 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
3171
3172 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
3173 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
3174 } else {
3175 if (STI.isTargetLinux()) {
3176 TlsReg = X86::GS;
3177 TlsOffset = 0x30;
3178 } else if (STI.isTargetDarwin()) {
3179 TlsReg = X86::GS;
3180 TlsOffset = 0x48 + 90*4;
3181 } else if (STI.isTargetWin32()) {
3182 TlsReg = X86::FS;
3183 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3184 } else if (STI.isTargetDragonFly()) {
3185 TlsReg = X86::FS;
3186 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3187 } else if (STI.isTargetFreeBSD()) {
3188 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3189 } else {
3190 report_fatal_error("Segmented stacks not supported on this platform.");
3191 }
3192
3193 if (CompareStackPointer)
3194 ScratchReg = X86::ESP;
3195 else
3196 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
3197 .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
3198
3201 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
3202 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
3203 } else if (STI.isTargetDarwin()) {
3204
3205 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3206 unsigned ScratchReg2;
3207 bool SaveScratch2;
3208 if (CompareStackPointer) {
3209 // The primary scratch register is available for holding the TLS offset.
3210 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3211 SaveScratch2 = false;
3212 } else {
3213 // Need to use a second register to hold the TLS offset
3214 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3215
3216 // Unfortunately, with fastcc the second scratch register may hold an
3217 // argument.
3218 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3219 }
3220
3221 // If Scratch2 is live-in then it needs to be saved.
3222 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3223 "Scratch register is live-in and not saved");
3224
3225 if (SaveScratch2)
3226 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3227 .addReg(ScratchReg2, RegState::Kill);
3228
3229 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3230 .addImm(TlsOffset);
3231 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3232 .addReg(ScratchReg)
3233 .addReg(ScratchReg2).addImm(1).addReg(0)
3234 .addImm(0)
3235 .addReg(TlsReg);
3236
3237 if (SaveScratch2)
3238 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3239 }
3240 }
3241
3242 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3243 // It jumps to normal execution of the function body.
3244 BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
3245
3246 // On 32 bit we first push the argument size and then the frame size. On 64
3247 // bit, we pass the stack frame size in r10 and the argument size in r11.
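// As a rough example (illustrative sizes; the exact MOV opcodes depend on
// getMOVriOpcode), a 64-bit LP64 function with StackSize = 88 and 16 bytes of
// stack arguments ends up with:
//   mov r10, 88        ; frame size
//   mov r11, 16        ; argument size
//   call __morestack
// while the 32-bit path pushes 16 and then 88 before its call.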
3248 if (Is64Bit) {
3249 // Functions with nested arguments use R10, so it needs to be saved across
3250 // the call to _morestack
3251
3252 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3253 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3254 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3255 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3256
3257 if (IsNested)
3258 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3259
3260 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3261 .addImm(StackSize);
3262 BuildMI(allocMBB, DL,
3264 Reg11)
3265 .addImm(X86FI->getArgumentStackSize());
3266 } else {
3267 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
3268 .addImm(X86FI->getArgumentStackSize());
3269 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
3270 .addImm(StackSize);
3271 }
3272
3273 // __morestack is in libgcc
3275 // Under the large code model, we cannot assume that __morestack lives
3276 // within 2^31 bytes of the call site, so we cannot use pc-relative
3277 // addressing. We cannot perform the call via a temporary register,
3278 // as the rax register may be used to store the static chain, and all
3279 // other suitable registers may be either callee-save or used for
3280 // parameter passing. We cannot use the stack at this point either
3281 // because __morestack manipulates the stack directly.
3282 //
3283 // To avoid these issues, perform an indirect call via a read-only memory
3284 // location containing the address.
3285 //
3286 // This solution is not perfect, as it assumes that the .rodata section
3287 // is laid out within 2^31 bytes of each function body, but this seems
3288 // to be sufficient for JIT.
3289 // FIXME: Add retpoline support and remove the error here.
3291 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3292 "code model and thunks not yet implemented.");
3293 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3294 .addReg(X86::RIP)
3295 .addImm(0)
3296 .addReg(0)
3297 .addExternalSymbol("__morestack_addr")
3298 .addReg(0);
3299 } else {
3300 if (Is64Bit)
3301 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3302 .addExternalSymbol("__morestack");
3303 else
3304 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3305 .addExternalSymbol("__morestack");
3306 }
3307
3308 if (IsNested)
3309 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3310 else
3311 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3312
3313 allocMBB->addSuccessor(&PrologueMBB);
3314
3315 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3316 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3317
3318#ifdef EXPENSIVE_CHECKS
3319 MF.verify();
3320#endif
3321}
3322
3323/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3324/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3325/// to fields it needs, through a named metadata node "hipe.literals" containing
3326/// name-value pairs.
3327static unsigned getHiPELiteral(
3328 NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
3329 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3330 MDNode *Node = HiPELiteralsMD->getOperand(i);
3331 if (Node->getNumOperands() != 2) continue;
3332 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3333 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3334 if (!NodeName || !NodeVal) continue;
3335 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3336 if (ValConst && NodeName->getString() == LiteralName) {
3337 return ValConst->getZExtValue();
3338 }
3339 }
3340
3341 report_fatal_error("HiPE literal " + LiteralName
3342 + " required but not provided");
3343}
3344
3345// Return true if there are no non-ehpad successors to MBB and there are no
3346// non-meta instructions between MBBI and MBB.end().
3349 return llvm::all_of(
3350 MBB.successors(),
3351 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3352 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3353 return MI.isMetaInstruction();
3354 });
3355}
3356
3357/// Erlang programs may need a special prologue to handle the stack size they
3358/// might need at runtime. That is because Erlang/OTP does not implement a C
3359 /// stack but uses a custom hybrid stack/heap architecture.
3360/// (for more information see Eric Stenman's Ph.D. thesis:
3361/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3362///
3363/// CheckStack:
3364/// temp0 = sp - MaxStack
3365/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3366/// OldStart:
3367/// ...
3368/// IncStack:
3369/// call inc_stack # doubles the stack space
3370/// temp0 = sp - MaxStack
3371/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3373 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3374 MachineFrameInfo &MFI = MF.getFrameInfo();
3375 DebugLoc DL;
3376
3377 // To support shrink-wrapping we would need to insert the new blocks
3378 // at the right place and update the branches to PrologueMBB.
3379 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3380
3381 // HiPE-specific values
3382 NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
3383 ->getNamedMetadata("hipe.literals");
3384 if (!HiPELiteralsMD)
3386 "Can't generate HiPE prologue without runtime parameters");
3387 const unsigned HipeLeafWords
3388 = getHiPELiteral(HiPELiteralsMD,
3389 Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3390 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3391 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3392 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
3393 MF.getFunction().arg_size() - CCRegisteredArgs : 0;
3394 unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
3395
3397 "HiPE prologue is only supported on Linux operating systems.");
3398
3399 // Compute the largest caller's frame that is needed to fit the callees'
3400 // frames. This 'MaxStack' is computed from:
3401 //
3402 // a) the fixed frame size, which is the space needed for all spilled temps,
3403 // b) outgoing on-stack parameter areas, and
3404 // c) the minimum stack space this function needs to make available for the
3405 // functions it calls (a tunable ABI property).
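// Hypothetical example on x86-64 (SlotSize = 8, CCRegisteredArgs = 6): a
// function with a 48-byte frame and 8 formal arguments has
// CallerStkArity = 8 - 6 = 2, so MaxStack starts at 48 + 2*8 + 8 = 72 and may
// then grow by the MoreStackForCalls amount computed below.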
3406 if (MFI.hasCalls()) {
3407 unsigned MoreStackForCalls = 0;
3408
3409 for (auto &MBB : MF) {
3410 for (auto &MI : MBB) {
3411 if (!MI.isCall())
3412 continue;
3413
3414 // Get callee operand.
3415 const MachineOperand &MO = MI.getOperand(0);
3416
3417 // Only take account of global function calls (no closures etc.).
3418 if (!MO.isGlobal())
3419 continue;
3420
3421 const Function *F = dyn_cast<Function>(MO.getGlobal());
3422 if (!F)
3423 continue;
3424
3425 // Do not update 'MaxStack' for primitive and built-in functions
3426 // (encoded with names either starting with "erlang."/"bif_" or not
3427 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
3428 // "_", such as the BIF "suspend_0") as they are executed on another
3429 // stack.
3430 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3431 F->getName().find_first_of("._") == StringRef::npos)
3432 continue;
3433
3434 unsigned CalleeStkArity =
3435 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
3436 if (HipeLeafWords - 1 > CalleeStkArity)
3437 MoreStackForCalls = std::max(MoreStackForCalls,
3438 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3439 }
3440 }
3441 MaxStack += MoreStackForCalls;
3442 }
3443
3444 // If the stack frame needed is larger than the guaranteed space, then runtime
3445 // checks and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
3446 if (MaxStack > Guaranteed) {
3447 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3448 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3449
3450 for (const auto &LI : PrologueMBB.liveins()) {
3451 stackCheckMBB->addLiveIn(LI);
3452 incStackMBB->addLiveIn(LI);
3453 }
3454
3455 MF.push_front(incStackMBB);
3456 MF.push_front(stackCheckMBB);
3457
3458 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3459 unsigned LEAop, CMPop, CALLop;
3460 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3461 if (Is64Bit) {
3462 SPReg = X86::RSP;
3463 PReg = X86::RBP;
3464 LEAop = X86::LEA64r;
3465 CMPop = X86::CMP64rm;
3466 CALLop = X86::CALL64pcrel32;
3467 } else {
3468 SPReg = X86::ESP;
3469 PReg = X86::EBP;
3470 LEAop = X86::LEA32r;
3471 CMPop = X86::CMP32rm;
3472 CALLop = X86::CALLpcrel32;
3473 }
3474
3475 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3476 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3477 "HiPE prologue scratch register is live-in");
3478
3479 // Create new MBB for StackCheck:
3480 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
3481 SPReg, false, -MaxStack);
3482 // SPLimitOffset is in a fixed heap location (pointed by BP).
3483 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
3484 .addReg(ScratchReg), PReg, false, SPLimitOffset);
3485 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
3486
3487 // Create new MBB for IncStack:
3488 BuildMI(incStackMBB, DL, TII.get(CALLop)).
3489 addExternalSymbol("inc_stack_0");
3490 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
3491 SPReg, false, -MaxStack);
3492 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
3493 .addReg(ScratchReg), PReg, false, SPLimitOffset);
3494 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
3495
3496 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3497 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3498 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3499 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3500 }
3501#ifdef EXPENSIVE_CHECKS
3502 MF.verify();
3503#endif
3504}
3505
3506bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3507 MachineBasicBlock::iterator MBBI,
3508 const DebugLoc &DL,
3509 int Offset) const {
3510 if (Offset <= 0)
3511 return false;
3512
3513 if (Offset % SlotSize)
3514 return false;
3515
3516 int NumPops = Offset / SlotSize;
3517 // This is only worth it if we have at most 2 pops.
3518 if (NumPops != 1 && NumPops != 2)
3519 return false;
3520
3521 // Handle only the trivial case where the adjustment directly follows
3522 // a call. This is the most common one, anyway.
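 // Illustrative effect (not from the source): on 32-bit, "call foo; addl $8,
 // %esp" can become "call foo; popl %ecx; popl %edx" when two call-clobbered
 // registers are free, trading a 3-byte ADD for two 1-byte POPs.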
3523 if (MBBI == MBB.begin())
3524 return false;
3525 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3526 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3527 return false;
3528
3529 unsigned Regs[2];
3530 unsigned FoundRegs = 0;
3531
3532 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3533 const MachineOperand &RegMask = Prev->getOperand(1);
3534
3535 auto &RegClass =
3536 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3537 // Try to find up to NumPops free registers.
3538 for (auto Candidate : RegClass) {
3539 // Poor man's liveness:
3540 // Since we're immediately after a call, any register that is clobbered
3541 // by the call and not defined by it can be considered dead.
3542 if (!RegMask.clobbersPhysReg(Candidate))
3543 continue;
3544
3545 // Don't clobber reserved registers
3546 if (MRI.isReserved(Candidate))
3547 continue;
3548
3549 bool IsDef = false;
3550 for (const MachineOperand &MO : Prev->implicit_operands()) {
3551 if (MO.isReg() && MO.isDef() &&
3552 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3553 IsDef = true;
3554 break;
3555 }
3556 }
3557
3558 if (IsDef)
3559 continue;
3560
3561 Regs[FoundRegs++] = Candidate;
3562 if (FoundRegs == (unsigned)NumPops)
3563 break;
3564 }
3565
3566 if (FoundRegs == 0)
3567 return false;
3568
3569 // If we found only one free register, but need two, reuse the same one twice.
3570 while (FoundRegs < (unsigned)NumPops)
3571 Regs[FoundRegs++] = Regs[0];
3572
3573 for (int i = 0; i < NumPops; ++i)
3574 BuildMI(MBB, MBBI, DL,
3575 TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
3576
3577 return true;
3578}
3579
3580MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
3581 MachineFunction &MF, MachineBasicBlock &MBB,
3582 MachineBasicBlock::iterator I) const {
3583 bool reserveCallFrame = hasReservedCallFrame(MF);
3584 unsigned Opcode = I->getOpcode();
3585 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3586 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3587 uint64_t Amount = TII.getFrameSize(*I);
3588 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3589 I = MBB.erase(I);
3590 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3591
3592 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3593 // typically because the function is marked noreturn (abort, throw,
3594 // assert_fail, etc).
3595 if (isDestroy && blockEndIsUnreachable(MBB, I))
3596 return I;
3597
3598 if (!reserveCallFrame) {
3599 // If the stack pointer can be changed after prologue, turn the
3600 // adjcallstackup instruction into a 'sub ESP, <amt>' and the
3601 // adjcallstackdown instruction into 'add ESP, <amt>'
3602
3603 // We need to keep the stack aligned properly. To do this, we round the
3604 // amount of space needed for the outgoing arguments up to the next
3605 // alignment boundary.
3606 Amount = alignTo(Amount, getStackAlign());
3607
3608 const Function &F = MF.getFunction();
3609 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
3610 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3611
3612 // If we have any exception handlers in this function, and we adjust
3613 // the SP before calls, we may need to indicate this to the unwinder
3614 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3615 // Amount == 0, because the preceding function may have set a non-0
3616 // GNU_ARGS_SIZE.
3617 // TODO: We don't need to reset this between subsequent functions,
3618 // if it didn't change.
3619 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3620
3621 if (HasDwarfEHHandlers && !isDestroy &&
3622 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
3623 BuildCFI(MBB, InsertPos, DL,
3624 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3625
3626 if (Amount == 0)
3627 return I;
3628
3629 // Factor out the amount that gets handled inside the sequence
3630 // (Pushes of argument for frame setup, callee pops for frame destroy)
3631 Amount -= InternalAmt;
3632
3633 // TODO: This is needed only if we require precise CFA.
3634 // If this is a callee-pop calling convention, emit a CFA adjust for
3635 // the amount the callee popped.
3636 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
3637 BuildCFI(MBB, InsertPos, DL,
3638 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
3639
3640 // Add Amount to SP to destroy a frame, or subtract to setup.
3641 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
3642
3643 if (StackAdjustment) {
3644 // Merge with any previous or following adjustment instruction. Note: the
3645 // instructions merged with here do not have CFI, so their stack
3646 // adjustments do not feed into CfaAdjustment.
3647 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
3648 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
3649
3650 if (StackAdjustment) {
3651 if (!(F.hasMinSize() &&
3652 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
3653 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
3654 /*InEpilogue=*/false);
3655 }
3656 }
3657
3658 if (DwarfCFI && !hasFP(MF)) {
3659 // If we don't have FP, but need to generate unwind information,
3660 // we need to set the correct CFA offset after the stack adjustment.
3661 // How much we adjust the CFA offset depends on whether we're emitting
3662 // CFI only for EH purposes or for debugging. EH only requires the CFA
3663 // offset to be correct at each call site, while for debugging we want
3664 // it to be more precise.
3665
3666 int64_t CfaAdjustment = -StackAdjustment;
3667 // TODO: When not using precise CFA, we also need to adjust for the
3668 // InternalAmt here.
3669 if (CfaAdjustment) {
3670 BuildCFI(MBB, InsertPos, DL,
3671 MCCFIInstruction::createAdjustCfaOffset(nullptr,
3672 CfaAdjustment));
3673 }
3674 }
3675
3676 return I;
3677 }
3678
3679 if (InternalAmt) {
3680 MachineBasicBlock::iterator CI = I;
3681 MachineBasicBlock::iterator B = MBB.begin();
3682 while (CI != B && !std::prev(CI)->isCall())
3683 --CI;
3684 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
3685 }
3686
3687 return I;
3688}
3689
3690bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
3691 assert(MBB.getParent() && "Block is not attached to a function!");
3692 const MachineFunction &MF = *MBB.getParent();
3693 if (!MBB.isLiveIn(X86::EFLAGS))
3694 return true;
3695
3696 // If stack probes have to loop inline or call, that will clobber EFLAGS.
3697 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
3698 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
3699 const X86TargetLowering &TLI = *STI.getTargetLowering();
3700 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
3701 return false;
3702
3703 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3704 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
3705}
3706
3707bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
3708 assert(MBB.getParent() && "Block is not attached to a function!");
3709
3710 // Win64 has strict requirements in terms of epilogue and we are
3711 // not taking a chance at messing with them.
3712 // I.e., unless this block is already an exit block, we can't use
3713 // it as an epilogue.
3714 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
3715 return false;
3716
3717 // Swift async context epilogue has a BTR instruction that clobbers parts of
3718 // EFLAGS.
3719 const MachineFunction &MF = *MBB.getParent();
3720 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
3721 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3722
3723 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
3724 return true;
3725
3726 // If we cannot use LEA to adjust SP, we may need to use ADD, which
3727 // clobbers the EFLAGS. Check that we do not need to preserve it,
3728 // otherwise, conservatively assume this is not
3729 // safe to insert the epilogue here.
3730 return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
3731}
3732
3733bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
3734 // If we may need to emit frameless compact unwind information, give
3735 // up as this is currently broken: PR25614.
3736 bool CompactUnwind =
3737 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
3738 nullptr;
3739 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
3740 !CompactUnwind) &&
3741 // The lowering of segmented stack and HiPE only support entry
3742 // blocks as prologue blocks: PR26107. This limitation may be
3743 // lifted if we fix:
3744 // - adjustForSegmentedStacks
3745 // - adjustForHiPEPrologue
3746 MF.getFunction().getCallingConv() != CallingConv::HiPE &&
3747 !MF.shouldSplitStack();
3748}
3749
3750MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
3751 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
3752 const DebugLoc &DL, bool RestoreSP) const {
3753 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
3754 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
3755 assert(STI.is32Bit() && !Uses64BitFramePtr &&
3756 "restoring EBP/ESI on non-32-bit target");
3757
3758 MachineFunction &MF = *MBB.getParent();
3759 Register FramePtr = TRI->getFrameRegister(MF);
3760 Register BasePtr = TRI->getBaseRegister();
3761 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
3762 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
3763 MachineFrameInfo &MFI = MF.getFrameInfo();
3764
3765 // FIXME: Don't set FrameSetup flag in catchret case.
3766
3767 int FI = FuncInfo.EHRegNodeFrameIndex;
3768 int EHRegSize = MFI.getObjectSize(FI);
3769
3770 if (RestoreSP) {
3771 // MOV32rm -EHRegSize(%ebp), %esp
3772 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
3773 X86::EBP, true, -EHRegSize)
3774 .setMIFlag(MachineInstr::FrameSetup);
3775 }
3776
3777 Register UsedReg;
3778 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
3779 int EndOffset = -EHRegOffset - EHRegSize;
3780 FuncInfo.EHRegNodeEndOffset = EndOffset;
3781
3782 if (UsedReg == FramePtr) {
3783 // ADD $offset, %ebp
3784 unsigned ADDri = getADDriOpcode(false, EndOffset);
3785 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
3786 .addReg(FramePtr)
3787 .addImm(EndOffset)
3788 .setMIFlag(MachineInstr::FrameSetup)
3789 ->getOperand(3)
3790 .setIsDead();
3791 assert(EndOffset >= 0 &&
3792 "end of registration object above normal EBP position!");
3793 } else if (UsedReg == BasePtr) {
3794 // LEA offset(%ebp), %esi
3795 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
3796 FramePtr, false, EndOffset)
3797 .setMIFlag(MachineInstr::FrameSetup);
3798 // MOV32rm SavedEBPOffset(%esi), %ebp
3799 assert(X86FI->getHasSEHFramePtrSave());
3800 int Offset =
3801 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
3802 .getFixed();
3803 assert(UsedReg == BasePtr);
3804 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
3805 UsedReg, true, Offset)
3806 .setMIFlag(MachineInstr::FrameSetup);
3807 } else {
3808 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
3809 }
3810 return MBBI;
3811}
3812
3813int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3814 return TRI->getSlotSize();
3815}
3816
3817Register
3818X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
3819 return TRI->getDwarfRegNum(StackPtr, true);
3820}
3821
3822namespace {
3823// Struct used by orderFrameObjects to help sort the stack objects.
3824struct X86FrameSortingObject {
3825 bool IsValid = false; // true if we care about this Object.
3826 unsigned ObjectIndex = 0; // Index of Object into MFI list.
3827 unsigned ObjectSize = 0; // Size of Object in bytes.
3828 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3829 unsigned ObjectNumUses = 0; // Object static number of uses.
3830};
3831
3832// The comparison function we use for std::sort to order our local
3833// stack symbols. The current algorithm is to use an estimated
3834// "density". This takes into consideration the size and number of
3835// uses each object has in order to roughly minimize code size.
3836// So, for example, an object of size 16B that is referenced 5 times
3837// will get higher priority than 4 4B objects referenced 1 time each.
3838// It's not perfect and we may be able to squeeze a few more bytes out of
3839// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
3840// fringe end can have special consideration, given their size is less
3841// important, etc.), but the algorithmic complexity grows too much to be
3842// worth the extra gains we get. This gets us pretty close.
3843// The final order leaves us with objects with highest priority going
3844// at the end of our list.
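// Illustrative comparison: a 16-byte object with 5 uses versus a 4-byte object
// with 1 use compares as 5*4 = 20 against 1*16 = 16, so the denser 16-byte
// object sorts later in the list, i.e. with higher priority.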
3845struct X86FrameSortingComparator {
3846 inline bool operator()(const X86FrameSortingObject &A,
3847 const X86FrameSortingObject &B) const {
3848 uint64_t DensityAScaled, DensityBScaled;
3849
3850 // For consistency in our comparison, all invalid objects are placed
3851 // at the end. This also allows us to stop walking when we hit the
3852 // first invalid item after it's all sorted.
3853 if (!A.IsValid)
3854 return false;
3855 if (!B.IsValid)
3856 return true;
3857
3858 // The density is calculated by doing :
3859 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
3860 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
3861 // Since this approach may cause inconsistencies in
3862 // the floating point <, >, == comparisons, depending on the floating
3863 // point model with which the compiler was built, we're going
3864 // to scale both sides by multiplying with
3865 // A.ObjectSize * B.ObjectSize. This ends up factoring away
3866 // the division and, with it, the need for any floating point
3867 // arithmetic.
3868 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
3869 static_cast<uint64_t>(B.ObjectSize);
3870 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
3871 static_cast<uint64_t>(A.ObjectSize);
3872
3873 // If the two densities are equal, prioritize highest alignment
3874 // objects. This allows for similar alignment objects
3875 // to be packed together (given the same density).
3876 // There's room for improvement here, also, since we can pack
3877 // similar alignment (different density) objects next to each
3878 // other to save padding. This will also require further
3879 // complexity/iterations, and the overall gain isn't worth it,
3880 // in general. Something to keep in mind, though.
3881 if (DensityAScaled == DensityBScaled)
3882 return A.ObjectAlignment < B.ObjectAlignment;
3883
3884 return DensityAScaled < DensityBScaled;
3885 }
3886};
3887} // namespace
3888
3889// Order the symbols in the local stack.
3890// We want to place the local stack objects in some sort of sensible order.
3891// The heuristic we use is to try and pack them according to static number
3892// of uses and size of object in order to minimize code size.
3893void X86FrameLowering::orderFrameObjects(
3894 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3895 const MachineFrameInfo &MFI = MF.getFrameInfo();
3896
3897 // Don't waste time if there's nothing to do.
3898 if (ObjectsToAllocate.empty())
3899 return;
3900
3901 // Create an array of all MFI objects. We won't need all of these
3902 // objects, but we're going to create a full array of them to make
3903 // it easier to index into when we're counting "uses" down below.
3904 // We want to be able to easily/cheaply access an object by simply
3905 // indexing into it, instead of having to search for it every time.
3906 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
3907
3908 // Walk the objects we care about and mark them as such in our working
3909 // struct.
3910 for (auto &Obj : ObjectsToAllocate) {
3911 SortingObjects[Obj].IsValid = true;
3912 SortingObjects[Obj].ObjectIndex = Obj;
3913 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
3914 // Set the size.
3915 int ObjectSize = MFI.getObjectSize(Obj);
3916 if (ObjectSize == 0)
3917 // Variable size. Just use 4.
3918 SortingObjects[Obj].ObjectSize = 4;
3919 else
3920 SortingObjects[Obj].ObjectSize = ObjectSize;
3921 }
3922
3923 // Count the number of uses for each object.
3924 for (auto &MBB : MF) {
3925 for (auto &MI : MBB) {
3926 if (MI.isDebugInstr())
3927 continue;
3928 for (const MachineOperand &MO : MI.operands()) {
3929 // Check to see if it's a local stack symbol.
3930 if (!MO.isFI())
3931 continue;
3932 int Index = MO.getIndex();
3933 // Check to see if it falls within our range, and is tagged
3934 // to require ordering.
3935 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
3936 SortingObjects[Index].IsValid)
3937 SortingObjects[Index].ObjectNumUses++;
3938 }
3939 }
3940 }
3941
3942 // Sort the objects using X86FrameSortingAlgorithm (see its comment for
3943 // info).
3944 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
3945
3946 // Now modify the original list to represent the final order that
3947 // we want. The order depends on whether we're going to access them
3948 // from the stack pointer or the frame pointer. For SP, the end of the list
3949 // should contain the objects we want at smaller offsets.
3950 // For FP, the order should be flipped.
3951 int i = 0;
3952 for (auto &Obj : SortingObjects) {
3953 // All invalid items are sorted at the end, so it's safe to stop.
3954 if (!Obj.IsValid)
3955 break;
3956 ObjectsToAllocate[i++] = Obj.ObjectIndex;
3957 }
3958
3959 // Flip it if we're accessing off of the FP.
3960 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
3961 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
3962}
3963
3964unsigned
3965X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
3966 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
3967 unsigned Offset = 16;
3968 // RBP is immediately pushed.
3969 Offset += SlotSize;
3970 // All callee-saved registers are then pushed.
3971 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
3972 // Every funclet allocates enough stack space for the largest outgoing call.
3973 Offset += getWinEHFuncletFrameSize(MF);
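 // Illustrative total (not from the source): with a 32-byte callee-saved area
 // and a 32-byte funclet frame, the parent frame offset is 16 + 8 + 32 + 32 =
 // 88 bytes.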
3974 return Offset;
3975}
3976
3977void X86FrameLowering::processFunctionBeforeFrameFinalized(
3978 MachineFunction &MF, RegScavenger *RS) const {
3979 // Mark the function as not having WinCFI. We will set it back to true in
3980 // emitPrologue if it gets called and emits CFI.
3981 MF.setHasWinCFI(false);
3982
3983 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
3984 // aligned. The format doesn't support misaligned stack adjustments.
3985 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
3986 MF.getFrameInfo().ensureMaxAlignment(Align(8));
3987
3988 // If this function isn't doing Win64-style C++ EH, we don't need to do
3989 // anything.
3990 if (STI.is64Bit() && MF.hasEHFunclets() &&
3991 classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
3992 EHPersonality::MSVC_CXX) {
3993 adjustFrameForMsvcCxxEh(MF);
3994 }
3995}
3996
3997void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
3998 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
3999 // relative to RSP after the prologue. Find the offset of the last fixed
4000 // object, so that we can allocate a slot immediately following it. If there
4001 // were no fixed objects, use offset -SlotSize, which is immediately after the
4002 // return address. Fixed objects have negative frame indices.
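 // Illustrative placement (not from the source): if the lowest fixed object
 // sits at offset -24 (already 8-byte aligned) and there are no catch objects,
 // UnwindHelp lands one slot below, at offset -32.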
4003 MachineFrameInfo &MFI = MF.getFrameInfo();
4004 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4005 int64_t MinFixedObjOffset = -SlotSize;
4006 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4007 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4008
4009 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4010 for (WinEHHandlerType &H : TBME.HandlerArray) {
4011 int FrameIndex = H.CatchObj.FrameIndex;
4012 if (FrameIndex != INT_MAX) {
4013 // Ensure alignment.
4014 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4015 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4016 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4017 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4018 }
4019 }
4020 }
4021
4022 // Ensure alignment.
4023 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4024 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4025 int UnwindHelpFI =
4026 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4027 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4028
4029 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4030 // other frame setup instructions.
4031 MachineBasicBlock &MBB = MF.front();
4032 auto MBBI = MBB.begin();
4033 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4034 ++MBBI;
4035
4036 DebugLoc DL = MBB.findDebugLoc(MBBI);
4037 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4038 UnwindHelpFI)
4039 .addImm(-2);
4040}
4041
4042void X86FrameLowering::processFunctionAfterFrameFinalized(
4043 MachineFunction &MF, RegScavenger *RS) const {
4044 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4045
4046 if (STI.is32Bit() && MF.hasEHFunclets())
4047 restoreWinEHStackPointersInParent(MF);
4048 // We have emitted prolog and epilog. Don't need stack pointer saving
4049 // instruction any more.
4050 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4051 MI->eraseFromParent();
4052 X86FI->setStackPtrSaveMI(nullptr);
4053 }
4054}
4055
4056void X86FrameLowering::restoreWinEHStackPointersInParent(
4057 MachineFunction &MF) const {
4058 // 32-bit functions have to restore stack pointers when control is transferred
4059 // back to the parent function. These blocks are identified as eh pads that
4060 // are not funclet entries.
4061 bool IsSEH = isAsynchronousEHPersonality(
4062 classifyEHPersonality(MF.getFunction().getPersonalityFn()));
4063 for (MachineBasicBlock &MBB : MF) {
4064 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4065 if (NeedsRestore)
4066 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
4067 /*RestoreSP=*/IsSEH);
4068 }
4069}