LLVM 23.0.0git
X86FrameLowering.cpp
Go to the documentation of this file.
1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrInfo.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/DataLayout.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Module.h"
33#include "llvm/MC/MCAsmInfo.h"
35#include "llvm/MC/MCSymbol.h"
36#include "llvm/Support/LEB128.h"
38#include <cstdlib>
39
40#define DEBUG_TYPE "x86-fl"
41
42STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
43STATISTIC(NumFrameExtraProbe,
44 "Number of extra stack probes generated in prologue");
45STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
46
47using namespace llvm;
48
// Returns the register class to use for `Reg`.
// NOTE(review): the line holding this function's name and its leading
// parameters (the declarations of `Reg` and `STI`) is missing from this
// listing; only the trailing `TRI` parameter is visible.
49static const TargetRegisterClass *
51 const TargetRegisterInfo &TRI) {
// Registers contained in VK16 map to VK64 when the subtarget reports BWI and
// stay in VK16 otherwise.
52 if (X86::VK16RegClass.contains(Reg))
53 return STI.hasBWI() ? &X86::VK64RegClass : &X86::VK16RegClass;
// Any other register gets the minimal physical register class from TRI.
54 return TRI.getMinimalPhysRegClass(Reg);
55}
56
// NOTE(review): the opening line of this definition is missing from this
// listing. What remains is the end of a parameter list, a TargetFrameLowering
// base initializer and member initializers, so this is presumably the
// X86FrameLowering constructor -- confirm against the original file.
58 MaybeAlign StackAlignOverride)
// The base class is initialized with a downward-growing stack, the override
// alignment (or 1 when no override is given) and -8 / -4 depending on whether
// the subtarget is 64-bit.
59 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
60 STI.is64Bit() ? -8 : -4),
61 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
62 // Cache a bunch of frame-related predicates for this subtarget.
63 SlotSize = TRI->getSlotSize();
// Only 4- and 8-byte slots are expected.
64 assert(SlotSize == 4 || SlotSize == 8);
65 Is64Bit = STI.is64Bit();
66 IsLP64 = STI.isTarget64BitLP64();
67 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
68 Uses64BitFramePtr = STI.isTarget64BitLP64();
69 StackPtr = TRI->getStackRegister();
70}
71
// NOTE(review): the signature line of this definition is missing from this
// listing (presumably hasReservedCallFrame -- confirm).
// Returns true only when the function has no variable-sized stack objects,
// no push sequences and no preallocated call.
73 return !MF.getFrameInfo().hasVarSizedObjects() &&
74 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
75 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
76}
77
78/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
79/// call frame pseudos can be simplified. Having a FP, as in the default
80/// implementation, is not sufficient here since we can't always use it.
81/// Use a more nuanced condition.
///
/// Returns true when any of the following holds: the call frame is reserved,
/// the function has a preallocated call, it has a frame pointer and no stack
/// realignment, or it has a base pointer.
/// NOTE(review): the first line of the signature is missing from this listing.
83 const MachineFunction &MF) const {
84 return hasReservedCallFrame(MF) ||
85 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
86 (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
87 TRI->hasBasePointer(MF);
88}
89
90// needsFrameIndexResolution - Do we need to perform FI resolution for
91// this function. Normally, this is required only when the function
92// has any stack objects. However, FI resolution actually has another job,
93// not apparent from the title - it resolves callframesetup/destroy
94// that were not simplified earlier.
95// So, this is required for x86 functions that have push sequences even
96// when there are no stack objects.
//
// Returns true when the function has stack objects or has push sequences.
// NOTE(review): the first line of the signature is missing from this listing.
98 const MachineFunction &MF) const {
99 return MF.getFrameInfo().hasStackObjects() ||
100 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
101}
102
103/// hasFPImpl - Return true if the specified function should have a dedicated
104/// frame pointer register. This is true if the function has variable sized
105/// allocas or if frame pointer elimination is disabled.
///
/// Besides those two cases, the visible conditions also force a frame pointer
/// for stack realignment, unwind-init / EH-return calls, EH funclets, stack
/// maps, patch points, and Win64 prologues with a copy that implies a stack
/// adjustment.
/// NOTE(review): the signature line and three lines of the condition are
/// missing from this listing, so the list above is incomplete.
107 const MachineFrameInfo &MFI = MF.getFrameInfo();
108 return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
109 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
113 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
114 MFI.hasStackMap() || MFI.hasPatchPoint() ||
115 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
116}
117
118static unsigned getSUBriOpcode(bool IsLP64) {
119 return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
120}
121
122static unsigned getADDriOpcode(bool IsLP64) {
123 return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
124}
125
126static unsigned getSUBrrOpcode(bool IsLP64) {
127 return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
128}
129
130static unsigned getADDrrOpcode(bool IsLP64) {
131 return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
132}
133
134static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
135 return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
136}
137
138static unsigned getLEArOpcode(bool IsLP64) {
139 return IsLP64 ? X86::LEA64r : X86::LEA32r;
140}
141
142static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
143 if (Use64BitReg) {
144 if (isUInt<32>(Imm))
145 return X86::MOV32ri64;
146 if (isInt<32>(Imm))
147 return X86::MOV64ri32;
148 return X86::MOV64ri;
149 }
150 return X86::MOV32ri;
151}
152
153// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
154// value written by the PUSH from the stack. The processor tracks these marked
155// instructions internally and fast-forwards register data between matching PUSH
156// and POP instructions, without going through memory or through the training
157// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
158// memory-renaming optimization can be used.
159//
160// The PPX hint is purely a performance hint. Instructions with this hint have
161// the same functional semantics as those without. PPX hints set by the
162// compiler that violate the balancing rule may turn off the PPX optimization,
163// but they will not affect program semantics.
164//
165// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
166// are not considered).
167//
168// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
169// GPRs at a time to/from the stack.
170static unsigned getPUSHOpcode(const X86Subtarget &ST) {
171 return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
172 : X86::PUSH32r;
173}
174static unsigned getPOPOpcode(const X86Subtarget &ST) {
175 return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
176 : X86::POP32r;
177}
178static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
179 return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
180}
181static unsigned getPOP2Opcode(const X86Subtarget &ST) {
182 return ST.hasPPX() ? X86::POP2P : X86::POP2;
183}
184
// NOTE(review): the signature line of this definition is missing from this
// listing; `MBB` is declared there (presumably isEAXLiveIn, which is called
// elsewhere in this file -- confirm).
// Returns true if any live-in of MBB is RAX or one of EAX, AX, AH, AL.
186 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
187 MCRegister Reg = RegMask.PhysReg;
188
189 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
190 Reg == X86::AH || Reg == X86::AL)
191 return true;
192 }
193
194 return false;
195}
196
197/// Check if the flags need to be preserved before the terminators.
198/// This would be the case, if the eflags is live-in of the region
199/// composed by the terminators or live-out of that region, without
200/// being defined by a terminator.
///
/// NOTE(review): the line carrying the function name and the `MBB` parameter
/// is missing from this listing.
201static bool
203 for (const MachineInstr &MI : MBB.terminators()) {
204 bool BreakNext = false;
205 for (const MachineOperand &MO : MI.operands()) {
206 if (!MO.isReg())
207 continue;
208 Register Reg = MO.getReg();
209 if (Reg != X86::EFLAGS)
210 continue;
211
212 // This terminator needs an eflags value that is not defined
213 // by a previous terminator:
214 // EFLAGS is live-in of the region composed by the terminators.
215 if (!MO.isDef())
216 return true;
217 // This terminator defines the eflags, i.e., we don't need to preserve it.
218 // However, we still need to check this specific terminator does not
219 // read a live-in value.
220 BreakNext = true;
221 }
222 // We found a definition of the eflags, no need to preserve them.
223 if (BreakNext)
224 return false;
225 }
226
227 // None of the terminators use or define the eflags.
228 // Check if they are live-out, that would imply we need to preserve them.
229 for (const MachineBasicBlock *Succ : MBB.successors())
230 if (Succ->isLiveIn(X86::EFLAGS))
231 return true;
232
233 return false;
234}
235
// Upper bound (2^31 - 1) on a single stack-pointer adjustment step; larger
// updates are either done through a register or split into chunks of at most
// this size.
236constexpr uint64_t MaxSPChunk = (1ULL << 31) - 1;
237
238/// emitSPUpdate - Emit a series of instructions to increment / decrement the
239/// stack pointer by a constant value.
///
/// A negative NumBytes requests a subtraction from the stack pointer, a
/// positive one an addition. InEpilogue is forwarded to BuildStackAdjustment
/// and also disables the inline-stack-probe path.
///
/// NOTE(review): several lines of this definition are missing from this
/// listing, including the start of the signature, the declaration of `Flag`,
/// the condition guarding the TRAP, and parts of some BuildMI chains.
242 const DebugLoc &DL, int64_t NumBytes,
243 bool InEpilogue) const {
244 bool isSub = NumBytes < 0;
// From here on work with the magnitude; isSub remembers the direction.
245 uint64_t Offset = isSub ? -NumBytes : NumBytes;
248
250 // We're being asked to adjust a 32-bit stack pointer by 4 GiB or more.
251 // This might be unreachable code, so don't complain now; just trap if
252 // it's reached at runtime.
253 BuildMI(MBB, MBBI, DL, TII.get(X86::TRAP));
254 return;
255 }
256
257 MachineFunction &MF = *MBB.getParent();
259 const X86TargetLowering &TLI = *STI.getTargetLowering();
260 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
261
262 // It's ok to not take into account large chunks when probing, as the
263 // allocation is split in smaller chunks anyway.
264 if (EmitInlineStackProbe && !InEpilogue) {
265
266 // This pseudo-instruction is going to be expanded, potentially using a
267 // loop, by inlineStackProbe().
268 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
269 return;
270 } else if (Offset > MaxSPChunk) {
271 // Rather than emit a long series of instructions for large offsets,
272 // load the offset into a register and do one sub/add
273 unsigned Reg = 0;
274 unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);
275
// Use RAX/EAX as scratch when subtracting and it is not live into the block;
// otherwise look for a dead caller-saved register.
276 if (isSub && !isEAXLiveIn(MBB))
277 Reg = Rax;
278 else
279 Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
280 Uses64BitFramePtr ? 64 : 32);
281
282 unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
284 if (Reg) {
286 Reg)
287 .addImm(Offset)
288 .setMIFlag(Flag);
289 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
291 .addReg(Reg);
292 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
293 return;
294 } else if (Offset > 8 * MaxSPChunk) {
295 // If we would need more than 8 add or sub instructions (a >16GB stack
296 // frame), it's worth spilling RAX to materialize this immediate.
297 // pushq %rax
298 // movabsq +-$Offset+-SlotSize, %rax
299 // addq %rsp, %rax
300 // xchg %rax, (%rsp)
301 // movq (%rsp), %rsp
302 assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
303 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
305 .setMIFlag(Flag);
306 // Subtract is not commutative, so negate the offset and always use add.
307 // Subtract 8 less and add 8 more to account for the PUSH we just did.
308 if (isSub)
309 Offset = -(Offset - SlotSize);
310 else
313 Rax)
314 .addImm(Offset)
315 .setMIFlag(Flag);
316 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
317 .addReg(Rax)
319 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
320 // Exchange the new SP in RAX with the top of the stack.
322 BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
323 StackPtr, false, 0);
324 // Load new SP from the top of the stack into RSP.
325 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
326 StackPtr, false, 0);
327 return;
328 }
329 }
330
// Fallback: apply the adjustment in pieces of at most MaxSPChunk bytes.
331 while (Offset) {
332 if (Offset == SlotSize) {
333 // Use push / pop for slot sized adjustments as a size optimization. We
334 // need to find a dead register when using pop.
335 unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
336 : TRI->findDeadCallerSavedReg(MBB, MBBI);
337 if (Reg) {
338 unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
339 : (Is64Bit ? X86::POP64r : X86::POP32r);
// A push reads Reg as undef (its value is irrelevant); a pop defines it.
340 BuildMI(MBB, MBBI, DL, TII.get(Opc))
341 .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
342 .setMIFlag(Flag);
343 return;
344 }
345 }
346
347 uint64_t ThisVal = std::min(Offset, MaxSPChunk);
348
349 BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
350 .setMIFlag(Flag);
351
352 Offset -= ThisVal;
353 }
354}
355
// Emits one instruction that adjusts the stack pointer by Offset (non-zero;
// negative subtracts) and returns its builder so callers can attach flags.
// Either an LEA or an immediate ADD/SUB is used, see below.
// NOTE(review): several lines are missing from this listing: the `MBB`/`MBBI`
// parameters, the assert in the epilogue branch, the start of the LEA builder
// and parts of the ADD/SUB builder.
356MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
358 const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
359 assert(Offset != 0 && "zero offset stack adjustment requested");
360
361 // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
362 // is tricky.
363 bool UseLEA;
364 if (!InEpilogue) {
365 // Check if inserting the prologue at the beginning
366 // of MBB would require to use LEA operations.
367 // We need to use LEA operations if EFLAGS is live in, because
368 // it means an instruction will read it before it gets defined.
369 UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
370 } else {
371 // If we can use LEA for SP but we shouldn't, check that none
372 // of the terminators uses the eflags. Otherwise we will insert
373 // a ADD that will redefine the eflags and break the condition.
374 // Alternatively, we could move the ADD, but this may not be possible
375 // and is an optimization anyway.
376 UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
377 if (UseLEA && !STI.useLeaForSP())
379 // If that assert breaks, that means we do not do the right thing
380 // in canUseAsEpilogue.
382 "We shouldn't have allowed this insertion point");
383 }
384
385 MachineInstrBuilder MI;
386 if (UseLEA) {
389 StackPtr),
390 StackPtr, false, Offset);
391 } else {
// ADD/SUB take the magnitude as immediate; the sign selects the opcode.
392 bool IsSub = Offset < 0;
393 uint64_t AbsOffset = IsSub ? -Offset : Offset;
394 const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
396 MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
398 .addImm(AbsOffset);
399 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
400 }
401 return MI;
402}
403
// Looks at the instruction adjacent to MBBI (the previous one when
// doMergeWithPrevious is true, otherwise the one at MBBI) for an
// ADD/SUB/LEA of the stack pointer by an immediate. Each adjustment found is
// reported through FoundStackAdjust; the scan continues to the next
// neighbour while |CalcNewOffset(Offset)| is not below MaxSPChunk. On
// success the last instruction examined (and a CFI instruction directly after
// it, if any) is erased and CalcNewOffset(Offset) is returned; when no
// mergeable instruction is found the result is CalcNewOffset(0).
// NOTE(review): the `MBBI` parameter line and a few body lines are missing
// from this listing (e.g. the handling of the erased CFI instruction `CI` and
// the statement guarded by the final `if (!doMergeWithPrevious)`).
404template <typename FoundT, typename CalcT>
405int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
407 FoundT FoundStackAdjust,
408 CalcT CalcNewOffset,
409 bool doMergeWithPrevious) const {
// Nothing to merge with at the respective end of the block.
410 if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
411 (!doMergeWithPrevious && MBBI == MBB.end()))
412 return CalcNewOffset(0);
413
414 MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
415
417 // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
418 // instruction, and that there are no DBG_VALUE or other instructions between
419 // ADD/SUB/LEA and its corresponding CFI instruction.
420 /* TODO: Add support for the case where there are multiple CFI instructions
421 below the ADD/SUB/LEA, e.g.:
422 ...
423 add
424 cfi_def_cfa_offset
425 cfi_offset
426 ...
427 */
428 if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
429 PI = std::prev(PI);
430
431 int64_t Offset = 0;
432 for (;;) {
433 unsigned Opc = PI->getOpcode();
434
// ADD sp, imm -> +imm; LEA sp, [sp + disp] -> +disp; SUB sp, imm -> -imm.
435 if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
436 PI->getOperand(0).getReg() == StackPtr) {
437 assert(PI->getOperand(1).getReg() == StackPtr);
438 Offset = PI->getOperand(2).getImm();
439 } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
440 PI->getOperand(0).getReg() == StackPtr &&
441 PI->getOperand(1).getReg() == StackPtr &&
442 PI->getOperand(2).getImm() == 1 &&
443 PI->getOperand(3).getReg() == X86::NoRegister &&
444 PI->getOperand(5).getReg() == X86::NoRegister) {
445 // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
446 Offset = PI->getOperand(4).getImm();
447 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
448 PI->getOperand(0).getReg() == StackPtr) {
449 assert(PI->getOperand(1).getReg() == StackPtr);
450 Offset = -PI->getOperand(2).getImm();
451 } else
452 return CalcNewOffset(0);
453
454 FoundStackAdjust(PI, Offset);
// Stop once the combined offset is small enough for one adjustment.
455 if ((uint64_t)std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk)
456 break;
457
458 if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
459 return CalcNewOffset(0);
460
461 PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
462 }
463
// Remove the merged instruction and, if present, the CFI directly after it.
464 PI = MBB.erase(PI);
465 if (PI != MBB.end() && PI->isCFIInstruction()) {
466 auto CIs = MBB.getParent()->getFrameInstructions();
467 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
470 PI = MBB.erase(PI);
471 }
472 if (!doMergeWithPrevious)
474
475 return CalcNewOffset(Offset);
476}
477
// Convenience wrapper around mergeSPUpdates whose offset calculation simply
// adds the merged adjustment to AddOffset.
// NOTE(review): the first two lines of the signature (function name, `MBB`,
// `MBBI`) are missing from this listing. The call below passes one callable,
// so it presumably resolves to a different mergeSPUpdates overload than the
// two-callable template defined in this file -- confirm.
480 int64_t AddOffset,
481 bool doMergeWithPrevious) const {
482 return mergeSPUpdates(
483 MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; },
484 doMergeWithPrevious);
485}
486
// Registers CFIInst with the machine function and inserts a CFI_INSTRUCTION
// referring to it at MBBI, tagged with Flag.
// NOTE(review): the first lines of the signature and the condition guarding
// setHasCFIAdjustCfa(true) are missing from this listing.
489 const DebugLoc &DL,
490 const MCCFIInstruction &CFIInst,
491 MachineInstr::MIFlag Flag) const {
492 MachineFunction &MF = *MBB.getParent();
// addFrameInst returns the index the CFI_INSTRUCTION operand refers to.
493 unsigned CFIIndex = MF.addFrameInst(CFIInst);
494
496 MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
497
498 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
499 .addCFIIndex(CFIIndex)
500 .setMIFlag(Flag);
501}
502
503/// Emits Dwarf Info specifying offsets of callee saved registers and
504/// frame pointer. This is called only when basic block sections are enabled.
///
/// NOTE(review): the signature, the body of the `!hasFP(MF)` branch, the call
/// that consumes the createOffset instruction and the final statement are
/// missing from this listing.
507 MachineFunction &MF = *MBB.getParent();
508 if (!hasFP(MF)) {
510 return;
511 }
512 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
513 const Register FramePtr = TRI->getFrameRegister(MF);
// On 64-bit ILP32 targets the 64-bit super-register of the frame pointer is
// used for the DWARF register lookup.
514 const Register MachineFramePtr =
515 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
516 : FramePtr;
517 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
518 // Offset = space for return address + size of the frame pointer itself.
519 int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
521 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
523}
524
// Emits CFI for every callee-saved register recorded in the frame info.
// In the prologue each register gets either a DW_CFA_expression escape
// (when a stack-pointer save instruction exists) or a plain offset rule; in
// the epilogue each register gets a restore rule. Afterwards, if a
// stack-pointer save instruction exists, a DW_CFA_def_cfa_expression escape
// is emitted as well.
// NOTE(review): the start of the signature, the declaration of `X86FI`, and
// the calls that consume the MCCFIInstruction values are missing from this
// listing.
527 const DebugLoc &DL, bool IsPrologue) const {
528 MachineFunction &MF = *MBB.getParent();
529 MachineFrameInfo &MFI = MF.getFrameInfo();
530 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
532
533 // Add callee saved registers to move list.
534 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
535
536 // Calculate offsets.
537 for (const CalleeSavedInfo &I : CSI) {
538 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
539 MCRegister Reg = I.getReg();
540 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
541
542 if (IsPrologue) {
543 if (X86FI->getStackPtrSaveMI()) {
544 // +2*SlotSize because there is return address and ebp at the bottom
545 // of the stack.
546 // | retaddr |
547 // | ebp |
548 // | |<--ebp
549 Offset += 2 * SlotSize;
// Byte layout built below: DW_CFA_expression, ULEB128(register), the
// literal 2, DW_OP_breg<frame pointer>, SLEB128(Offset).
550 SmallString<64> CfaExpr;
551 CfaExpr.push_back(dwarf::DW_CFA_expression);
552 uint8_t buffer[16];
553 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
554 CfaExpr.push_back(2);
555 Register FramePtr = TRI->getFrameRegister(MF);
556 const Register MachineFramePtr =
557 STI.isTarget64BitILP32()
559 : FramePtr;
560 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
561 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
562 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
564 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
566 } else {
568 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
569 }
570 } else {
572 MCCFIInstruction::createRestore(nullptr, DwarfReg));
573 }
574 }
575 if (auto *MI = X86FI->getStackPtrSaveMI()) {
// The frame index is read from operand 1 of the save instruction.
576 int FI = MI->getOperand(1).getIndex();
577 int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
578 SmallString<64> CfaExpr;
579 Register FramePtr = TRI->getFrameRegister(MF);
580 const Register MachineFramePtr =
581 STI.isTarget64BitILP32()
583 : FramePtr;
584 unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
585 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
586 uint8_t buffer[16];
587 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
588 CfaExpr.push_back(dwarf::DW_OP_deref);
589
// Wrap the expression: opcode, encoded expression length, expression bytes.
590 SmallString<64> DefCfaExpr;
591 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
592 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
593 DefCfaExpr.append(CfaExpr.str());
594 // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
596 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
598 }
599}
600
601void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
602 MachineBasicBlock &MBB) const {
603 const MachineFunction &MF = *MBB.getParent();
604
605 // Insertion point.
606 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
607
608 // Fake a debug loc.
609 DebugLoc DL;
610 if (MBBI != MBB.end())
611 DL = MBBI->getDebugLoc();
612
613 // Zero out FP stack if referenced. Do this outside of the loop below so that
614 // it's done only once.
615 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
616 for (MCRegister Reg : RegsToZero.set_bits()) {
617 if (!X86::RFP80RegClass.contains(Reg))
618 continue;
619
620 unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
621 for (unsigned i = 0; i != NumFPRegs; ++i)
622 BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
623
624 for (unsigned i = 0; i != NumFPRegs; ++i)
625 BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
626 break;
627 }
628
629 // For GPRs, we only care to clear out the 32-bit register.
630 BitVector GPRsToZero(TRI->getNumRegs());
631 for (MCRegister Reg : RegsToZero.set_bits())
632 if (TRI->isGeneralPurposeRegister(MF, Reg)) {
633 GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
634 RegsToZero.reset(Reg);
635 }
636
637 // Zero out the GPRs first.
638 for (MCRegister Reg : GPRsToZero.set_bits())
639 TII.buildClearRegister(Reg, MBB, MBBI, DL);
640
641 // Zero out the remaining registers.
642 for (MCRegister Reg : RegsToZero.set_bits())
643 TII.buildClearRegister(Reg, MBB, MBBI, DL);
644}
645
// Emits a stack probe at MBBI. On Windows CoreCLR targets the probe is
// inline: in the prologue a STACKALLOC_W_PROBING pseudo with immediate 0 is
// emitted, elsewhere the inline probe is emitted directly. All other targets
// get a probe call.
// NOTE(review): the start of the signature (function name, `MF`, `MBB`) and
// one body line are missing from this listing.
648 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
649 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
651 if (STI.isTargetWindowsCoreCLR()) {
652 if (InProlog) {
653 BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
654 .addImm(0 /* no explicit stack size */);
655 } else {
656 emitStackProbeInline(MF, MBB, MBBI, DL, false);
657 }
658 } else {
659 emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
660 }
661}
662
// NOTE(review): the signature line of this definition is missing from this
// listing.
// Returns true for Windows OS targets that are not Win64.
664 return STI.isOSWindows() && !STI.isTargetWin64();
665}
666
// Expands the first STACKALLOC_W_PROBING pseudo found in PrologMBB (if any)
// into an inline stack probe and then erases the pseudo.
// NOTE(review): the first line of the signature (function name and the `MF`
// parameter) is missing from this listing.
668 MachineBasicBlock &PrologMBB) const {
669 auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
670 return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
671 });
672 if (Where != PrologMBB.end()) {
673 DebugLoc DL = PrologMBB.findDebugLoc(Where);
674 emitStackProbeInline(MF, PrologMBB, Where, DL, true);
675 Where->eraseFromParent();
676 }
677}
678
// Dispatches to the inline stack-probe expansion: the CoreCLR-specific one
// on 64-bit Windows CoreCLR targets, the generic one everywhere else.
// NOTE(review): the `MBB`/`MBBI` parameter lines and one body line are
// missing from this listing.
679void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
682 const DebugLoc &DL,
683 bool InProlog) const {
685 if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
686 emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
687 else
688 emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
689}
690
// Generic inline expansion of the probing allocation at MBBI. The
// allocation size is read from operand 0 of that instruction; sizes above
// eight probe-sizes are expanded as a loop, smaller ones as an unrolled block.
// NOTE(review): the `MF`/`MBB` parameter line and the declarations of `STI`
// and `TLI` are missing from this listing.
691void X86FrameLowering::emitStackProbeInlineGeneric(
693 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
694 MachineInstr &AllocWithProbe = *MBBI;
695 uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
696
699 assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
700 "different expansion expected for CoreCLR 64 bit");
701
702 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
// Threshold between the unrolled and the loop expansion.
703 uint64_t ProbeChunk = StackProbeSize * 8;
704
// Only relevant when the stack is realigned; 0 otherwise.
705 uint64_t MaxAlign =
706 TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
707
708 // Synthesize a loop or unroll it, depending on the number of iterations.
709 // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
710 // between the unaligned rsp and current rsp.
711 if (Offset > ProbeChunk) {
712 emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
713 MaxAlign % StackProbeSize);
714 } else {
715 emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
716 MaxAlign % StackProbeSize);
717 }
718}
719
// Unrolled inline stack probe: allocates Offset bytes in steps of at most
// StackProbeSize, storing 0 at the new top of stack after each full step, and
// finally allocates the remaining tail without probing it.
// AlignOffset must be smaller than StackProbeSize and shortens the first step.
// NOTE(review): most of the parameter list and parts of several BuildMI
// chains are missing from this listing.
720void X86FrameLowering::emitStackProbeInlineGenericBlock(
723 uint64_t AlignOffset) const {
724
725 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
726 const bool HasFP = hasFP(MF);
727 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
728 const X86TargetLowering &TLI = *STI.getTargetLowering();
729 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
730 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
731
// Number of bytes allocated so far.
732 uint64_t CurrentOffset = 0;
733
734 assert(AlignOffset < StackProbeSize);
735
736 // If the offset is so small it fits within a page, there's nothing to do.
737 if (StackProbeSize < Offset + AlignOffset) {
738
739 uint64_t StackAdjustment = StackProbeSize - AlignOffset;
740 BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
741 .setMIFlag(MachineInstr::FrameSetup);
// Without a frame pointer the CFA offset follows each stack adjustment.
742 if (!HasFP && NeedsDwarfCFI) {
743 BuildCFI(
744 MBB, MBBI, DL,
745 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
746 }
747
748 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
750 StackPtr, false, 0)
751 .addImm(0)
753 NumFrameExtraProbe++;
754 CurrentOffset = StackProbeSize - AlignOffset;
755 }
756
757 // For the next N - 1 pages, just probe. I tried to take advantage of
758 // natural probes but it implies much more logic and there was very few
759 // interesting natural probes to interleave.
760 while (CurrentOffset + StackProbeSize < Offset) {
761 BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
762 .setMIFlag(MachineInstr::FrameSetup);
763
764 if (!HasFP && NeedsDwarfCFI) {
765 BuildCFI(
766 MBB, MBBI, DL,
767 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
768 }
769 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
771 StackPtr, false, 0)
772 .addImm(0)
774 NumFrameExtraProbe++;
775 CurrentOffset += StackProbeSize;
776 }
777
778 // No need to probe the tail, it is smaller than a Page.
779 uint64_t ChunkSize = Offset - CurrentOffset;
780 if (ChunkSize == SlotSize) {
781 // Use push for slot sized adjustments as a size optimization,
782 // like emitSPUpdate does when not probing.
783 unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
784 unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
785 BuildMI(MBB, MBBI, DL, TII.get(Opc))
788 } else {
789 BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
790 .setMIFlag(MachineInstr::FrameSetup);
791 }
792 // No need to adjust Dwarf CFA offset here, the last position of the stack has
793 // been defined
794}
795
// Loop-based inline stack probe for an allocation of Offset bytes.
// After an optional first allocation of AlignOffset bytes (with a probe), the
// code computes the final stack-pointer bound into a scratch register, then
// creates two new blocks: testMBB, which allocates one StackProbeSize step,
// stores 0 at the new top of stack and loops until the bound is reached, and
// tailMBB, which receives the rest of the original block and allocates the
// remaining Offset % StackProbeSize bytes.
// NOTE(review): most of the parameter list, the declaration of `MBBIter`,
// and parts of many BuildMI / BuildCFI chains are missing from this listing.
796void X86FrameLowering::emitStackProbeInlineGenericLoop(
799 uint64_t AlignOffset) const {
800 assert(Offset && "null offset");
801
802 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
804 "Inline stack probe loop will clobber live EFLAGS.");
805
806 const bool NeedsDwarfCFI = needsDwarfCFI(MF);
807 const bool HasFP = hasFP(MF);
808 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
809 const X86TargetLowering &TLI = *STI.getTargetLowering();
810 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
811 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
812
813 if (AlignOffset) {
814 if (AlignOffset < StackProbeSize) {
815 // Perform a first smaller allocation followed by a probe.
816 BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
817 .setMIFlag(MachineInstr::FrameSetup);
818
819 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
821 StackPtr, false, 0)
822 .addImm(0)
824 NumFrameExtraProbe++;
825 Offset -= AlignOffset;
826 }
827 }
828
829 // Synthesize a loop
830 NumFrameLoopProbe++;
831 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
832
833 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
834 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
835
837 MF.insert(MBBIter, testMBB);
838 MF.insert(MBBIter, tailMBB);
839
// Scratch register holding the loop bound: R11 with 64-bit frame pointers,
// R11D on other 64-bit targets, EAX otherwise.
840 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
841 : Is64Bit ? X86::R11D
842 : X86::EAX;
843
844 // save loop bound
845 {
// Only whole probe-size steps are handled by the loop.
846 const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
847
848 // Can we calculate the loop bound using SUB with a 32-bit immediate?
849 // Note that the immediate gets sign-extended when used with a 64-bit
850 // register, so in that case we only have 31 bits to work with.
851 bool canUseSub =
852 Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
853
854 if (canUseSub) {
855 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
856
857 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
860 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
861 .addReg(FinalStackProbed)
862 .addImm(BoundOffset)
864 } else if (Uses64BitFramePtr) {
// Bound too large for an immediate SUB: load -BoundOffset and add instead.
865 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
866 .addImm(-BoundOffset)
868 BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
869 .addReg(FinalStackProbed)
872 } else {
873 llvm_unreachable("Offset too large for 32-bit stack pointer");
874 }
875
876 // while in the loop, use loop-invariant reg for CFI,
877 // instead of the stack pointer, which changes during the loop
878 if (!HasFP && NeedsDwarfCFI) {
879 // x32 uses the same DWARF register numbers as x86-64,
880 // so there isn't a register number for r11d, we must use r11 instead
881 const Register DwarfFinalStackProbed =
882 STI.isTarget64BitILP32()
883 ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
884 : FinalStackProbed;
885
888 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
890 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
891 }
892 }
893
894 // allocate a page
895 BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
896 /*InEpilogue=*/false)
897 .setMIFlag(MachineInstr::FrameSetup);
898
899 // touch the page
900 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
902 StackPtr, false, 0)
903 .addImm(0)
905
906 // cmp with stack pointer bound
907 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
909 .addReg(FinalStackProbed)
911
912 // jump
913 BuildMI(testMBB, DL, TII.get(X86::JCC_1))
914 .addMBB(testMBB)
// The loop block either repeats itself or falls through to the tail.
917 testMBB->addSuccessor(testMBB);
918 testMBB->addSuccessor(tailMBB);
919
920 // BB management
// Everything from MBBI to the end of MBB moves into the tail block.
921 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
923 MBB.addSuccessor(testMBB);
924
925 // handle tail
926 const uint64_t TailOffset = Offset % StackProbeSize;
927 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
928 if (TailOffset) {
929 BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
930 /*InEpilogue=*/false)
931 .setMIFlag(MachineInstr::FrameSetup);
932 }
933
934 // after the loop, switch back to stack pointer for CFI
935 if (!HasFP && NeedsDwarfCFI) {
936 // x32 uses the same DWARF register numbers as x86-64,
937 // so there isn't a register number for esp, we must use rsp instead
938 const Register DwarfStackPtr =
939 STI.isTarget64BitILP32()
942
943 BuildCFI(*tailMBB, TailMBBIter, DL,
945 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
946 }
947
948 // Update Live In information
949 fullyRecomputeLiveIns({tailMBB, testMBB});
950}
951
/// Expand an inline stack probe for 64-bit Windows CoreCLR targets.
///
/// The block \p MBB is split at \p MBBI: everything from \p MBBI onwards is
/// moved into a new continuation block, and a rounding block and a probe loop
/// block are inserted in between. The code appended to \p MBB computes the
/// would-be final stack pointer (RSP minus the size held in RAX, or zero when
/// the subtraction overflows) and compares it with the stack limit loaded
/// through GS. The loop block stores a zero byte one page at a time, and the
/// continuation block finally subtracts the size from RSP.
///
/// When \p InProlog is true the expansion works on RAX/RCX/RDX directly,
/// spilling RCX/RDX to stack slots if they are live into \p MBB and reloading
/// them in the continuation block; otherwise virtual registers and a PHI are
/// used.
952void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
954 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
955 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
956 assert(STI.is64Bit() && "different expansion needed for 32 bit");
957 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
958 const TargetInstrInfo &TII = *STI.getInstrInfo();
959 const BasicBlock *LLVM_BB = MBB.getBasicBlock();
960
961 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
963 "Inline stack probe loop will clobber live EFLAGS.");
964
965 // RAX contains the number of bytes of desired stack adjustment.
966 // The handling here assumes this value has already been updated so as to
967 // maintain stack alignment.
968 //
969 // We need to exit with RSP modified by this amount and execute suitable
970 // page touches to notify the OS that we're growing the stack responsibly.
971 // All stack probing must be done without modifying RSP.
972 //
973 // MBB:
974 // SizeReg = RAX;
975 // ZeroReg = 0
976 // CopyReg = RSP
977 // Flags, TestReg = CopyReg - SizeReg
978 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
979 // LimitReg = gs magic thread env access
980 // if FinalReg >= LimitReg goto ContinueMBB
981 // RoundMBB:
982 // RoundedReg = page address of FinalReg
983 // LoopMBB:
984 // JoinReg = PHI(LimitReg,ProbeReg)
985 // ProbeReg = JoinReg - PageSize
986 // [ProbeReg] = 0
987 // if (ProbeReg > RoundedReg) goto LoopMBB
988 // ContinueMBB:
989 // RSP = RSP - RAX
990 // [rest of original MBB]
991
992 // Set up the new basic blocks
993 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
994 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
995 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
996
997 MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
998 MF.insert(MBBIter, RoundMBB);
999 MF.insert(MBBIter, LoopMBB);
1000 MF.insert(MBBIter, ContinueMBB);
1001
1002 // Split MBB and move the tail portion down to ContinueMBB.
     // BeforeMBBI remembers the last instruction that stays in MBB ahead of
     // the expansion, so the instructions appended below can be found again
     // when they are flagged as frame setup at the end of this function.
1003 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
1004 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
1005 ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1006
1007 // Some useful constants
1008 const int64_t ThreadEnvironmentStackLimit = 0x10;
1009 const int64_t PageSize = 0x1000;
1010 const int64_t PageMask = ~(PageSize - 1);
1011
1012 // Registers we need. For the normal case we use virtual
1013 // registers. For the prolog expansion we use RAX, RCX and RDX.
1014 MachineRegisterInfo &MRI = MF.getRegInfo();
1015 const TargetRegisterClass *RegClass = &X86::GR64RegClass;
1016 const Register
1017 SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
1018 ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1019 CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1020 TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1021 FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1022 RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
1023 LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1024 JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
1025 ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);
1026
1027 // SP-relative offsets where we can save RCX and RDX.
     // A value of 0 doubles as "not spilled"; the reload code in ContinueMBB
     // tests these for non-zero.
1028 int64_t RCXShadowSlot = 0;
1029 int64_t RDXShadowSlot = 0;
1030
1031 // If inlining in the prolog, save RCX and RDX.
1032 if (InProlog) {
1033 // Compute the offsets. We need to account for things already
1034 // pushed onto the stack at this point: return address, frame
1035 // pointer (if used), and callee saves.
1036 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1037 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
1038 const bool HasFP = hasFP(MF);
1039
1040 // Check if we need to spill RCX and/or RDX.
1041 // Here we assume that no earlier prologue instruction changes RCX and/or
1042 // RDX, so checking the block live-ins is enough.
1043 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
1044 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
1045 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
1046 // Assign the initial slot to both registers, then change RDX's slot if both
1047 // need to be spilled.
1048 if (IsRCXLiveIn)
1049 RCXShadowSlot = InitSlot;
1050 if (IsRDXLiveIn)
1051 RDXShadowSlot = InitSlot;
1052 if (IsRDXLiveIn && IsRCXLiveIn)
1053 RDXShadowSlot += 8;
1054 // Emit the saves if needed.
1055 if (IsRCXLiveIn)
1056 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1057 RCXShadowSlot)
1058 .addReg(X86::RCX);
1059 if (IsRDXLiveIn)
1060 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
1061 RDXShadowSlot)
1062 .addReg(X86::RDX);
1063 } else {
1064 // Not in the prolog. Copy RAX to a virtual reg.
1065 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
1066 }
1067
1068 // Add code to MBB to check for overflow and set the new target stack pointer
1069 // to zero if so.
1070 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
1071 .addReg(ZeroReg, RegState::Undef)
1072 .addReg(ZeroReg, RegState::Undef);
1073 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
1074 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
1075 .addReg(CopyReg)
1076 .addReg(SizeReg);
1077 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
1078 .addReg(TestReg)
1079 .addReg(ZeroReg)
1081
1082 // FinalReg now holds final stack pointer value, or zero if
1083 // allocation would overflow. Compare against the current stack
1084 // limit from the thread environment block. Note this limit is the
1085 // lowest touched page on the stack, not the point at which the OS
1086 // will cause an overflow exception, so this is just an optimization
1087 // to avoid unnecessarily touching pages that are below the current
1088 // SP but already committed to the stack by the OS.
1089 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
1090 .addReg(0)
1091 .addImm(1)
1092 .addReg(0)
1093 .addImm(ThreadEnvironmentStackLimit)
1094 .addReg(X86::GS);
1095 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
1096 // Jump if the desired stack pointer is at or above the stack limit.
1097 BuildMI(&MBB, DL, TII.get(X86::JCC_1))
1098 .addMBB(ContinueMBB)
1100
1101 // Add code to roundMBB to round the final stack pointer to a page boundary.
1102 if (InProlog)
1103 RoundMBB->addLiveIn(FinalReg);
1104 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
1105 .addReg(FinalReg)
1106 .addImm(PageMask);
1107 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
1108
1109 // LimitReg now holds the current stack limit, RoundedReg page-rounded
1110 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
1111 // and probe until we reach RoundedReg.
     // The PHI is only built for the virtual-register form; in the prolog
     // LimitReg, JoinReg and ProbeReg are all RCX, so no merge is needed.
1112 if (!InProlog) {
1113 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
1114 .addReg(LimitReg)
1115 .addMBB(RoundMBB)
1116 .addReg(ProbeReg)
1117 .addMBB(LoopMBB);
1118 }
1119
1120 if (InProlog)
1121 LoopMBB->addLiveIn(JoinReg);
1122 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
1123 false, -PageSize);
1124
1125 // Probe by storing a byte onto the stack.
1126 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
1127 .addReg(ProbeReg)
1128 .addImm(1)
1129 .addReg(0)
1130 .addImm(0)
1131 .addReg(0)
1132 .addImm(0);
1133
1134 if (InProlog)
1135 LoopMBB->addLiveIn(RoundedReg);
1136 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
1137 .addReg(RoundedReg)
1138 .addReg(ProbeReg);
1139 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
1140 .addMBB(LoopMBB)
1142
     // Everything inserted into ContinueMBB below goes in front of this
     // iterator, i.e. ahead of the instructions moved over from MBB.
1143 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
1144
1145 // If in prolog, restore RDX and RCX.
1146 if (InProlog) {
1147 if (RCXShadowSlot) // It means we spilled RCX in the prologue.
1148 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1149 TII.get(X86::MOV64rm), X86::RCX),
1150 X86::RSP, false, RCXShadowSlot);
1151 if (RDXShadowSlot) // It means we spilled RDX in the prologue.
1152 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
1153 TII.get(X86::MOV64rm), X86::RDX),
1154 X86::RSP, false, RDXShadowSlot);
1155 }
1156
1157 // Now that the probing is done, add code to continueMBB to update
1158 // the stack pointer for real.
1159 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
1160 .addReg(X86::RSP)
1161 .addReg(SizeReg);
1162
1163 // Add the control flow edges we need.
1164 MBB.addSuccessor(ContinueMBB);
1165 MBB.addSuccessor(RoundMBB);
1166 RoundMBB->addSuccessor(LoopMBB);
1167 LoopMBB->addSuccessor(ContinueMBB);
1168 LoopMBB->addSuccessor(LoopMBB);
1169
1170 if (InProlog) {
1171 LivePhysRegs LiveRegs;
1172 computeAndAddLiveIns(LiveRegs, *ContinueMBB);
1173 }
1174
1175 // Mark all the instructions added to the prolog as frame setup.
1176 if (InProlog) {
     // Start just after the last pre-existing instruction of MBB.
1177 for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
1178 BeforeMBBI->setFlag(MachineInstr::FrameSetup);
1179 }
1180 for (MachineInstr &MI : *RoundMBB) {
1182 }
1183 for (MachineInstr &MI : *LoopMBB) {
1185 }
1186 for (MachineInstr &MI :
1187 llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
1189 }
1190 }
1191}
1192
/// Emit a call to the target's stack probe function in front of \p MBBI.
///
/// The call opcode is chosen from the pointer width and the code model; with
/// the 64-bit large code model the call goes through R11. On Win64 and on
/// non-Windows targets an additional instruction taking SP and AX as operands
/// follows the call and becomes the instruction that modifies SP. If
/// \p InstrNum is provided, a debug value substitution is recorded against
/// that SP-modifying instruction. With \p InProlog set, every instruction
/// inserted here is flagged FrameSetup.
///
/// Aborts via report_fatal_error for 64-bit large code model combined with
/// indirect thunk calls, which is not implemented.
1193void X86FrameLowering::emitStackProbeCall(
1195 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
1196 std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
1197 bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
1198
1199 // FIXME: Add indirect thunk support and remove this.
1200 if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
1201 report_fatal_error("Emitting stack probe calls on 64-bit with the large "
1202 "code model and indirect thunks not yet implemented.");
1203
1204 assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
1206 "Stack probe calls will clobber live EFLAGS.");
1207
1208 unsigned CallOp;
1209 if (Is64Bit)
1210 CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
1211 else
1212 CallOp = X86::CALLpcrel32;
1213
1214 StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
1215
1216 MachineInstrBuilder CI;
     // Remember the instruction in front of the expansion so that the newly
     // inserted instructions can be walked when applying FrameSetup below.
1217 MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
1218
1219 // All current stack probes take AX and SP as input, clobber flags, and
1220 // preserve all registers. x86_64 probes leave RSP unmodified.
1222 // For the large code model, we have to call through a register. Use R11,
1223 // as it is scratch in all supported calling conventions.
1224 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
1226 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
1227 } else {
1228 CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
1230 }
1231
1232 unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
1233 unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
1239
     // By default the call itself is treated as the instruction that
     // modifies SP; this is overridden below when a separate adjustment is
     // emitted.
1240 MachineInstr *ModInst = CI;
1241 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1242 // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
1243 // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
1244 // themselves. They also do not clobber %rax so we can reuse it when
1245 // adjusting %rsp.
1246 // All other platforms do not specify a particular ABI for the stack probe
1247 // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
1248 ModInst =
1250 .addReg(SP)
1251 .addReg(AX);
1252 }
1253
1254 // DebugInfo variable locations -- if there's an instruction number for the
1255 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
1256 // modifies SP.
1257 if (InstrNum) {
1258 if (STI.isTargetWin64() || !STI.isOSWindows()) {
1259 // Label destination operand of the subtract.
1260 MF.makeDebugValueSubstitution(*InstrNum,
1261 {ModInst->getDebugInstrNum(), 0});
1262 } else {
1263 // Label the call. The operand number is the penultimate operand, zero
1264 // based.
1265 unsigned SPDefOperand = ModInst->getNumOperands() - 2;
1267 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
1268 }
1269 }
1270
1271 if (InProlog) {
1272 // Apply the frame setup flag to all inserted instrs.
1273 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
1274 ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
1275 }
1276}
1277
/// Derive the frame-register offset used for the Win64 SET_FPREG unwind
/// opcode from the stack adjustment \p SPAdjust.
///
/// The result is \p SPAdjust capped at 128 and rounded down to a multiple of
/// 16.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // The Win64 ABI would allow offsets up to 240; 128 works equally well and
  // might require smaller successive adjustments.
  constexpr uint64_t OffsetCap = 128;
  // UWOP_SET_FPREG requires the offset to be 16-byte aligned.
  constexpr uint64_t AlignMask = ~static_cast<uint64_t>(15);

  const uint64_t Capped = SPAdjust < OffsetCap ? SPAdjust : OffsetCap;
  return Capped & AlignMask;
}
1286
1287// If we're forcing a stack realignment we can't rely on just the frame
1288// info, we need to know the ABI stack alignment as well in case we
1289// have a call out. Otherwise just make sure we have some alignment - we'll
1290// go with the minimum SlotSize.
//
// Starts from the frame info's maximum alignment and returns the resulting
// alignment as a byte count (Align::value()).
1291uint64_t
1292X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
1293 const MachineFrameInfo &MFI = MF.getFrameInfo();
1294 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
1295 Align StackAlign = getStackAlign();
     // Realignment is only forced by the "stackrealign" function attribute.
1296 bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
1297 if (HasRealign) {
     // With calls, take the larger of the ABI alignment and the frame's own
     // requirement; without calls, SlotSize is the floor.
1298 if (MFI.hasCalls())
1299 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
1300 else if (MaxAlign < SlotSize)
1301 MaxAlign = Align(SlotSize);
1302 }
1303
1305 if (HasRealign)
1306 MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
1307 else
1308 MaxAlign = Align(16);
1309 }
1310 return MaxAlign.value();
1311}
1312
/// Align \p Reg by AND-ing it with -\p MaxAlign.
///
/// In the common case a single AND instruction is emitted at MBBI and its
/// EFLAGS def is marked dead. When \p Reg is the stack pointer, inline stack
/// probing is enabled for the function, and \p MaxAlign is at least the stack
/// probe size, the realignment is instead expanded across four new basic
/// blocks (entry, head, body, foot) placed ahead of \p MBB: the aligned
/// target is computed in a scratch register, the stack pointer is stepped
/// down by StackProbeSize per iteration, and it is finally set to the aligned
/// value in the foot block.
1313void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1315 const DebugLoc &DL, Register Reg,
1316 uint64_t MaxAlign) const {
1317 uint64_t Val = -MaxAlign;
1318 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
1319
1320 MachineFunction &MF = *MBB.getParent();
1321 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
1322 const X86TargetLowering &TLI = *STI.getTargetLowering();
1323 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
1324 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
1325
1326 // We want to make sure that (in worst case) less than StackProbeSize bytes
1327 // are not probed after the AND. This assumption is used in
1328 // emitStackProbeInlineGeneric.
1329 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1330 {
1331 NumFrameLoopProbe++;
1332 MachineBasicBlock *entryMBB =
1334 MachineBasicBlock *headMBB =
1336 MachineBasicBlock *bodyMBB =
1338 MachineBasicBlock *footMBB =
1340
1342 MF.insert(MBBIter, entryMBB);
1343 MF.insert(MBBIter, headMBB);
1344 MF.insert(MBBIter, bodyMBB);
1345 MF.insert(MBBIter, footMBB);
1346 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
     // Scratch register that receives the aligned stack pointer value; it
     // is copied into StackPtr in the foot block.
1347 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
1348 : Is64Bit ? X86::R11D
1349 : X86::EAX;
1350
1351 // Setup entry block
1352 {
1353
     // Everything that precedes the insertion point moves into entryMBB.
1354 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
1355 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
1358 MachineInstr *MI =
1359 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
1360 .addReg(FinalStackProbed)
1361 .addImm(Val)
1363
1364 // The EFLAGS implicit def is dead.
1365 MI->getOperand(3).setIsDead();
1366
1367 BuildMI(entryMBB, DL,
1368 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1369 .addReg(FinalStackProbed)
1372 BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
1373 .addMBB(&MBB)
1376 entryMBB->addSuccessor(headMBB);
1377 entryMBB->addSuccessor(&MBB);
1378 }
1379
1380 // Loop entry block
1381
1382 {
1383 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1384 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
1386 .addImm(StackProbeSize)
1388
1389 BuildMI(headMBB, DL,
1390 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1392 .addReg(FinalStackProbed)
1394
1395 // jump to the footer if StackPtr < FinalStackProbed
1396 BuildMI(headMBB, DL, TII.get(X86::JCC_1))
1397 .addMBB(footMBB)
1400
1401 headMBB->addSuccessor(bodyMBB);
1402 headMBB->addSuccessor(footMBB);
1403 }
1404
1405 // setup loop body
1406 {
1407 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
1409 StackPtr, false, 0)
1410 .addImm(0)
1412
1413 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
1414 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
1416 .addImm(StackProbeSize)
1418
1419 // cmp with stack pointer bound
1420 BuildMI(bodyMBB, DL,
1421 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1422 .addReg(FinalStackProbed)
1425
1426 // jump back while FinalStackProbed < StackPtr
1427 BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
1428 .addMBB(bodyMBB)
1431 bodyMBB->addSuccessor(bodyMBB);
1432 bodyMBB->addSuccessor(footMBB);
1433 }
1434
1435 // setup loop footer
1436 {
1437 BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
1438 .addReg(FinalStackProbed)
1440 addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
1442 StackPtr, false, 0)
1443 .addImm(0)
1445 footMBB->addSuccessor(&MBB);
1446 }
1447
1448 fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
1449 }
1450 } else {
     // Plain case: a single AND on Reg, no probing.
1451 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
1452 .addReg(Reg)
1453 .addImm(Val)
1455
1456 // The EFLAGS implicit def is dead.
1457 MI->getOperand(3).setIsDead();
1458 }
1459}
1460
1462 // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
1463 // clobbered by any interrupt handler.
1464 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1465 "MF used frame lowering for wrong subtarget");
1466 const Function &Fn = MF.getFunction();
     // The Win64 check is made on the function's calling convention, so it is
     // decided per function rather than per target.
1467 const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
     // True only for 64-bit code with a non-Win64 calling convention whose
     // function does not carry the NoRedZone attribute.
1468 return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
1469}
1470
1471/// Return true if we need to use the restricted Windows x64 prologue and
1472/// epilogue code patterns that can be described with WinCFI (.seh_*
1473/// directives).
1474bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
1475 return MF.getTarget().getMCAsmInfo().usesWindowsCFI();
1476}
1477
1478bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
1479 return !isWin64Prologue(MF) && MF.needsFrameMoves();
1480}
1481
1482/// Return true if an opcode is part of the REP group of instructions
1483static bool isOpcodeRep(unsigned Opcode) {
1484 switch (Opcode) {
1485 case X86::REPNE_PREFIX:
1486 case X86::REP_MOVSB_32:
1487 case X86::REP_MOVSB_64:
1488 case X86::REP_MOVSD_32:
1489 case X86::REP_MOVSD_64:
1490 case X86::REP_MOVSQ_32:
1491 case X86::REP_MOVSQ_64:
1492 case X86::REP_MOVSW_32:
1493 case X86::REP_MOVSW_64:
1494 case X86::REP_PREFIX:
1495 case X86::REP_STOSB_32:
1496 case X86::REP_STOSB_64:
1497 case X86::REP_STOSD_32:
1498 case X86::REP_STOSD_64:
1499 case X86::REP_STOSQ_32:
1500 case X86::REP_STOSQ_64:
1501 case X86::REP_STOSW_32:
1502 case X86::REP_STOSW_64:
1503 return true;
1504 default:
1505 break;
1506 }
1507 return false;
1508}
1509
1510/// emitPrologue - Push callee-saved registers onto the stack, which
1511/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
1512/// space for local variables. Also emit labels used by the exception handler to
1513/// generate the exception handling frames.
1514
1515/*
1516 Here's a gist of what gets emitted:
1517
1518 ; Establish frame pointer, if needed
1519 [if needs FP]
1520 push %rbp
1521 .cfi_def_cfa_offset 16
1522 .cfi_offset %rbp, -16
1523 .seh_pushreg %rbp
1524 mov %rsp, %rbp
1525 .cfi_def_cfa_register %rbp
1526
1527 ; Spill general-purpose registers
1528 [for all callee-saved GPRs]
1529 pushq %<reg>
1530 [if not needs FP]
1531 .cfi_def_cfa_offset (offset from RETADDR)
1532 .seh_pushreg %<reg>
1533
1534 ; If the required stack alignment > default stack alignment
1535 ; rsp needs to be re-aligned. This creates a "re-alignment gap"
1536 ; of unknown size in the stack frame.
1537 [if stack needs re-alignment]
1538 and $MASK, %rsp
1539
1540 ; Allocate space for locals
1541 [if target is Windows and allocated space > 4096 bytes]
1542 ; Windows needs special care for allocations larger
1543 ; than one page.
1544 mov $NNN, %rax
1545 call ___chkstk_ms/___chkstk
1546 sub %rax, %rsp
1547 [else]
1548 sub $NNN, %rsp
1549
1550 [if needs FP]
1551 .seh_stackalloc (size of XMM spill slots)
1552 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
1553 [else]
1554 .seh_stackalloc NNN
1555
1556 ; Spill XMMs
1557 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
1558 ; they may get spilled on any platform, if the current function
1559 ; calls @llvm.eh.unwind.init
1560 [if needs FP]
1561 [for all callee-saved XMM registers]
1562 movaps %<xmm reg>, -MMM(%rbp)
1563 [for all callee-saved XMM registers]
1564 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
1565 ; i.e. the offset relative to (%rbp - SEHFrameOffset)
1566 [else]
1567 [for all callee-saved XMM registers]
1568 movaps %<xmm reg>, KKK(%rsp)
1569 [for all callee-saved XMM registers]
1570 .seh_savexmm %<xmm reg>, KKK
1571
1572 .seh_endprologue
1573
1574 [if needs base pointer]
1575 mov %rsp, %rbx
1576 [if needs to restore base pointer]
1577 mov %rsp, -MMM(%rbp)
1578
1579 ; Emit CFI info
1580 [if needs FP]
1581 [for all callee-saved registers]
1582 .cfi_offset %<reg>, (offset from %rbp)
1583 [else]
1584 .cfi_def_cfa_offset (offset from RETADDR)
1585 [for all callee-saved registers]
1586 .cfi_offset %<reg>, (offset from %rsp)
1587
1588 Notes:
1589 - .seh directives are emitted only for Windows 64 ABI
1590 - .cv_fpo directives are emitted on win32 when emitting CodeView
1591 - .cfi directives are emitted for all other ABIs
1592 - for 32-bit code, substitute %e?? registers for %r??
1593*/
1594
1596 MachineBasicBlock &MBB) const {
1597 assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
1598 "MF used frame lowering for wrong subtarget");
1600 MachineFrameInfo &MFI = MF.getFrameInfo();
1601 const Function &Fn = MF.getFunction();
1603 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
1604 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
1605 bool IsFunclet = MBB.isEHFuncletEntry();
1607 if (Fn.hasPersonalityFn())
1608 Personality = classifyEHPersonality(Fn.getPersonalityFn());
1609 bool FnHasClrFunclet =
1610 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
1611 bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
1612 bool HasFP = hasFP(MF);
1613 bool IsWin64Prologue = isWin64Prologue(MF);
1614 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
1615 // FIXME: Emit FPO data for EH funclets.
1616 bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() &&
1618 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
1619 bool NeedsDwarfCFI = needsDwarfCFI(MF);
1620 bool IsWin64UnwindV3 =
1621 NeedsWin64CFI &&
1623 Register FramePtr = TRI->getFrameRegister(MF);
1624 const Register MachineFramePtr =
1625 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
1626 : FramePtr;
1627 Register BasePtr = TRI->getBaseRegister();
1628 bool HasWinCFI = false;
1629
1630 // Helpers to emit Windows x64 unwind SEH pseudos with the correct placement.
1631 // V1/V2: pseudo goes after the real instruction.
1632 // V3: pseudo goes before the real instruction.
1633 // Usage:
1634 // EmitSEHBefore([&]{ BuildMI(...SEH_PushReg...); });
1635 // BuildMI(... real instruction ...);
1636 // EmitSEHAfter([&]{ BuildMI(...SEH_PushReg...); });
1637 auto EmitSEHBefore = [&](auto EmitFn) {
1638 if (NeedsWinCFI && IsWin64UnwindV3) {
1639 HasWinCFI = true;
1640 EmitFn();
1641 }
1642 };
1643 auto EmitSEHAfter = [&](auto EmitFn) {
1644 if (NeedsWinCFI && !IsWin64UnwindV3) {
1645 HasWinCFI = true;
1646 EmitFn();
1647 }
1648 };
1649
1650 // Debug location must be unknown since the first debug location is used
1651 // to determine the end of the prologue.
1652 DebugLoc DL;
1653 Register ArgBaseReg;
1654
1655 // Emit extra prolog for argument stack slot reference.
1656 if (auto *MI = X86FI->getStackPtrSaveMI()) {
1657 // MI is the lea instruction that was created in X86ArgumentStackSlotPass.
1658 // Create extra prolog for stack realignment.
1659 ArgBaseReg = MI->getOperand(0).getReg();
1660 // leal 4(%esp), %basereg
1661 // .cfi_def_cfa %basereg, 0
1662 // andl $-128, %esp
1663 // pushl -4(%basereg)
1664 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
1665 ArgBaseReg)
1667 .addImm(1)
1668 .addUse(X86::NoRegister)
1670 .addUse(X86::NoRegister)
1672 if (NeedsDwarfCFI) {
1673 // .cfi_def_cfa %basereg, 0
1674 unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
1675 BuildCFI(MBB, MBBI, DL,
1676 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
1678 }
1679 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1680 int64_t Offset = -(int64_t)SlotSize;
1681 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
1682 .addReg(ArgBaseReg)
1683 .addImm(1)
1684 .addReg(X86::NoRegister)
1685 .addImm(Offset)
1686 .addReg(X86::NoRegister)
1688 }
1689
1690 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1691 // tail call.
1692 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
1693 if (TailCallArgReserveSize && IsWin64Prologue)
1694 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1695
1696 const bool EmitStackProbeCall =
1697 STI.getTargetLowering()->hasStackProbeSymbol(MF);
1698 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1699
1700 if (HasFP && X86FI->hasSwiftAsyncContext()) {
1703 if (STI.swiftAsyncContextIsDynamicallySet()) {
1704 // The special symbol below is absolute and has a *value* suitable to be
1705 // combined with the frame pointer directly.
1706 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
1707 .addUse(MachineFramePtr)
1708 .addUse(X86::RIP)
1709 .addImm(1)
1710 .addUse(X86::NoRegister)
1711 .addExternalSymbol("swift_async_extendedFramePointerFlags",
1713 .addUse(X86::NoRegister);
1714 break;
1715 }
1716 [[fallthrough]];
1717
1719 assert(
1720 !IsWin64Prologue &&
1721 "win64 prologue does not set the bit 60 in the saved frame pointer");
1722 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
1723 .addUse(MachineFramePtr)
1724 .addImm(60)
1726 break;
1727
1729 break;
1730 }
1731 }
1732
1733 // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1734 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1735 // stack alignment.
1737 Fn.arg_size() == 2) {
1738 StackSize += 8;
1739 MFI.setStackSize(StackSize);
1740
1741 // Update the stack pointer by pushing a register. This is the instruction
1742 // that would end up being emitted by a call to `emitSPUpdate`.
1743 // Hard-coding the update to a push avoids emitting a second
1744 // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
1745 // probing isn't needed anyways for an 8-byte update.
1746 // Pushing a register leaves us in a similar situation to a regular
1747 // function call where we know that the address at (rsp-8) is writeable.
1748 // That way we avoid any off-by-ones with stack probing for additional
1749 // stack pointer updates later on.
1750 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1751 .addReg(X86::RAX, RegState::Undef)
1753 }
1754
1755 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
1756 // function, and use up to 128 bytes of stack space, don't have a frame
1757 // pointer, calls, or dynamic alloca then we do not need to adjust the
1758 // stack pointer (we fit in the Red Zone). We also check that we don't
1759 // push and pop from the stack.
1760 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
1761 !MFI.hasVarSizedObjects() && // No dynamic alloca.
1762 !MFI.adjustsStack() && // No calls.
1763 !EmitStackProbeCall && // No stack probes.
1764 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1765 !MF.shouldSplitStack()) { // Regular stack
1766 uint64_t MinSize =
1768 if (HasFP)
1769 MinSize += SlotSize;
1770 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1771 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1772 MFI.setStackSize(StackSize);
1773 }
1774
1775 // Insert stack pointer adjustment for later moving of return addr. Only
1776 // applies to tail call optimized functions where the callee argument stack
1777 // size is bigger than the callers.
1778 if (TailCallArgReserveSize != 0) {
1779 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
1780 /*InEpilogue=*/false)
1781 .setMIFlag(MachineInstr::FrameSetup);
1782 }
1783
1784 // Mapping for machine moves:
1785 //
1786 // DST: VirtualFP AND
1787 // SRC: VirtualFP => DW_CFA_def_cfa_offset
1788 // ELSE => DW_CFA_def_cfa
1789 //
1790 // SRC: VirtualFP AND
1791 // DST: Register => DW_CFA_def_cfa_register
1792 //
1793 // ELSE
1794 // OFFSET < 0 => DW_CFA_offset_extended_sf
1795 // REG < 64 => DW_CFA_offset + Reg
1796 // ELSE => DW_CFA_offset_extended
1797
1798 uint64_t NumBytes = 0;
1799 int stackGrowth = -SlotSize;
1800
1801 // Find the funclet establisher parameter
1802 MCRegister Establisher;
1803 if (IsClrFunclet)
1804 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1805 else if (IsFunclet)
1806 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1807
1808 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1809 // Immediately spill establisher into the home slot.
1810 // The runtime cares about this.
1811 // MOV64mr %rdx, 16(%rsp)
1812 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1813 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1814 .addReg(Establisher)
1816 MBB.addLiveIn(Establisher);
1817 }
1818
1819 if (HasFP) {
1820 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1821
1822 // Calculate required stack adjustment.
1823 uint64_t FrameSize = StackSize - SlotSize;
1824 NumBytes =
1825 FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1826
1827 // Callee-saved registers are pushed on stack before the stack is realigned.
1828 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
1829 NumBytes = alignTo(NumBytes, MaxAlign);
1830
1831 // Save EBP/RBP into the appropriate stack slot.
1832 auto EmitSEHPushFramePtr = [&]() {
1833 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1836 };
1837 EmitSEHBefore(EmitSEHPushFramePtr);
1838 BuildMI(MBB, MBBI, DL,
1840 .addReg(MachineFramePtr, RegState::Kill)
1842 EmitSEHAfter(EmitSEHPushFramePtr);
1843
1844 if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
1845 // Mark the place where EBP/RBP was saved.
1846 // Define the current CFA rule to use the provided offset.
1847 assert(StackSize);
1848 BuildCFI(MBB, MBBI, DL,
1850 nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
1852
1853 // Change the rule for the FramePtr to be an "offset" rule.
1854 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1855 BuildCFI(MBB, MBBI, DL,
1856 MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
1857 2 * stackGrowth -
1858 (int)TailCallArgReserveSize),
1860 }
1861
1862 if (!IsFunclet) {
1863 if (X86FI->hasSwiftAsyncContext()) {
1864 assert(!IsWin64Prologue &&
1865 "win64 prologue does not store async context right below rbp");
1866 const auto &Attrs = MF.getFunction().getAttributes();
1867
1868 // Before we update the live frame pointer we have to ensure there's a
1869 // valid (or null) asynchronous context in its slot just before FP in
1870 // the frame record, so store it now.
1871 auto EmitSEHPushR14 = [&]() {
1872 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1873 .addImm(X86::R14)
1875 };
1876 EmitSEHBefore(EmitSEHPushR14);
1877 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
1878 // We have an initial context in r14, store it just before the frame
1879 // pointer.
1880 MBB.addLiveIn(X86::R14);
1881 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1882 .addReg(X86::R14)
1884 } else {
1885 // No initial context, store null so that there's no pointer that
1886 // could be misused.
1887 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
1888 .addImm(0)
1890 }
1891 EmitSEHAfter(EmitSEHPushR14);
1892
1893 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
1894 .addUse(X86::RSP)
1895 .addImm(1)
1896 .addUse(X86::NoRegister)
1897 .addImm(8)
1898 .addUse(X86::NoRegister)
1900 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
1901 .addUse(X86::RSP)
1902 .addImm(8)
1904 }
1905
1906 if (!IsWin64Prologue && !IsFunclet) {
1907 // Update EBP with the new base value.
1908 if (!X86FI->hasSwiftAsyncContext())
1909 BuildMI(MBB, MBBI, DL,
1910 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1911 FramePtr)
1914
1915 if (NeedsDwarfCFI) {
1916 if (ArgBaseReg.isValid()) {
1917 SmallString<64> CfaExpr;
1918 CfaExpr.push_back(dwarf::DW_CFA_expression);
1919 uint8_t buffer[16];
1920 unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
1921 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
1922 CfaExpr.push_back(2);
1923 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
1924 CfaExpr.push_back(0);
1925 // DW_CFA_expression: reg5 DW_OP_breg5 +0
1926 BuildCFI(MBB, MBBI, DL,
1927 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
1929 } else {
1930 // Mark effective beginning of when frame pointer becomes valid.
1931 // Define the current CFA to use the EBP/RBP register.
1932 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1933 BuildCFI(
1934 MBB, MBBI, DL,
1935 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
1937 }
1938 }
1939
1940 if (NeedsWinFPO) {
1941 // .cv_fpo_setframe $FramePtr
1942 // NeedsWinFPO is Win32 only, so we're never using Unwind v3, hence it
1943 // is always inserted afterwards.
1944 assert(!IsWin64UnwindV3);
1945 HasWinCFI = true;
1946 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1948 .addImm(0)
1950 }
1951 }
1952 }
1953 } else {
1954 assert(!IsFunclet && "funclets without FPs not yet implemented");
1955 NumBytes =
1956 StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
1957 }
1958
1959 // Update the offset adjustment, which is mainly used by codeview to translate
1960 // from ESP to VFRAME relative local variable offsets.
1961 if (!IsFunclet) {
1962 if (HasFP && TRI->hasStackRealignment(MF))
1963 MFI.setOffsetAdjustment(-NumBytes);
1964 else
1965 MFI.setOffsetAdjustment(-StackSize);
1966 }
1967
1968 // For EH funclets, only allocate enough space for outgoing calls. Save the
1969 // NumBytes value that we would've used for the parent frame.
1970 unsigned ParentFrameNumBytes = NumBytes;
1971 if (IsFunclet)
1972 NumBytes = getWinEHFuncletFrameSize(MF);
1973
1974 // Skip the callee-saved push instructions.
1975 bool PushedRegs = false;
1976 int StackOffset = 2 * stackGrowth;
1978 auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
1979 if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
1980 return false;
1981 unsigned Opc = MBBI->getOpcode();
1982 return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
1983 Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1984 };
1985
1986 while (IsCSPush(MBBI)) {
1987 PushedRegs = true;
1988 Register Reg = MBBI->getOperand(0).getReg();
1989 LastCSPush = MBBI;
1990 unsigned Opc = LastCSPush->getOpcode();
1991 bool IsPush2 = Opc == X86::PUSH2 || Opc == X86::PUSH2P;
1992
1993 // V3: emit SEH pseudo before the real instruction.
1994 EmitSEHBefore([&]() {
1995 if (IsPush2) {
1996 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Push2Regs))
1997 .addImm(Reg)
1998 .addImm(LastCSPush->getOperand(1).getReg())
2000 } else {
2001 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2002 .addImm(Reg)
2004 }
2005 });
2006 ++MBBI;
2007
2008 if (!HasFP && NeedsDwarfCFI) {
2009 // Mark callee-saved push instruction.
2010 // Define the current CFA rule to use the provided offset.
2011 assert(StackSize);
2012 // Compared to push, push2 introduces more stack offset (one more
2013 // register).
2014 if (IsPush2)
2015 StackOffset += stackGrowth;
2016 BuildCFI(MBB, MBBI, DL,
2019 StackOffset += stackGrowth;
2020 }
2021
2022 // V1/V2: emit SEH pseudo after the real instruction.
2023 EmitSEHAfter([&]() {
2024 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2025 .addImm(Reg)
2027 if (IsPush2)
2028 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2029 .addImm(LastCSPush->getOperand(1).getReg())
2031 });
2032 }
2033
2034 // Realign stack after we pushed callee-saved registers (so that we'll be
2035 // able to calculate their offsets from the frame pointer).
2036 // Don't do this for Win64, it needs to realign the stack after the prologue.
2037 if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
2038 !ArgBaseReg.isValid()) {
2039 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2040 auto EmitSEHStackAlign = [&]() {
2041 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
2042 .addImm(MaxAlign)
2044 };
2045 EmitSEHBefore(EmitSEHStackAlign);
2046 BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
2047 EmitSEHAfter(EmitSEHStackAlign);
2048 }
2049
2050 // If there is an SUB32ri of ESP immediately before this instruction, merge
2051 // the two. This can be the case when tail call elimination is enabled and
2052 // the callee has more arguments than the caller.
2053 NumBytes = mergeSPUpdates(
2054 MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; },
2055 true);
2056
2057 // Adjust stack pointer: ESP -= numbytes.
2058
2059 // Windows and cygwin/mingw require a prologue helper routine when allocating
2060 // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
2061 // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
2062 // stack and adjust the stack pointer in one go. The 64-bit version of
2063 // __chkstk is only responsible for probing the stack. The 64-bit prologue is
2064 // responsible for adjusting the stack pointer. Touching the stack at 4K
2065 // increments is necessary to ensure that the guard pages used by the OS
2066 // virtual memory manager are allocated in correct sequence.
2067 uint64_t AlignedNumBytes = NumBytes;
2068 if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
2069 AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
2070
2071 auto EmitSEHStackAlloc = [&]() {
2072 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2073 .addImm(NumBytes)
2075 };
2076 if (NumBytes)
2077 EmitSEHBefore(EmitSEHStackAlloc);
2078
2079 if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
2080 assert(!X86FI->getUsesRedZone() &&
2081 "The Red Zone is not accounted for in stack probes");
2082
2083 // Check whether EAX is livein for this block.
2084 bool isEAXAlive = isEAXLiveIn(MBB);
2085
2086 if (isEAXAlive) {
2087 if (Is64Bit) {
2088 // Save RAX
2089 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
2090 .addReg(X86::RAX, RegState::Kill)
2092 } else {
2093 // Save EAX
2094 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
2095 .addReg(X86::EAX, RegState::Kill)
2097 }
2098 }
2099
2100 if (Is64Bit) {
2101 // Handle the 64-bit Windows ABI case where we need to call __chkstk.
2102 // Function prologue is responsible for adjusting the stack pointer.
2103 int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
2104 BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
2105 .addImm(Alloc)
2107 } else {
2108 // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
2109 // We'll also use 4 already allocated bytes for EAX.
2110 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2111 .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
2113 }
2114
2115 // Call __chkstk, __chkstk_ms, or __alloca.
2116 emitStackProbe(MF, MBB, MBBI, DL, true);
2117
2118 if (isEAXAlive) {
2119 // Restore RAX/EAX
2121 if (Is64Bit)
2122 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
2123 StackPtr, false, NumBytes - 8);
2124 else
2125 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
2126 StackPtr, false, NumBytes - 4);
2127 MI->setFlag(MachineInstr::FrameSetup);
2128 MBB.insert(MBBI, MI);
2129 }
2130 } else if (NumBytes) {
2131 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
2132 }
2133
2134 if (NumBytes)
2135 EmitSEHAfter(EmitSEHStackAlloc);
2136
2137 int SEHFrameOffset = 0;
2138 Register SPOrEstablisher;
2139 if (IsFunclet) {
2140 if (IsClrFunclet) {
2141 // The establisher parameter passed to a CLR funclet is actually a pointer
2142 // to the (mostly empty) frame of its nearest enclosing funclet; we have
2143 // to find the root function establisher frame by loading the PSPSym from
2144 // the intermediate frame.
2145 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2146 MachinePointerInfo NoInfo;
2147 MBB.addLiveIn(Establisher);
2148 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
2149 Establisher, false, PSPSlotOffset)
2152 ;
2153 // Save the root establisher back into the current funclet's (mostly
2154 // empty) frame, in case a sub-funclet or the GC needs it.
2155 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
2156 false, PSPSlotOffset)
2157 .addReg(Establisher)
2159 NoInfo,
2162 }
2163 SPOrEstablisher = Establisher;
2164 } else {
2165 SPOrEstablisher = StackPtr;
2166 }
2167
2168 if (IsWin64Prologue && HasFP) {
2169 // Set RBP to a small fixed offset from RSP. In the funclet case, we base
2170 // this calculation on the incoming establisher, which holds the value of
2171 // RSP from the parent frame at the end of the prologue.
2172 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
2173
2174 // If this is not a funclet, emit the CFI describing our frame pointer.
2175 if (NeedsWinCFI && !IsFunclet) {
2176 assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
2177 HasWinCFI = true;
2178 if (isAsynchronousEHPersonality(Personality) || MF.hasEHFunclets()) {
2179 if (TRI->hasBasePointer(MF))
2182 else
2183 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
2184 }
2185 }
2186
2187 auto EmitSEHSetFrame = [&]() {
2188 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2190 .addImm(SEHFrameOffset)
2192 };
2193
2194 if (!IsFunclet)
2195 EmitSEHBefore(EmitSEHSetFrame);
2196
2197 if (SEHFrameOffset)
2198 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
2199 SPOrEstablisher, false, SEHFrameOffset);
2200 else
2201 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
2202 .addReg(SPOrEstablisher);
2203
2204 if (!IsFunclet)
2205 EmitSEHAfter(EmitSEHSetFrame);
2206 } else if (IsFunclet && STI.is32Bit()) {
2207 // Reset EBP / ESI to something good for funclets.
2209 // If we're a catch funclet, we can be returned to via catchret. Save ESP
2210 // into the registration node so that the runtime will restore it for us.
2211 if (!MBB.isCleanupFuncletEntry()) {
2212 assert(Personality == EHPersonality::MSVC_CXX);
2213 Register FrameReg;
2215 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
2216 // ESP is the first field, so no extra displacement is needed.
2217 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
2218 false, EHRegOffset)
2219 .addReg(X86::ESP);
2220 }
2221 }
2222
2223 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
2224 const MachineInstr &FrameInstr = *MBBI;
2225
2226 if (NeedsWinCFI) {
2227 int FI;
2228 if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
2229 if (X86::FR64RegClass.contains(Reg)) {
2230 int Offset;
2231 Register IgnoredFrameReg;
2232 if (IsWin64Prologue && IsFunclet)
2233 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
2234 else
2235 Offset =
2236 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2237 SEHFrameOffset;
2238
2239 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
2240 auto EmitSEHSaveXMM = [&]() {
2241 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
2242 .addImm(Reg)
2243 .addImm(Offset)
2245 };
2246 EmitSEHBefore(EmitSEHSaveXMM);
2247 ++MBBI;
2248 EmitSEHAfter(EmitSEHSaveXMM);
2249 continue;
2250 }
2251 }
2252 }
2253 ++MBBI;
2254 }
2255
2256 if (NeedsWinCFI && HasWinCFI) {
2257 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
2259 }
2260
2261 if (FnHasClrFunclet && !IsFunclet) {
2262 // Save the so-called Initial-SP (i.e. the value of the stack pointer
2263 // immediately after the prolog) into the PSPSlot so that funclets
2264 // and the GC can recover it.
2265 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
2266 auto PSPInfo = MachinePointerInfo::getFixedStack(
2268 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
2269 PSPSlotOffset)
2274 }
2275
2276 // Realign stack after we spilled callee-saved registers (so that we'll be
2277 // able to calculate their offsets from the frame pointer).
2278 // Win64 requires aligning the stack after the prologue.
2279 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
2280 assert(HasFP && "There should be a frame pointer if stack is realigned.");
2281 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
2282 }
2283
2284 // We already dealt with stack realignment and funclets above.
2285 if (IsFunclet && STI.is32Bit())
2286 return;
2287
2288 // If we need a base pointer, set it up here. It's whatever the value
2289 // of the stack pointer is at this point. Any variable size objects
2290 // will be allocated after this, so we can still use the base pointer
2291 // to reference locals.
2292 if (TRI->hasBasePointer(MF)) {
2293 // Update the base pointer with the current stack pointer.
2294 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
2295 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
2296 .addReg(SPOrEstablisher)
2298 if (X86FI->getRestoreBasePointer()) {
2299 // Stash value of base pointer. Saving RSP instead of EBP shortens
2300 // dependence chain. Used by SjLj EH.
2301 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2302 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
2304 .addReg(SPOrEstablisher)
2306 }
2307
2308 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
2309 // Stash the value of the frame pointer relative to the base pointer for
2310 // Win32 EH. This supports Win32 EH, which does the inverse of the above:
2311 // it recovers the frame pointer from the base pointer rather than the
2312 // other way around.
2313 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
2314 Register UsedReg;
2315 int Offset =
2316 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
2317 .getFixed();
2318 assert(UsedReg == BasePtr);
2319 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
2322 }
2323 }
2324 if (ArgBaseReg.isValid()) {
2325 // Save argument base pointer.
2326 auto *MI = X86FI->getStackPtrSaveMI();
2327 int FI = MI->getOperand(1).getIndex();
2328 unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
2329 // movl %basereg, offset(%ebp)
2330 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
2331 .addReg(ArgBaseReg)
2333 }
2334
2335 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
2336 // Mark end of stack pointer adjustment.
2337 if (!HasFP && NumBytes) {
2338 // Define the current CFA rule to use the provided offset.
2339 assert(StackSize);
2340 BuildCFI(
2341 MBB, MBBI, DL,
2342 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
2344 }
2345
2346 // Emit DWARF info specifying the offsets of the callee-saved registers.
2348 }
2349
2350 // X86 Interrupt handling function cannot assume anything about the direction
2351 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
2352 // in each prologue of interrupt handler function.
2353 //
2354 // Create "cld" instruction only in these cases:
2355 // 1. The interrupt handling function uses any of the "rep" instructions.
2356 // 2. Interrupt handling function calls another function.
2357 // 3. If there are any inline asm blocks, as we do not know what they do
2358 //
2359 // TODO: We should also emit cld if we detect the use of std, but as of now,
2360 // the compiler does not even emit that instruction or even define it, so in
2361 // practice, this would only happen with inline asm, which we cover anyway.
2363 bool NeedsCLD = false;
2364
2365 for (const MachineBasicBlock &B : MF) {
2366 for (const MachineInstr &MI : B) {
2367 if (MI.isCall()) {
2368 NeedsCLD = true;
2369 break;
2370 }
2371
2372 if (isOpcodeRep(MI.getOpcode())) {
2373 NeedsCLD = true;
2374 break;
2375 }
2376
2377 if (MI.isInlineAsm()) {
2378 // TODO: Parse asm for rep instructions or call sites?
2379 // For now, let's play it safe and emit a cld instruction
2380 // just in case.
2381 NeedsCLD = true;
2382 break;
2383 }
2384 }
2385 }
2386
2387 if (NeedsCLD) {
2388 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
2390 }
2391 }
2392
2393 // At this point we know if the function has WinCFI or not.
2394 MF.setHasWinCFI(HasWinCFI);
2395}
2396
2398 const MachineFunction &MF) const {
2399 // We can't use LEA instructions for adjusting the stack pointer if we don't
2400 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
2401 // to deallocate the stack.
2402 // This means that we can use LEA for SP in two situations:
2403 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
2404 // 2. We *have* a frame pointer which means we are permitted to use LEA.
2405 return !MF.getTarget().getMCAsmInfo().usesWindowsCFI() || hasFP(MF);
2406}
2407
2409 switch (MI.getOpcode()) {
2410 case X86::CATCHRET:
2411 case X86::CLEANUPRET:
2412 return true;
2413 default:
2414 return false;
2415 }
2416 llvm_unreachable("impossible");
2417}
2418
2419// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
2420// stack. It holds a pointer to the bottom of the root function frame. The
2421// establisher frame pointer passed to a nested funclet may point to the
2422// (mostly empty) frame of its parent funclet, but it will need to find
2423// the frame of the root function to access locals. To facilitate this,
2424// every funclet copies the pointer to the bottom of the root function
2425// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
2426// same offset for the PSPSym in the root function frame that's used in the
2427// funclets' frames allows each funclet to dynamically accept any ancestor
2428// frame as its establisher argument (the runtime doesn't guarantee the
2429// immediate parent for some reason lost to history), and also allows the GC,
2430// which uses the PSPSym for some bookkeeping, to find it in any funclet's
2431// frame with only a single offset reported for the entire method.
2432unsigned
2433X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
2434 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
2436 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
2437 /*IgnoreSPUpdates*/ true)
2438 .getFixed();
2439 assert(Offset >= 0 && SPReg == TRI->getStackRegister());
2440 return static_cast<unsigned>(Offset);
2441}
2442
2443unsigned
2444X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
2445 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2446 // This is the size of the pushed CSRs.
2447 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2448 // This is the size of callee saved XMMs.
2449 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2450 unsigned XMMSize =
2451 WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
2452 // This is the amount of stack a funclet needs to allocate.
2453 unsigned UsedSize;
2454 EHPersonality Personality =
2456 if (Personality == EHPersonality::CoreCLR) {
2457 // CLR funclets need to hold enough space to include the PSPSym, at the
2458 // same offset from the stack pointer (immediately after the prolog) as it
2459 // resides at in the main function.
2460 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
2461 } else {
2462 // Other funclets just need enough stack for outgoing call arguments.
2463 UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
2464 }
2465 // RBP is not included in the callee saved register block. After pushing RBP,
2466 // everything is 16 byte aligned. Everything we allocate before an outgoing
2467 // call must also be 16 byte aligned.
2468 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
2469 // Subtract out the size of the callee saved registers. This is how much stack
2470 // each funclet will allocate.
2471 return FrameSizeMinusRBP + XMMSize - CSSize;
2472}
2473
2474static bool isTailCallOpcode(unsigned Opc) {
2475 return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
2476 Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
2477 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
2478 Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
2479 Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
2480}
2481
2483 MachineBasicBlock &MBB) const {
2484 const MachineFrameInfo &MFI = MF.getFrameInfo();
2486 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
2487 MachineBasicBlock::iterator MBBI = Terminator;
2488 DebugLoc DL;
2489 if (MBBI != MBB.end())
2490 DL = MBBI->getDebugLoc();
2491 // standard x86_64 uses 64-bit frame/stack pointers, x32 - 32-bit.
2492 const bool Is64BitILP32 = STI.isTarget64BitILP32();
2493 Register FramePtr = TRI->getFrameRegister(MF);
2494 Register MachineFramePtr =
2495 Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
2496
2497 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
2498 bool NeedsWin64CFI =
2499 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
2500 // For V3 unwind, epilog SEH pseudos are emitted inline before each
2501 // unwind-effecting instruction.
2502 bool IsWin64UnwindV3 =
2503 NeedsWin64CFI && MF.hasWinCFI() &&
2506 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
2507
2508 // Get the number of bytes to allocate from the FrameInfo.
2509 uint64_t StackSize = MFI.getStackSize();
2510 uint64_t MaxAlign = calculateMaxStackAlign(MF);
2511 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2512 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
2513 bool HasFP = hasFP(MF);
2514 uint64_t NumBytes = 0;
2515
2516 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
2518 !MF.getTarget().getTargetTriple().isUEFI()) &&
2519 MF.needsFrameMoves();
2520
2521 Register ArgBaseReg;
2522 if (auto *MI = X86FI->getStackPtrSaveMI()) {
2523 unsigned Opc = X86::LEA32r;
2524 Register StackReg = X86::ESP;
2525 ArgBaseReg = MI->getOperand(0).getReg();
2526 if (STI.is64Bit()) {
2527 Opc = X86::LEA64r;
2528 StackReg = X86::RSP;
2529 }
2530 // leal -4(%basereg), %esp
2531 // .cfi_def_cfa %esp, 4
2532 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
2533 .addUse(ArgBaseReg)
2534 .addImm(1)
2535 .addUse(X86::NoRegister)
2536 .addImm(-(int64_t)SlotSize)
2537 .addUse(X86::NoRegister)
2539 if (NeedsDwarfCFI) {
2540 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
2541 BuildCFI(MBB, MBBI, DL,
2542 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2544 --MBBI;
2545 }
2546 --MBBI;
2547 }
2548
2549 if (IsFunclet) {
2550 assert(HasFP && "EH funclets without FP not yet implemented");
2551 NumBytes = getWinEHFuncletFrameSize(MF);
2552 } else if (HasFP) {
2553 // Calculate required stack adjustment.
2554 uint64_t FrameSize = StackSize - SlotSize;
2555 NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
2556
2557 // Callee-saved registers were pushed on stack before the stack was
2558 // realigned.
2559 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
2560 NumBytes = alignTo(FrameSize, MaxAlign);
2561 } else {
2562 NumBytes = StackSize - CSSize - TailCallArgReserveSize;
2563 }
2564 uint64_t SEHStackAllocAmt = NumBytes;
2565
2566 unsigned SEHFrameOffset = 0;
2567 if (IsWin64Prologue && HasFP)
2568 SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
2569
2570 // AfterPop is the position to insert .cfi_restore.
2572 if (HasFP) {
2573 if (X86FI->hasSwiftAsyncContext()) {
2574 // Discard the context.
2575 int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true);
2576 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
2577 }
2578 // Pop EBP.
2579 if (IsWin64UnwindV3)
2580 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
2583 BuildMI(MBB, MBBI, DL,
2585 MachineFramePtr)
2587
2588 // We need to reset FP to its untagged state on return. Bit 60 is currently
2589 // used to show the presence of an extended frame.
2590 if (X86FI->hasSwiftAsyncContext()) {
2591 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
2592 .addUse(MachineFramePtr)
2593 .addImm(60)
2595 }
2596
2597 if (NeedsDwarfCFI) {
2598 if (!ArgBaseReg.isValid()) {
2599 unsigned DwarfStackPtr =
2600 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
2601 BuildCFI(MBB, MBBI, DL,
2602 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
2604 }
2605 if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
2606 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
2607 BuildCFI(MBB, AfterPop, DL,
2608 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
2610 --MBBI;
2611 --AfterPop;
2612 }
2613 --MBBI;
2614 }
2615 }
2616
2617 MachineBasicBlock::iterator FirstCSPop = MBBI;
2618 // Skip the callee-saved pop instructions.
2619 while (MBBI != MBB.begin()) {
2620 MachineBasicBlock::iterator PI = std::prev(MBBI);
2621 unsigned Opc = PI->getOpcode();
2622
2623 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
2624 if (!PI->getFlag(MachineInstr::FrameDestroy) ||
2625 (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
2626 Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
2627 Opc != X86::POP2P && Opc != X86::LEA64r && Opc != X86::SEH_PushReg &&
2628 Opc != X86::SEH_Push2Regs && Opc != X86::SEH_StackAlloc))
2629 break;
2630 FirstCSPop = PI;
2631 }
2632
2633 --MBBI;
2634 }
2635 if (ArgBaseReg.isValid()) {
2636 // Restore argument base pointer.
2637 auto *MI = X86FI->getStackPtrSaveMI();
2638 int FI = MI->getOperand(1).getIndex();
2639 unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
2640 // movl offset(%ebp), %basereg
2641 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
2643 }
2644 MBBI = FirstCSPop;
2645
2646 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
2647 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
2648
2649 if (MBBI != MBB.end())
2650 DL = MBBI->getDebugLoc();
2651 // If there is an ADD32ri or SUB32ri of ESP immediately before this
2652 // instruction, merge the two instructions.
2653 if (NumBytes || MFI.hasVarSizedObjects())
2654 NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true);
2655
2656 if (IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI()) {
2657 // Find the XMM restores that were tagged with FrameDestroy, now that we
2658 // know the offset we can emit the SEH pseudos for them.
2659 auto EpilogStart = MBBI;
2660 {
2661 auto ScanIt = MBBI;
2662 while (ScanIt != MBB.begin()) {
2663 auto PI = std::prev(ScanIt);
2664 int FI;
2665 if (PI->getFlag(MachineInstr::FrameDestroy) &&
2666 TII.isLoadFromStackSlot(*PI, FI)) {
2667 Register Reg = PI->getOperand(0).getReg();
2668 if (X86::FR64RegClass.contains(Reg)) {
2669 Register IgnoredFrameReg;
2670 int Offset =
2671 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
2672 SEHFrameOffset;
2673 BuildMI(MBB, PI, DL, TII.get(X86::SEH_SaveXMM))
2674 .addImm(Reg)
2675 .addImm(Offset)
2677 // std::prev(PI) is the SEH_SaveXMM we just inserted (before PI).
2678 // We start ScanIt from that point so that the next
2679 // std::prev(ScanIt) will examine the instruction before the pseudo,
2680 // i.e. the next potential XMM restore further up the block.
2681 EpilogStart = std::prev(PI);
2682 ScanIt = EpilogStart;
2683 continue;
2684 }
2685 }
2686 break;
2687 }
2688 }
2689
2690 // For V3, SEH_BeginEpilogue must be emitted before any epilog SEH pseudos.
2691 BuildMI(MBB, EpilogStart, DL, TII.get(X86::SEH_BeginEpilogue));
2692 }
2693
2694 // If dynamic alloca is used, then reset esp to point to the last callee-saved
2695 // slot before popping them off! Same applies for the case, when stack was
2696 // realigned. Don't do this if this was a funclet epilogue, since the funclets
2697 // will not do realignment or dynamic stack allocation.
2698 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
2699 !IsFunclet) {
2700 if (TRI->hasStackRealignment(MF))
2701 MBBI = FirstCSPop;
2702 uint64_t LEAAmount =
2703 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
2704
2705 if (X86FI->hasSwiftAsyncContext())
2706 LEAAmount -= 16;
2707
2708 // There are only two legal forms of epilogue:
2709 // - add SEHAllocationSize, %rsp
2710 // - lea SEHAllocationSize(%FramePtr), %rsp
2711 //
2712 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
2713 // However, we may use this sequence if we have a frame pointer because the
2714 // effects of the prologue can safely be undone.
2715 if (IsWin64UnwindV3) {
2716 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
2718 .addImm(SEHFrameOffset)
2720 if (SEHStackAllocAmt)
2721 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2722 .addImm(SEHStackAllocAmt)
2724 }
2725 if (LEAAmount != 0) {
2728 false, LEAAmount);
2729 --MBBI;
2730 } else {
2731 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
2733 --MBBI;
2734 }
2735 } else if (NumBytes) {
2736 // Adjust stack pointer back: ESP += numbytes.
2737 if (IsWin64UnwindV3)
2738 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
2739 .addImm(NumBytes)
2741 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
2742 if (!HasFP && NeedsDwarfCFI) {
2743 // Define the current CFA rule to use the provided offset.
2744 BuildCFI(MBB, MBBI, DL,
2746 nullptr, CSSize + TailCallArgReserveSize + SlotSize),
2748 }
2749 --MBBI;
2750 }
2751
2752 // For V1/V2, emit SEH_BeginEpilogue after stack restore code.
2753 if (!IsWin64UnwindV3 && NeedsWin64CFI && MF.hasWinCFI())
2754 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_BeginEpilogue));
2755
2756 if (!HasFP && NeedsDwarfCFI) {
2757 MBBI = FirstCSPop;
2758 int64_t Offset = -(int64_t)CSSize - SlotSize;
2759 // Mark callee-saved pop instruction.
2760 // Define the current CFA rule to use the provided offset.
2761 while (MBBI != MBB.end()) {
2763 unsigned Opc = PI->getOpcode();
2764 ++MBBI;
2765 if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
2766 Opc == X86::POP2 || Opc == X86::POP2P) {
2767 Offset += SlotSize;
2768 // Compared to pop, pop2 introduces more stack offset (one more
2769 // register).
2770 if (Opc == X86::POP2 || Opc == X86::POP2P)
2771 Offset += SlotSize;
2772 BuildCFI(MBB, MBBI, DL,
2775 }
2776 }
2777 }
2778
2779 // Emit DWARF info specifying the restores of the callee-saved registers.
2780 // For epilogue with return inside or being other block without successor,
2781 // no need to generate .cfi_restore for callee-saved registers.
2782 if (NeedsDwarfCFI && !MBB.succ_empty())
2783 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
2784
2785 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
2786 // Add the return addr area delta back since we are not tail calling.
2787 int64_t Delta = X86FI->getTCReturnAddrDelta();
2788 assert(Delta <= 0 && "TCDelta should never be positive");
2789 if (Delta) {
2790 // Check for possible merge with preceding ADD instruction.
2791 int64_t Offset = mergeSPAdd(MBB, Terminator, -Delta, true);
2792 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
2793 }
2794 }
2795
2796 // Emit tilerelease for AMX kernel.
2798 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
2799
2800 if (NeedsWin64CFI && MF.hasWinCFI())
2801 BuildMI(MBB, Terminator, DL, TII.get(X86::SEH_EndEpilogue));
2802}
2803
2805 int FI,
2806 Register &FrameReg) const {
// Selects the register used to address frame index FI (written to the
// FrameReg out-parameter) and computes the fixed byte offset of the object
// relative to that register.
// NOTE(review): this listing omits some source lines of this definition (the
// start of the signature, the declaration of X86FI, and parts of the
// interrupt-handler adjustment and frame-pointer branch below). The comments
// here describe only what is visible.
2807 const MachineFrameInfo &MFI = MF.getFrameInfo();
2808
2809 bool IsFixed = MFI.isFixedObjectIndex(FI);
2810 // We can't calculate offset from frame pointer if the stack is realigned,
2811 // so enforce usage of stack/base pointer. The base pointer is used when we
2812 // have dynamic allocas in addition to dynamic realignment.
// In both special cases fixed objects are still addressed through the frame
// pointer; only non-fixed objects switch to the base or stack register.
2813 if (TRI->hasBasePointer(MF))
2814 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
2815 else if (TRI->hasStackRealignment(MF))
2816 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
2817 else
2818 FrameReg = TRI->getFrameRegister(MF);
2819
2820 // Offset will hold the offset from the stack pointer at function entry to the
2821 // object.
2822 // We need to factor in additional offsets applied during the prologue to the
2823 // frame, base, and stack pointer depending on which is used.
2824 int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2826 unsigned CSSize = X86FI->getCalleeSavedFrameSize();
2827 uint64_t StackSize = MFI.getStackSize();
2828 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
2829 int64_t FPDelta = 0;
2830
2831 // In an x86 interrupt, remove the offset we added to account for the return
2832 // address from any stack object allocated in the caller's frame. Interrupts
2833 // do not have a standard return address. Fixed objects in the current frame,
2834 // such as SSE register spills, should not get this treatment.
// NOTE(review): the head of this condition and the statement it guards are
// not visible in this listing.
2836 Offset >= 0) {
2838 }
2839
2840 if (IsWin64Prologue) {
2841 assert(!MFI.hasCalls() || (StackSize % 16) == 8);
2842
2843 // Calculate required stack adjustment.
2844 uint64_t FrameSize = StackSize - SlotSize;
2845 // If required, include space for extra hidden slot for stashing base
2846 // pointer.
2847 if (X86FI->getRestoreBasePointer())
2848 FrameSize += SlotSize;
2849 uint64_t NumBytes = FrameSize - CSSize;
2850
2851 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
// The frame index returned by X86FI->getFAIndex() is answered directly with
// the negated SEH frame offset. The leading "FI &&" excludes index 0.
2852 if (FI && FI == X86FI->getFAIndex())
2853 return StackOffset::getFixed(-SEHFrameOffset);
2854
2855 // FPDelta is the offset from the "traditional" FP location of the old base
2856 // pointer followed by return address and the location required by the
2857 // restricted Win64 prologue.
2858 // Add FPDelta to all offsets below that go through the frame pointer.
2859 FPDelta = FrameSize - SEHFrameOffset;
2860 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
2861 "FPDelta isn't aligned per the Win64 ABI!");
2862 }
2863
2864 if (FrameReg == TRI->getFramePtr()) {
2865 // Skip saved EBP/RBP
2866 Offset += SlotSize;
2867
2868 // Account for restricted Windows prologue.
2869 Offset += FPDelta;
2870
2871 // Skip the RETADDR move area
2872 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2873 if (TailCallReturnAddrDelta < 0)
2874 Offset -= TailCallReturnAddrDelta;
2875
// NOTE(review): no return statement is visible in this branch (the listing
// skips a line here); presumably the frame-pointer-relative Offset is
// returned at this point. Confirm against the full source.
2877 }
2878
2879 // FrameReg is either the stack pointer or a base pointer. But the base is
2880 // located at the end of the statically known StackSize so the distinction
2881 // doesn't really matter.
2882 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
2883 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
2884 return StackOffset::getFixed(Offset + StackSize);
2885}
2886
2888 Register &FrameReg) const {
// Resolves frame index FI to a register (written to FrameReg) and an integer
// offset, giving special treatment to slots recorded in the WinEH XMM slot
// table.
// NOTE(review): the start of the signature and the declaration of X86FI are
// not visible in this listing.
2889 const MachineFrameInfo &MFI = MF.getFrameInfo();
2891 const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2892 const auto it = WinEHXMMSlotInfo.find(FI);
2893
// Slots that are not in the table use the generic lookup.
2894 if (it == WinEHXMMSlotInfo.end())
2895 return getFrameIndexReference(MF, FI, FrameReg).getFixed();
2896
// Recorded slots are addressed off the stack register: the maximum call frame
// size rounded down to the stack alignment, plus the offset stored in the
// table for this slot.
2897 FrameReg = TRI->getStackRegister();
2898 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
2899 it->second;
2900}
2901
2904 Register &FrameReg,
2905 int Adjustment) const {
// Addresses frame index FI relative to the stack register: FrameReg is set to
// TRI->getStackRegister() and the returned fixed offset is the object's
// offset minus the local-area offset, plus the caller-supplied Adjustment.
// NOTE(review): the start of the signature is not visible in this listing.
2906 const MachineFrameInfo &MFI = MF.getFrameInfo();
2907 FrameReg = TRI->getStackRegister();
2908 return StackOffset::getFixed(MFI.getObjectOffset(FI) -
2909 getOffsetOfLocalArea() + Adjustment);
2910}
2911
2914 int FI, Register &FrameReg,
2915 bool IgnoreSPUpdates) const {
// Resolves frame index FI to a stack-pointer-relative reference when that is
// possible, and otherwise falls back to getFrameIndexReference. FrameReg is
// an out-parameter set by whichever helper produces the answer.
// NOTE(review): the start of the signature and the head of the assert near
// the end are not visible in this listing.
2916
2917 const MachineFrameInfo &MFI = MF.getFrameInfo();
2918 // Does not include any dynamic realign.
2919 const uint64_t StackSize = MFI.getStackSize();
2920 // LLVM arranges the stack as follows:
2921 // ...
2922 // ARG2
2923 // ARG1
2924 // RETADDR
2925 // PUSH RBP <-- RBP points here
2926 // PUSH CSRs
2927 // ~~~~~~~ <-- possible stack realignment (non-win64)
2928 // ...
2929 // STACK OBJECTS
2930 // ... <-- RSP after prologue points here
2931 // ~~~~~~~ <-- possible stack realignment (win64)
2932 //
2933 // if (hasVarSizedObjects()):
2934 // ... <-- "base pointer" (ESI/RBX) points here
2935 // DYNAMIC ALLOCAS
2936 // ... <-- RSP points here
2937 //
2938 // Case 1: In the simple case of no stack realignment and no dynamic
2939 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
2940 // with fixed offsets from RSP.
2941 //
2942 // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2943 // stack objects are addressed with RBP and regular stack objects with RSP.
2944 //
2945 // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2946 // to address stack arguments for outgoing calls and nothing else. The "base
2947 // pointer" points to local variables, and RBP points to fixed objects.
2948 //
2949 // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2950 // answer we give is relative to the SP after the prologue, and not the
2951 // SP in the middle of the function.
2952
// Fixed objects in a realigned, non-Win64 frame use the generic lookup.
2953 if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2954 !STI.isTargetWin64())
2955 return getFrameIndexReference(MF, FI, FrameReg);
2956
2957 // If !hasReservedCallFrame the function might have SP adjustment in the
2958 // body. So, even though the offset is statically known, it depends on where
2959 // we are in the function.
2960 if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2961 return getFrameIndexReference(MF, FI, FrameReg);
2962
2963 // We don't handle tail calls, and shouldn't be seeing them either.
2965 "we don't handle this case!");
2966
2967 // This is how the math works out:
2968 //
2969 // %rsp grows (i.e. gets lower) left to right. Each box below is
2970 // one word (eight bytes). Obj0 is the stack slot we're trying to
2971 // get to.
2972 //
2973 // ----------------------------------
2974 // | BP | Obj0 | Obj1 | ... | ObjN |
2975 // ----------------------------------
2976 // ^ ^ ^ ^
2977 // A B C E
2978 //
2979 // A is the incoming stack pointer.
2980 // (B - A) is the local area offset (-8 for x86-64) [1]
2981 // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2982 //
2983 // |(E - B)| is the StackSize (absolute value, positive). For a
2984 // stack that grows down, this works out to be (B - E). [3]
2985 //
2986 // E is also the value of %rsp after stack has been set up, and we
2987 // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2988 // (C - E) == (C - A) - (B - A) + (B - E)
2989 // { Using [1], [2] and [3] above }
2990 // == getObjectOffset - LocalAreaOffset + StackSize
2991
// StackSize is passed as the adjustment, which yields the (C - E) value
// derived above.
2992 return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2993}
2994
2997 std::vector<CalleeSavedInfo> &CSI) const {
// Assigns a fixed stack slot to every callee-saved register in CSI and records
// the resulting sizes in X86FI and MFI. GPRs (GR64/GR32) are laid out first,
// one SlotSize-wide slot each, followed by all remaining registers with their
// own spill size and alignment. Always returns true.
// NOTE(review): this listing omits some source lines of this definition (the
// start of the signature, the declaration of X86FI, the creation of the frame
// index FI used in the EH-funclet branch, and the lookup of RC in the second
// loop). The comments here describe only what is visible.
2998 MachineFrameInfo &MFI = MF.getFrameInfo();
3000
3001 unsigned CalleeSavedFrameSize = 0;
3002 unsigned XMMCalleeSavedFrameSize = 0;
3003 auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
// Running offset of the next spill slot. It starts at the local-area offset
// adjusted by the tail-call return-address delta and only ever decreases.
3004 int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
3005
3006 int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
3007
3008 if (TailCallReturnAddrDelta < 0) {
3009 // create RETURNADDR area
3010 // arg
3011 // arg
3012 // RETADDR
3013 // { ...
3014 // RETADDR area
3015 // ...
3016 // }
3017 // [EBP]
3018 MFI.CreateFixedObject(-TailCallReturnAddrDelta,
3019 TailCallReturnAddrDelta - SlotSize, true);
3020 }
3021
3022 // Spill the BasePtr if it's used.
3023 if (this->TRI->hasBasePointer(MF)) {
3024 // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
3025 if (MF.hasEHFunclets()) {
3027 X86FI->setHasSEHFramePtrSave(true);
3028 X86FI->setSEHFramePtrSaveIndex(FI);
3029 }
3030 }
3031
3032 if (hasFP(MF)) {
3033 // emitPrologue always spills frame register the first thing.
3034 SpillSlotOffset -= SlotSize;
3035 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3036
3037 // The async context lives directly before the frame pointer, and we
3038 // allocate a second slot to preserve stack alignment.
3039 if (X86FI->hasSwiftAsyncContext()) {
3040 SpillSlotOffset -= SlotSize;
3041 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3042 SpillSlotOffset -= SlotSize;
3043 }
3044
3045 // Since emitPrologue and emitEpilogue will handle spilling and restoring of
3046 // the frame register, we can delete it from CSI list and not have to worry
3047 // about avoiding it later.
// Only the first CSI entry that overlaps the frame register is erased; the
// loop stops right after the erase.
3048 Register FPReg = TRI->getFrameRegister(MF);
3049 for (unsigned i = 0; i < CSI.size(); ++i) {
3050 if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
3051 CSI.erase(CSI.begin() + i);
3052 break;
3053 }
3054 }
3055 }
3056
3057 // Strategy:
3058 // 1. Use push2 when
3059 // a) number of CSR > 1 if no need padding
3060 // b) number of CSR > 2 if need padding
3061 // c) stack alignment >= 16 bytes
3062 // 2. When the number of CSR push is odd
3063 // a. Start to use push2 from the 1st push if stack is 16B aligned.
3064 // b. Start to use push2 from the 2nd push if stack is not 16B aligned.
3065 // 3. When the number of CSR push is even, start to use push2 from the 1st
3066 // push and make the stack 16B aligned before the push
3067 unsigned NumRegsForPush2 = 0;
3068 if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
3069 unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
3070 return X86::GR64RegClass.contains(I.getReg());
3071 });
// Padding is needed only when the next slot is not 16-byte aligned and the
// GPR count is even.
3072 bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
3073 bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
3074 X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
3075 NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
3076 if (X86FI->padForPush2Pop2()) {
3077 SpillSlotOffset -= SlotSize;
3078 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3079 }
3080 }
3081
3082 // Assign slots for GPRs. It increases frame size.
3083 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
3084 MCRegister Reg = I.getReg();
3085
3086 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3087 continue;
3088
3089 // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
3090 // or only an odd number of registers in the candidates.
3091 if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
3092 (SpillSlotOffset % 16 == 0 ||
3093 X86FI->getNumCandidatesForPush2Pop2() % 2))
3094 X86FI->addCandidateForPush2Pop2(Reg);
3095
3096 SpillSlotOffset -= SlotSize;
3097 CalleeSavedFrameSize += SlotSize;
3098
3099 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3100 I.setFrameIdx(SlotIndex);
3101 }
3102
3103 // Adjust the offset of spill slot as we know the accurate callee saved frame
3104 // size.
3105 if (X86FI->getRestoreBasePointer()) {
3106 SpillSlotOffset -= SlotSize;
3107 CalleeSavedFrameSize += SlotSize;
3108
3109 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
3110 // TODO: saving the slot index is better?
// The value stored here is the accumulated callee-saved frame size, not a
// frame index.
3111 X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
3112 }
3113 assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
3114 "Expect even candidates for push2/pop2");
3115 if (X86FI->getNumCandidatesForPush2Pop2())
3116 ++NumFunctionUsingPush2Pop2;
3117 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
3118 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
3119
3120 // Assign slots for XMMs.
// This loop handles every CSI entry that is neither GR64 nor GR32.
3121 for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
3122 MCRegister Reg = I.getReg();
3123 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3124 continue;
3125
3127 unsigned Size = TRI->getSpillSize(*RC);
3128 Align Alignment = TRI->getSpillAlign(*RC);
3129 // ensure alignment
// SpillSlotOffset is negative, so the magnitude is rounded up to Alignment
// and then negated again.
3130 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
3131 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
3132
3133 // spill into slot
3134 SpillSlotOffset -= Size;
3135 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
3136 I.setFrameIdx(SlotIndex);
3137 MFI.ensureMaxAlignment(Alignment);
3138
3139 // Save the start offset and size of XMM in stack frame for funclets.
3140 if (X86::VR128RegClass.contains(Reg)) {
3141 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
3142 XMMCalleeSavedFrameSize += Size;
3143 }
3144 }
3145
3146 return true;
3147}
3148
// Emits the prologue code that saves the callee-saved registers in CSI before
// MI in MBB: pushes for GPRs (GR64/GR32) and stack-slot stores for every other
// register. Returns true on every visible path.
// NOTE(review): this listing omits the whole signature of this definition, the
// declaration of X86FI, and several builder lines (the trailing call of each
// BuildMI chain, the opening of the two-register push, and the RC lookup in
// the last loop). The comments here describe only what is visible.
3152 DebugLoc DL = MBB.findDebugLoc(MI);
3153
3154 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
3155 // for us, and there are no XMM CSRs on Win32.
3156 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
3157 return true;
3158
3159 // Push GPRs. It increases frame size.
3160 const MachineFunction &MF = *MBB.getParent();
// When padding was requested for push2/pop2, an extra 64-bit push of an
// undefined RAX is emitted first.
3162 if (X86FI->padForPush2Pop2()) {
3163 assert(SlotSize == 8 && "Unexpected slot size for padding!");
3164 BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
3165 .addReg(X86::RAX, RegState::Undef)
3167 }
3168
3169 // Update LiveIn of the basic block and decide whether we can add a kill flag
3170 // to the use.
3171 auto UpdateLiveInCheckCanKill = [&](Register Reg) {
3172 const MachineRegisterInfo &MRI = MF.getRegInfo();
3173 // Do not set a kill flag on values that are also marked as live-in. This
3174 // happens with the @llvm-returnaddress intrinsic and with arguments
3175 // passed in callee saved registers.
3176 // Omitting the kill flags is conservatively correct even if the live-in
3177 // is not used after all.
3178 if (MRI.isLiveIn(Reg))
3179 return false;
3180 MBB.addLiveIn(Reg);
3181 // Check if any subregister is live-in
3182 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
3183 if (MRI.isLiveIn(*AReg))
3184 return false;
3185 return true;
3186 };
// Convenience wrapper that turns the boolean above into a kill register state.
3187 auto UpdateLiveInGetKillRegState = [&](Register Reg) {
3188 return getKillRegState(UpdateLiveInCheckCanKill(Reg));
3189 };
3190
// Walk CSI in reverse order and handle only the GPR entries.
3191 for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
3192 MCRegister Reg = RI->getReg();
3193 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3194 continue;
3195
// A push2/pop2 candidate consumes the next CSI entry as its partner, so the
// iterator is advanced a second time inside this branch.
3196 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3197 MCRegister Reg2 = (++RI)->getReg();
3199 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3200 .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
3202 } else {
3203 BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
3204 .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
3206 }
3207 }
3208
// When base-pointer restore is requested, the base register is pushed as well
// and marked killed.
3209 if (X86FI->getRestoreBasePointer()) {
3210 unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
3211 Register BaseReg = this->TRI->getBaseRegister();
3212 BuildMI(MBB, MI, DL, TII.get(Opc))
3213 .addReg(BaseReg, getKillRegState(true))
3215 }
3216
3217 // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
3218 // It can be done by spilling XMMs to stack frame.
3219 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
3220 MCRegister Reg = I.getReg();
3221 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3222 continue;
3223
3224 // Add the callee-saved register as live-in. It's killed at the spill.
3225 MBB.addLiveIn(Reg);
3227
3228 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, Register(),
3230 }
3231
3232 return true;
3233}
3234
3235void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
3237 MachineInstr *CatchRet) const {
// Materializes the address of the block targeted by CatchRet into RAX (64-bit)
// or EAX (32-bit), inserting the instruction at MBBI in MBB, and marks the
// target block as having its machine block address taken.
// NOTE(review): the parameter line declaring MBBI and the head of the assert
// below are not visible in this listing.
3238 // SEH shouldn't use catchret.
3240 MBB.getParent()->getFunction().getPersonalityFn())) &&
3241 "SEH should not use CATCHRET");
3242 const DebugLoc &DL = CatchRet->getDebugLoc();
// Operand 0 of the catchret instruction is the destination block.
3243 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
3244
3245 // Fill EAX/RAX with the address of the target block.
3246 if (STI.is64Bit()) {
3247 // LEA64r CatchRetTarget(%rip), %rax
3248 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
3249 .addReg(X86::RIP)
3250 .addImm(0)
3251 .addReg(0)
3252 .addMBB(CatchRetTarget)
3253 .addReg(0);
3254 } else {
3255 // MOV32ri $CatchRetTarget, %eax
3256 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
3257 .addMBB(CatchRetTarget);
3258 }
3259
3260 // Record that we've taken the address of CatchRetTarget and no longer just
3261 // reference it in a terminator.
3262 CatchRetTarget->setMachineBlockAddressTaken();
3263}
3264
// Emits the epilogue code that restores the callee-saved registers in CSI
// before MI in MBB: stack-slot reloads for non-GPR registers first, then pops
// for the GPRs. Returns false when CSI is empty and true otherwise.
// NOTE(review): this listing omits the whole signature of this definition, the
// declaration of X86FI, and several builder lines (the trailing call of most
// BuildMI chains, the RC lookup in the reload loop, and the right-hand side of
// the IsWin64UnwindV3 comparison). The comments here describe only what is
// visible.
3268 if (CSI.empty())
3269 return false;
3270
3271 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
3272 // Don't restore CSRs in 32-bit EH funclets. Matches
3273 // spillCalleeSavedRegisters.
3274 if (STI.is32Bit())
3275 return true;
3276 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
3277 // funclets. emitEpilogue transforms these to normal jumps.
3278 if (MI->getOpcode() == X86::CATCHRET) {
3279 const Function &F = MBB.getParent()->getFunction();
3280 bool IsSEH = isAsynchronousEHPersonality(
3281 classifyEHPersonality(F.getPersonalityFn()));
3282 if (IsSEH)
3283 return true;
3284 }
3285 }
3286
3287 DebugLoc DL = MBB.findDebugLoc(MI);
3288 MachineFunction &MF = *MBB.getParent();
3290
3291 bool NeedsWin64CFI =
3292 isWin64Prologue(MF) && MF.getFunction().needsUnwindTableEntry();
3293 bool IsWin64UnwindV3 =
3294 NeedsWin64CFI && MF.getFunction().getParent()->getWinX64EHUnwindMode() ==
3296
3297 // Reload XMMs from stack frame.
// This loop handles every CSI entry that is neither GR64 nor GR32.
3298 for (const CalleeSavedInfo &I : CSI) {
3299 MCRegister Reg = I.getReg();
3300 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
3301 continue;
3302
3304 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, Register(), 0,
3306 }
3307
3308 // Clear the stack slot for spill base pointer register.
// When IsWin64UnwindV3 is set, an SEH_PushReg pseudo naming the base register
// is emitted ahead of the pop.
3309 if (X86FI->getRestoreBasePointer()) {
3310 if (IsWin64UnwindV3)
3311 BuildMI(MBB, MI, DL, TII.get(X86::SEH_PushReg))
3312 .addImm(this->TRI->getBaseRegister())
3314 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
3315 Register BaseReg = this->TRI->getBaseRegister();
3316 BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
3318 }
3319
3320 // POP GPRs.
// CSI is walked in forward order here, the opposite of the reverse walk
// llvm::reverse(CSI) that assigns the GPR slots.
3321 for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
3322 MCRegister Reg = I->getReg();
3323 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
3324 continue;
3325
// A push2/pop2 candidate consumes the next CSI entry as its partner, so the
// iterator is advanced a second time inside this branch.
3326 if (X86FI->isCandidateForPush2Pop2(Reg)) {
3327 MCRegister Reg2 = (++I)->getReg();
3328 if (IsWin64UnwindV3) {
3329 BuildMI(MBB, MI, DL, TII.get(X86::SEH_Push2Regs))
3330 .addImm(Reg)
3331 .addImm(Reg2)
3333 }
3334 BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
3335 .addReg(Reg2, RegState::Define)
3337 } else {
3338 if (IsWin64UnwindV3)
3339 BuildMI(MBB, MI, DL, TII.get(X86::SEH_PushReg))
3340 .addImm(Reg)
3342 BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
3344 }
3345 }
// The padding slot requested for push2/pop2 is released with an SP update of
// SlotSize bytes instead of a pop.
3346 if (X86FI->padForPush2Pop2()) {
3347 if (IsWin64UnwindV3)
3348 BuildMI(MBB, MI, DL, TII.get(X86::SEH_StackAlloc))
3351 emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);
3352 }
3353
3354 return true;
3355}
3356
3358 BitVector &SavedRegs,
3359 RegScavenger *RS) const {
// Adjusts the SavedRegs bit vector: the base register is added when the
// function uses a base pointer, and any register reserved by the user is
// removed again.
// NOTE(review): the start of the signature and one line at the top of the body
// are not visible in this listing.
3361
3362 // Spill the BasePtr if it's used.
3363 if (TRI->hasBasePointer(MF)) {
3364 Register BasePtr = TRI->getBaseRegister();
// On 64-bit ILP32 targets the 64-bit super-register of the base pointer is
// the one recorded.
3365 if (STI.isTarget64BitILP32())
3366 BasePtr = getX86SubSuperRegister(BasePtr, 64);
3367 SavedRegs.set(BasePtr);
3368 }
3369 if (STI.hasUserReservedRegisters()) {
// Visit every set bit and clear the ones the user has reserved.
3370 for (int Reg = SavedRegs.find_first(); Reg != -1;
3371 Reg = SavedRegs.find_next(Reg)) {
3372 if (STI.isRegisterReservedByUser(Reg)) {
3373 SavedRegs.reset(Reg);
3374 }
3375 }
3376 }
3377}
3378
3379static bool HasNestArgument(const MachineFunction *MF) {
3380 const Function &F = MF->getFunction();
3381 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
3382 I++) {
3383 if (I->hasNestAttr() && !I->use_empty())
3384 return true;
3385 }
3386 return false;
3387}
3388
3389/// GetScratchRegister - Get a temp register for performing work in the
3390/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
3391/// and the properties of the function either one or two registers will be
3392/// needed. Set primary to true for the first register, false for the second.
3393static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
3394 const MachineFunction &MF, bool Primary) {
3395 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
3396
3397 // Erlang stuff.
3398 if (CallingConvention == CallingConv::HiPE) {
3399 if (Is64Bit)
3400 return Primary ? X86::R14 : X86::R13;
3401 else
3402 return Primary ? X86::EBX : X86::EDI;
3403 }
3404
3405 if (Is64Bit) {
3406 if (IsLP64)
3407 return Primary ? X86::R11 : X86::R12;
3408 else
3409 return Primary ? X86::R11D : X86::R12D;
3410 }
3411
3412 bool IsNested = HasNestArgument(&MF);
3413
3414 if (CallingConvention == CallingConv::X86_FastCall ||
3415 CallingConvention == CallingConv::Fast ||
3416 CallingConvention == CallingConv::Tail) {
3417 if (IsNested)
3418 report_fatal_error("Segmented stacks does not support fastcall with "
3419 "nested function.");
3420 return Primary ? X86::EAX : X86::ECX;
3421 }
3422 if (IsNested)
3423 return Primary ? X86::EDX : X86::EAX;
3424 return Primary ? X86::ECX : X86::EAX;
3425}
3426
3427// The stack limit in the TCB is set to this many bytes above the actual stack
3428// limit.
3430
3432 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
// Inserts the segmented-stack check in front of PrologueMBB. Two new blocks
// are pushed to the front of MF: checkMBB compares the stack pointer (or the
// stack pointer minus the frame size) against a limit read from a
// thread-local slot, and allocMBB calls __morestack before continuing into
// PrologueMBB. Nothing is emitted when MFI.needsSplitStackProlog() is false.
// NOTE(review): this listing omits some source lines of this definition (the
// start of the signature, the creation of checkMBB/allocMBB and X86FI, the
// condition code of the conditional jump, part of the R11 move, and the
// condition that selects the large-code-model call sequence). The comments
// here describe only what is visible.
3433 MachineFrameInfo &MFI = MF.getFrameInfo();
3434 uint64_t StackSize;
3435 unsigned TlsReg, TlsOffset;
3436 DebugLoc DL;
3437
3438 // To support shrink-wrapping we would need to insert the new blocks
3439 // at the right place and update the branches to PrologueMBB.
3440 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3441
3442 unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3443 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3444 "Scratch register is live-in");
3445
3446 if (MF.getFunction().isVarArg())
3447 report_fatal_error("Segmented stacks do not support vararg functions.");
3448 if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
3449 !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
3450 !STI.isTargetDragonFly())
3451 report_fatal_error("Segmented stacks not supported on this platform.");
3452
3453 // Eventually StackSize will be calculated by a link-time pass; which will
3454 // also decide whether checking code needs to be injected into this particular
3455 // prologue.
3456 StackSize = MFI.getStackSize();
3457
3458 if (!MFI.needsSplitStackProlog())
3459 return;
3460
3464 bool IsNested = false;
3465
3466 // We need to know if the function has a nest argument only in 64 bit mode.
3467 if (Is64Bit)
3468 IsNested = HasNestArgument(&MF);
3469
3470 // The MOV R10, RAX needs to be in a different block, since the RET we emit in
3471 // allocMBB needs to be last (terminating) instruction.
3472
// Both new blocks inherit the live-ins of the original prologue block.
3473 for (const auto &LI : PrologueMBB.liveins()) {
3474 allocMBB->addLiveIn(LI);
3475 checkMBB->addLiveIn(LI);
3476 }
3477
3478 if (IsNested)
3479 allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
3480
// allocMBB is pushed first so that checkMBB ends up as the first block of MF.
3481 MF.push_front(allocMBB);
3482 MF.push_front(checkMBB);
3483
3484 // When the frame size is less than 256 we just compare the stack
3485 // boundary directly to the value of the stack pointer, per gcc.
3486 bool CompareStackPointer = StackSize < kSplitStackAvailable;
3487
3488 // Read the limit off the current stacklet off the stack_guard location.
3489 if (Is64Bit) {
3490 if (STI.isTargetLinux()) {
3491 TlsReg = X86::FS;
3492 TlsOffset = IsLP64 ? 0x70 : 0x40;
3493 } else if (STI.isTargetDarwin()) {
3494 TlsReg = X86::GS;
3495 TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
3496 } else if (STI.isTargetWin64()) {
3497 TlsReg = X86::GS;
3498 TlsOffset = 0x28; // pvArbitrary, reserved for application use
3499 } else if (STI.isTargetFreeBSD()) {
3500 TlsReg = X86::FS;
3501 TlsOffset = 0x18;
3502 } else if (STI.isTargetDragonFly()) {
3503 TlsReg = X86::FS;
3504 TlsOffset = 0x20; // use tls_tcb.tcb_segstack
3505 } else {
3506 report_fatal_error("Segmented stacks not supported on this platform.");
3507 }
3508
// Either compare the stack pointer itself, or first compute SP - StackSize
// into the scratch register and compare that.
3509 if (CompareStackPointer)
3510 ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
3511 else
3512 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
3513 ScratchReg)
3514 .addReg(X86::RSP)
3515 .addImm(1)
3516 .addReg(0)
3517 .addImm(-StackSize)
3518 .addReg(0);
3519
3520 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
3521 .addReg(ScratchReg)
3522 .addReg(0)
3523 .addImm(1)
3524 .addReg(0)
3525 .addImm(TlsOffset)
3526 .addReg(TlsReg);
3527 } else {
3528 if (STI.isTargetLinux()) {
3529 TlsReg = X86::GS;
3530 TlsOffset = 0x30;
3531 } else if (STI.isTargetDarwin()) {
3532 TlsReg = X86::GS;
3533 TlsOffset = 0x48 + 90 * 4;
3534 } else if (STI.isTargetWin32()) {
3535 TlsReg = X86::FS;
3536 TlsOffset = 0x14; // pvArbitrary, reserved for application use
3537 } else if (STI.isTargetDragonFly()) {
3538 TlsReg = X86::FS;
3539 TlsOffset = 0x10; // use tls_tcb.tcb_segstack
3540 } else if (STI.isTargetFreeBSD()) {
3541 report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
3542 } else {
3543 report_fatal_error("Segmented stacks not supported on this platform.");
3544 }
3545
3546 if (CompareStackPointer)
3547 ScratchReg = X86::ESP;
3548 else
3549 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
3550 .addReg(X86::ESP)
3551 .addImm(1)
3552 .addReg(0)
3553 .addImm(-StackSize)
3554 .addReg(0);
3555
3556 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
3557 STI.isTargetDragonFly()) {
3558 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3559 .addReg(ScratchReg)
3560 .addReg(0)
3561 .addImm(0)
3562 .addReg(0)
3563 .addImm(TlsOffset)
3564 .addReg(TlsReg);
3565 } else if (STI.isTargetDarwin()) {
3566
3567 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
3568 unsigned ScratchReg2;
3569 bool SaveScratch2;
3570 if (CompareStackPointer) {
3571 // The primary scratch register is available for holding the TLS offset.
3572 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3573 SaveScratch2 = false;
3574 } else {
3575 // Need to use a second register to hold the TLS offset
3576 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
3577
3578 // Unfortunately, with fastcc the second scratch register may hold an
3579 // argument.
3580 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
3581 }
3582
3583 // If Scratch2 is live-in then it needs to be saved.
3584 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
3585 "Scratch register is live-in and not saved");
3586
// The second scratch register is preserved with a push/pop pair around the
// compare when it is live-in.
3587 if (SaveScratch2)
3588 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
3589 .addReg(ScratchReg2, RegState::Kill);
3590
3591 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
3592 .addImm(TlsOffset);
3593 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
3594 .addReg(ScratchReg)
3595 .addReg(ScratchReg2)
3596 .addImm(1)
3597 .addReg(0)
3598 .addImm(0)
3599 .addReg(TlsReg);
3600
3601 if (SaveScratch2)
3602 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
3603 }
3604 }
3605
3606 // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
3607 // It jumps to normal execution of the function body.
3608 BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
3609 .addMBB(&PrologueMBB)
3611
3612 // On 32 bit we first push the arguments size and then the frame size. On 64
3613 // bit, we pass the stack frame size in r10 and the argument size in r11.
3614 if (Is64Bit) {
3615 // Functions with nested arguments use R10, so it needs to be saved across
3616 // the call to _morestack
3617
3618 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
3619 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
3620 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
3621 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
3622
// R10 is copied into RAX/EAX before it is overwritten with the frame size.
3623 if (IsNested)
3624 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
3625
3626 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
3627 .addImm(StackSize);
3628 BuildMI(allocMBB, DL,
3630 Reg11)
3631 .addImm(X86FI->getArgumentStackSize());
3632 } else {
3633 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
3634 .addImm(X86FI->getArgumentStackSize());
3635 BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
3636 }
3637
3638 // __morestack is in libgcc
3640 // Under the large code model, we cannot assume that __morestack lives
3641 // within 2^31 bytes of the call site, so we cannot use pc-relative
3642 // addressing. We cannot perform the call via a temporary register,
3643 // as the rax register may be used to store the static chain, and all
3644 // other suitable registers may be either callee-save or used for
3645 // parameter passing. We cannot use the stack at this point either
3646 // because __morestack manipulates the stack directly.
3647 //
3648 // To avoid these issues, perform an indirect call via a read-only memory
3649 // location containing the address.
3650 //
3651 // This solution is not perfect, as it assumes that the .rodata section
3652 // is laid out within 2^31 bytes of each function body, but this seems
3653 // to be sufficient for JIT.
3654 // FIXME: Add retpoline support and remove the error here..
3655 if (STI.useIndirectThunkCalls())
3656 report_fatal_error("Emitting morestack calls on 64-bit with the large "
3657 "code model and thunks not yet implemented.");
3658 BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
3659 .addReg(X86::RIP)
3660 .addImm(0)
3661 .addReg(0)
3662 .addExternalSymbol("__morestack_addr")
3663 .addReg(0);
3664 } else {
3665 if (Is64Bit)
3666 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
3667 .addExternalSymbol("__morestack");
3668 else
3669 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
3670 .addExternalSymbol("__morestack");
3671 }
3672
// allocMBB ends with one of two return pseudos, chosen by whether a nest
// argument is in use.
3673 if (IsNested)
3674 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
3675 else
3676 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
3677
3678 allocMBB->addSuccessor(&PrologueMBB);
3679
// The allocation path is recorded with zero probability and the direct path
// to the prologue with probability one.
3680 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
3681 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
3682
3683#ifdef EXPENSIVE_CHECKS
3684 MF.verify();
3685#endif
3686}
3687
3688/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
3689/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
3690/// to fields it needs, through a named metadata node "hipe.literals" containing
3691/// name-value pairs.
3692static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
3693 const StringRef LiteralName) {
3694 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
3695 MDNode *Node = HiPELiteralsMD->getOperand(i);
3696 if (Node->getNumOperands() != 2)
3697 continue;
3698 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
3699 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
3700 if (!NodeName || !NodeVal)
3701 continue;
3702 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
3703 if (ValConst && NodeName->getString() == LiteralName) {
3704 return ValConst->getZExtValue();
3705 }
3706 }
3707
3708 report_fatal_error("HiPE literal " + LiteralName +
3709 " required but not provided");
3710}
3711
3712 // Return true if there are no non-ehpad successors to MBB and there are no
3713 // non-meta instructions between MBBI and MBB.end().
// Both conditions hold vacuously when MBB has no successors or when MBBI is
// already MBB.end().
// NOTE(review): the signature lines of this definition are not visible in
// this listing.
3716 return llvm::all_of(
3717 MBB.successors(),
3718 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
3719 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
3720 return MI.isMetaInstruction();
3721 });
3722}
3723
3724/// Erlang programs may need a special prologue to handle the stack size they
3725/// might need at runtime. That is because Erlang/OTP does not implement a C
3726/// stack but uses a custom implementation of hybrid stack/heap architecture.
3727/// (for more information see Eric Stenman's Ph.D. thesis:
3728/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
3729///
3730/// CheckStack:
3731/// temp0 = sp - MaxStack
3732/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3733/// OldStart:
3734/// ...
3735/// IncStack:
3736/// call inc_stack # doubles the stack space
3737/// temp0 = sp - MaxStack
3738/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
3740 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
3741 MachineFrameInfo &MFI = MF.getFrameInfo();
3742 DebugLoc DL;
3743
3744 // To support shrink-wrapping we would need to insert the new blocks
3745 // at the right place and update the branches to PrologueMBB.
3746 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
3747
3748 // HiPE-specific values
3749 NamedMDNode *HiPELiteralsMD =
3750 MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
3751 if (!HiPELiteralsMD)
3753 "Can't generate HiPE prologue without runtime parameters");
3754 const unsigned HipeLeafWords = getHiPELiteral(
3755 HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
3756 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
3757 const unsigned Guaranteed = HipeLeafWords * SlotSize;
3758 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
3759 ? MF.getFunction().arg_size() - CCRegisteredArgs
3760 : 0;
3761 unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;
3762
3763 assert(STI.isTargetLinux() &&
3764 "HiPE prologue is only supported on Linux operating systems.");
3765
3766 // Compute the largest caller's frame that is needed to fit the callees'
3767 // frames. This 'MaxStack' is computed from:
3768 //
3769 // a) the fixed frame size, which is the space needed for all spilled temps,
3770 // b) outgoing on-stack parameter areas, and
3771 // c) the minimum stack space this function needs to make available for the
3772 // functions it calls (a tunable ABI property).
3773 if (MFI.hasCalls()) {
3774 unsigned MoreStackForCalls = 0;
3775
3776 for (auto &MBB : MF) {
3777 for (auto &MI : MBB) {
3778 if (!MI.isCall())
3779 continue;
3780
3781 // Get callee operand.
3782 const MachineOperand &MO = MI.getOperand(0);
3783
3784 // Only take account of global function calls (no closures etc.).
3785 if (!MO.isGlobal())
3786 continue;
3787
3788 const Function *F = dyn_cast<Function>(MO.getGlobal());
3789 if (!F)
3790 continue;
3791
3792 // Do not update 'MaxStack' for primitive and built-in functions, as
3793 // they are executed on another stack. A callee is treated as one when
3794 // its name contains "erlang." or "bif_", or contains neither a "." (as
3795 // in a simple <Module>.<Function>.<Arity>) nor an "_" (as in the BIF
3796 // "suspend_0").
3797 if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
3798 F->getName().find_first_of("._") == StringRef::npos)
3799 continue;
3800
3801 unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
3802 ? F->arg_size() - CCRegisteredArgs
3803 : 0;
3804 if (HipeLeafWords - 1 > CalleeStkArity)
3805 MoreStackForCalls =
3806 std::max(MoreStackForCalls,
3807 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
3808 }
3809 }
3810 MaxStack += MoreStackForCalls;
3811 }
3812
3813 // If the stack frame needed is larger than the guaranteed then runtime checks
3814 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
3815 if (MaxStack > Guaranteed) {
3816 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
3817 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
3818
3819 for (const auto &LI : PrologueMBB.liveins()) {
3820 stackCheckMBB->addLiveIn(LI);
3821 incStackMBB->addLiveIn(LI);
3822 }
3823
3824 MF.push_front(incStackMBB);
3825 MF.push_front(stackCheckMBB);
3826
3827 unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
3828 unsigned LEAop, CMPop, CALLop;
3829 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
3830 if (Is64Bit) {
3831 SPReg = X86::RSP;
3832 PReg = X86::RBP;
3833 LEAop = X86::LEA64r;
3834 CMPop = X86::CMP64rm;
3835 CALLop = X86::CALL64pcrel32;
3836 } else {
3837 SPReg = X86::ESP;
3838 PReg = X86::EBP;
3839 LEAop = X86::LEA32r;
3840 CMPop = X86::CMP32rm;
3841 CALLop = X86::CALLpcrel32;
3842 }
3843
3844 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
3845 assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
3846 "HiPE prologue scratch register is live-in");
3847
3848 // Create new MBB for StackCheck:
3849 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3850 false, -MaxStack);
3851 // SPLimitOffset is in a fixed heap location (pointed by BP).
3852 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3853 PReg, false, SPLimitOffset);
3854 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
3855 .addMBB(&PrologueMBB)
3857
3858 // Create new MBB for IncStack:
3859 BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
3860 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
3861 false, -MaxStack);
3862 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
3863 PReg, false, SPLimitOffset);
3864 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
3865 .addMBB(incStackMBB)
3867
3868 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
3869 stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
3870 incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
3871 incStackMBB->addSuccessor(incStackMBB, {1, 100});
3872 }
3873#ifdef EXPENSIVE_CHECKS
3874 MF.verify();
3875#endif
3876}
3877
3878bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
3880 const DebugLoc &DL,
3881 int Offset) const {
3882 if (Offset <= 0)
3883 return false;
3884
3885 if (Offset % SlotSize)
3886 return false;
3887
3888 int NumPops = Offset / SlotSize;
3889 // This is only worth it if we have at most 2 pops.
3890 if (NumPops != 1 && NumPops != 2)
3891 return false;
3892
3893 // Handle only the trivial case where the adjustment directly follows
3894 // a call. This is the most common one, anyway.
3895 if (MBBI == MBB.begin())
3896 return false;
3897 MachineBasicBlock::iterator Prev = std::prev(MBBI);
3898 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
3899 return false;
3900
3901 unsigned Regs[2];
3902 unsigned FoundRegs = 0;
3903
3904 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3905 const MachineOperand &RegMask = Prev->getOperand(1);
3906
3907 auto &RegClass =
3908 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
3909 // Try to find up to NumPops free registers.
3910 for (auto Candidate : RegClass) {
3911 // Poor man's liveness:
3912 // Since we're immediately after a call, any register that is clobbered
3913 // by the call and not defined by it can be considered dead.
3914 if (!RegMask.clobbersPhysReg(Candidate))
3915 continue;
3916
3917 // Don't clobber reserved registers
3918 if (MRI.isReserved(Candidate))
3919 continue;
3920
3921 bool IsDef = false;
3922 for (const MachineOperand &MO : Prev->implicit_operands()) {
3923 if (MO.isReg() && MO.isDef() &&
3924 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
3925 IsDef = true;
3926 break;
3927 }
3928 }
3929
3930 if (IsDef)
3931 continue;
3932
3933 Regs[FoundRegs++] = Candidate;
3934 if (FoundRegs == (unsigned)NumPops)
3935 break;
3936 }
3937
3938 if (FoundRegs == 0)
3939 return false;
3940
3941 // If we found only one free register, but need two, reuse the same one twice.
3942 while (FoundRegs < (unsigned)NumPops)
3943 Regs[FoundRegs++] = Regs[0];
3944
3945 for (int i = 0; i < NumPops; ++i)
3946 BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
3947 Regs[i]);
3948
3949 return true;
3950}
3951
3955 bool reserveCallFrame = hasReservedCallFrame(MF);
3956 unsigned Opcode = I->getOpcode();
3957 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
3958 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
3959 uint64_t Amount = TII.getFrameSize(*I);
3960 uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
3961 I = MBB.erase(I);
3962 auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
3963
3964 // Try to avoid emitting dead SP adjustments if the block end is unreachable,
3965 // typically because the function is marked noreturn (abort, throw,
3966 // assert_fail, etc).
3967 if (isDestroy && blockEndIsUnreachable(MBB, I))
3968 return I;
3969
3970 if (!reserveCallFrame) {
3971 // If the stack pointer can be changed after prologue, turn the
3972 // adjcallstackdown (frame setup) instruction into a 'sub ESP, <amt>' and
3973 // the adjcallstackup (frame destroy) instruction into an 'add ESP, <amt>'.
3974
3975 // We need to keep the stack aligned properly. To do this, we round the
3976 // amount of space needed for the outgoing arguments up to the next
3977 // alignment boundary.
3978 Amount = alignTo(Amount, getStackAlign());
3979
3980 const Function &F = MF.getFunction();
3981 bool WindowsCFI = MF.getTarget().getMCAsmInfo().usesWindowsCFI();
3982 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
3983
3984 // If we have any exception handlers in this function, and we adjust
3985 // the SP before calls, we may need to indicate this to the unwinder
3986 // using GNU_ARGS_SIZE. Note that this may be necessary even when
3987 // Amount == 0, because the preceding function may have set a non-0
3988 // GNU_ARGS_SIZE.
3989 // TODO: We don't need to reset this between subsequent functions,
3990 // if it didn't change.
3991 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
3992
3993 if (HasDwarfEHHandlers && !isDestroy &&
3995 BuildCFI(MBB, InsertPos, DL,
3996 MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
3997
3998 if (Amount == 0)
3999 return I;
4000
4001 // Factor out the amount that gets handled inside the sequence
4002 // (Pushes of argument for frame setup, callee pops for frame destroy)
4003 Amount -= InternalAmt;
4004
4005 // TODO: This is needed only if we require precise CFA.
4006 // If this is a callee-pop calling convention, emit a CFA adjust for
4007 // the amount the callee popped.
4008 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
4009 BuildCFI(MBB, InsertPos, DL,
4010 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
4011
4012 // Add Amount to SP to destroy a frame, or subtract to setup.
4013 int64_t StackAdjustment = isDestroy ? Amount : -Amount;
4014 int64_t CfaAdjustment = StackAdjustment;
4015
4016 if (StackAdjustment) {
4017 // Merge with any previous or following adjustment instruction. Note: the
4018 // instructions merged with here do not have CFI, so their stack
4019 // adjustments do not feed into CfaAdjustment
4020
4021 auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI,
4022 int64_t Offset) {
4023 CfaAdjustment += Offset;
4024 };
4025 auto CalcNewOffset = [&StackAdjustment](int64_t Offset) {
4026 return StackAdjustment + Offset;
4027 };
4028 StackAdjustment =
4029 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true);
4030 StackAdjustment =
4031 mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false);
4032
4033 if (StackAdjustment) {
4034 if (!(F.hasMinSize() &&
4035 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
4036 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
4037 /*InEpilogue=*/false);
4038 }
4039 }
4040
4041 if (DwarfCFI && !hasFP(MF) && CfaAdjustment) {
4042 // If we don't have FP, but need to generate unwind information,
4043 // we need to set the correct CFA offset after the stack adjustment.
4044 // How much we adjust the CFA offset depends on whether we're emitting
4045 // CFI only for EH purposes or for debugging. EH only requires the CFA
4046 // offset to be correct at each call site, while for debugging we want
4047 // it to be more precise.
4048
4049 // TODO: When not using precise CFA, we also need to adjust for the
4050 // InternalAmt here.
4051 BuildCFI(
4052 MBB, InsertPos, DL,
4053 MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment));
4054 }
4055
4056 return I;
4057 }
4058
4059 if (InternalAmt) {
4062 while (CI != B && !std::prev(CI)->isCall())
4063 --CI;
4064 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
4065 }
4066
4067 return I;
4068}
4069
4071 assert(MBB.getParent() && "Block is not attached to a function!");
4072 const MachineFunction &MF = *MBB.getParent();
4073 if (!MBB.isLiveIn(X86::EFLAGS))
4074 return true;
4075
4076 // If stack probes have to loop inline or call, that will clobber EFLAGS.
4077 // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
4079 const X86TargetLowering &TLI = *STI.getTargetLowering();
4080 if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
4081 return false;
4082
4084 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
4085}
4086
4088 assert(MBB.getParent() && "Block is not attached to a function!");
4089
4090 // Win64 has strict requirements in terms of epilogue and we are
4091 // not taking a chance at messing with them.
4092 // I.e., unless this block is already an exit block, we can't use
4093 // it as an epilogue.
4094 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
4095 return false;
4096
4097 // Swift async context epilogue has a BTR instruction that clobbers parts of
4098 // EFLAGS.
4099 const MachineFunction &MF = *MBB.getParent();
4102
4103 if (canUseLEAForSPInEpilogue(*MBB.getParent()))
4104 return true;
4105
4106 // If we cannot use LEA to adjust SP, we may need to use ADD, which
4107 // clobbers the EFLAGS. Check that we do not need to preserve it,
4108 // otherwise, conservatively assume this is not
4109 // safe to insert the epilogue here.
4111}
4112
4114 // If we may need to emit frameless compact unwind information, give
4115 // up as this is currently broken: PR25614.
4116 bool CompactUnwind =
4118 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
4119 !CompactUnwind) &&
4120 // The lowering of segmented stack and HiPE only support entry
4121 // blocks as prologue blocks: PR26107. This limitation may be
4122 // lifted if we fix:
4123 // - adjustForSegmentedStacks
4124 // - adjustForHiPEPrologue
4126 !MF.shouldSplitStack();
4127}
4128
4131 const DebugLoc &DL, bool RestoreSP) const {
4132 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
4133 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
4134 assert(STI.is32Bit() && !Uses64BitFramePtr &&
4135 "restoring EBP/ESI on non-32-bit target");
4136
4137 MachineFunction &MF = *MBB.getParent();
4138 Register FramePtr = TRI->getFrameRegister(MF);
4139 Register BasePtr = TRI->getBaseRegister();
4140 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
4142 MachineFrameInfo &MFI = MF.getFrameInfo();
4143
4144 // FIXME: Don't set FrameSetup flag in catchret case.
4145
4146 int FI = FuncInfo.EHRegNodeFrameIndex;
4147 int EHRegSize = MFI.getObjectSize(FI);
4148
4149 if (RestoreSP) {
4150 // MOV32rm -EHRegSize(%ebp), %esp
4151 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
4152 X86::EBP, true, -EHRegSize)
4154 }
4155
4156 Register UsedReg;
4157 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
4158 int EndOffset = -EHRegOffset - EHRegSize;
4159 FuncInfo.EHRegNodeEndOffset = EndOffset;
4160
4161 if (UsedReg == FramePtr) {
4162 // ADD $offset, %ebp
4163 unsigned ADDri = getADDriOpcode(false);
4164 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
4166 .addImm(EndOffset)
4168 ->getOperand(3)
4169 .setIsDead();
4170 assert(EndOffset >= 0 &&
4171 "end of registration object above normal EBP position!");
4172 } else if (UsedReg == BasePtr) {
4173 // LEA offset(%ebp), %esi
4174 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
4175 FramePtr, false, EndOffset)
4177 // MOV32rm SavedEBPOffset(%esi), %ebp
4178 assert(X86FI->getHasSEHFramePtrSave());
4179 int Offset =
4180 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
4181 .getFixed();
4182 assert(UsedReg == BasePtr);
4183 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
4184 UsedReg, true, Offset)
4186 } else {
4187 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
4188 }
4189 return MBBI;
4190}
4191
4193 return TRI->getSlotSize();
4194}
4195
4200
4204 Register FrameRegister = RI->getFrameRegister(MF);
4205 if (getInitialCFARegister(MF) == FrameRegister &&
4207 DwarfFrameBase FrameBase;
4208 FrameBase.Kind = DwarfFrameBase::CFA;
4209 FrameBase.Location.Offset =
4211 return FrameBase;
4212 }
4213
4214 return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
4215}
4216
4217namespace {
4218// Struct used by orderFrameObjects to help sort the stack objects.
4219struct X86FrameSortingObject {
4220 bool IsValid = false; // true if we care about this Object.
4221 unsigned ObjectIndex = 0; // Index of Object into MFI list.
4222 unsigned ObjectSize = 0; // Size of Object in bytes.
4223 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
4224 unsigned ObjectNumUses = 0; // Object static number of uses.
4225};
4226
4227// The comparison function we use for std::sort to order our local
4228// stack symbols. The current algorithm is to use an estimated
4229// "density". This takes into consideration the size and number of
4230// uses each object has in order to roughly minimize code size.
4231// So, for example, an object of size 16B that is referenced 5 times
4232// will get higher priority than 4 4B objects referenced 1 time each.
4233// It's not perfect and we may be able to squeeze a few more bytes out of
4234// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
4235// fringe end can have special consideration, given their size is less
4236// important, etc.), but the algorithmic complexity grows too much to be
4237// worth the extra gains we get. This gets us pretty close.
4238// The final order leaves us with objects with highest priority going
4239// at the end of our list.
4240struct X86FrameSortingComparator {
4241 inline bool operator()(const X86FrameSortingObject &A,
4242 const X86FrameSortingObject &B) const {
4243 uint64_t DensityAScaled, DensityBScaled;
4244
4245 // For consistency in our comparison, all invalid objects are placed
4246 // at the end. This also allows us to stop walking when we hit the
4247 // first invalid item after it's all sorted.
4248 if (!A.IsValid)
4249 return false;
4250 if (!B.IsValid)
4251 return true;
4252
4253 // The density is calculated by doing :
4254 // (double)DensityA = A.ObjectNumUses / A.ObjectSize
4255 // (double)DensityB = B.ObjectNumUses / B.ObjectSize
4256 // Since this approach may cause inconsistencies in
4257 // the floating point <, >, == comparisons, depending on the floating
4258 // point model with which the compiler was built, we're going
4259 // to scale both sides by multiplying with
4260 // A.ObjectSize * B.ObjectSize. This ends up factoring away
4261 // the division and, with it, the need for any floating point
4262 // arithmetic.
4263 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
4264 static_cast<uint64_t>(B.ObjectSize);
4265 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
4266 static_cast<uint64_t>(A.ObjectSize);
4267
4268 // If the two densities are equal, prioritize highest alignment
4269 // objects. This allows for similar alignment objects
4270 // to be packed together (given the same density).
4271 // There's room for improvement here, also, since we can pack
4272 // similar alignment (different density) objects next to each
4273 // other to save padding. This will also require further
4274 // complexity/iterations, and the overall gain isn't worth it,
4275 // in general. Something to keep in mind, though.
4276 if (DensityAScaled == DensityBScaled)
4277 return A.ObjectAlignment < B.ObjectAlignment;
4278
4279 return DensityAScaled < DensityBScaled;
4280 }
4281};
4282} // namespace
4283
4284// Order the symbols in the local stack.
4285// We want to place the local stack objects in some sort of sensible order.
4286// The heuristic we use is to try and pack them according to static number
4287// of uses and size of object in order to minimize code size.
4289 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
4290 const MachineFrameInfo &MFI = MF.getFrameInfo();
4291
4292 // Don't waste time if there's nothing to do.
4293 if (ObjectsToAllocate.empty())
4294 return;
4295
4296 // Create an array of all MFI objects. We won't need all of these
4297 // objects, but we're going to create a full array of them to make
4298 // it easier to index into when we're counting "uses" down below.
4299 // We want to be able to easily/cheaply access an object by simply
4300 // indexing into it, instead of having to search for it every time.
4301 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
4302
4303 // Walk the objects we care about and mark them as such in our working
4304 // struct.
4305 for (auto &Obj : ObjectsToAllocate) {
4306 SortingObjects[Obj].IsValid = true;
4307 SortingObjects[Obj].ObjectIndex = Obj;
4308 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
4309 // Set the size.
4310 int ObjectSize = MFI.getObjectSize(Obj);
4311 if (ObjectSize == 0)
4312 // Variable size. Just use 4.
4313 SortingObjects[Obj].ObjectSize = 4;
4314 else
4315 SortingObjects[Obj].ObjectSize = ObjectSize;
4316 }
4317
4318 // Count the number of uses for each object.
4319 for (auto &MBB : MF) {
4320 for (auto &MI : MBB) {
4321 if (MI.isDebugInstr())
4322 continue;
4323 for (const MachineOperand &MO : MI.operands()) {
4324 // Check to see if it's a local stack symbol.
4325 if (!MO.isFI())
4326 continue;
4327 int Index = MO.getIndex();
4328 // Check to see if it falls within our range, and is tagged
4329 // to require ordering.
4330 if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
4331 SortingObjects[Index].IsValid)
4332 SortingObjects[Index].ObjectNumUses++;
4333 }
4334 }
4335 }
4336
4337 // Sort the objects using X86FrameSortingComparator (see its comment for
4338 // info).
4339 llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
4340
4341 // Now modify the original list to represent the final order that
4342 // we want. The order will depend on whether we're going to access them
4343 // from the stack pointer or the frame pointer. For SP, the list should
4344 // end up with the END containing objects that we want with smaller offsets.
4345 // For FP, it should be flipped.
4346 int i = 0;
4347 for (auto &Obj : SortingObjects) {
4348 // All invalid items are sorted at the end, so it's safe to stop.
4349 if (!Obj.IsValid)
4350 break;
4351 ObjectsToAllocate[i++] = Obj.ObjectIndex;
4352 }
4353
4354 // Flip it if we're accessing off of the FP.
4355 if (!TRI->hasStackRealignment(MF) && hasFP(MF))
4356 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
4357}
4358
4359unsigned
4361 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
4362 unsigned Offset = 16;
4363 // RBP is immediately pushed.
4364 Offset += SlotSize;
4365 // All callee-saved registers are then pushed.
4366 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
4367 // Every funclet allocates enough stack space for the largest outgoing call.
4368 Offset += getWinEHFuncletFrameSize(MF);
4369 return Offset;
4370}
4371
4373 MachineFunction &MF, RegScavenger *RS) const {
4374 // Mark the function as not having WinCFI. We will set it back to true in
4375 // emitPrologue if it gets called and emits CFI.
4376 MF.setHasWinCFI(false);
4377
4378 MachineFrameInfo &MFI = MF.getFrameInfo();
4379 // If the frame is big enough that we might need to scavenge a register to
4380 // handle huge offsets, reserve a stack slot for that now.
4381 if (!isInt<32>(MFI.estimateStackSize(MF))) {
4382 int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4383 RS->addScavengingFrameIndex(FI);
4384 }
4385
4386 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
4387 // aligned. The format doesn't support misaligned stack adjustments.
4390
4391 // If this function isn't doing Win64-style C++ EH, we don't need to do
4392 // anything.
4393 if (STI.is64Bit() && MF.hasEHFunclets() &&
4396 adjustFrameForMsvcCxxEh(MF);
4397 }
4398}
4399
4400void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
4401 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
4402 // relative to RSP after the prologue. Find the offset of the last fixed
4403 // object, so that we can allocate a slot immediately following it. If there
4404 // were no fixed objects, use offset -SlotSize, which is immediately after the
4405 // return address. Fixed objects have negative frame indices.
4406 MachineFrameInfo &MFI = MF.getFrameInfo();
4407 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
4408 int64_t MinFixedObjOffset = -SlotSize;
4409 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
4410 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
4411
4412 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
4413 for (WinEHHandlerType &H : TBME.HandlerArray) {
4414 int FrameIndex = H.CatchObj.FrameIndex;
4415 if ((FrameIndex != INT_MAX) && MFI.getObjectOffset(FrameIndex) == 0) {
4416 // Ensure alignment.
4417 unsigned Align = MFI.getObjectAlign(FrameIndex).value();
4418 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
4419 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
4420 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
4421 }
4422 }
4423 }
4424
4425 // Ensure alignment.
4426 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
4427 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
4428 int UnwindHelpFI =
4429 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
4430 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
4431
4432 // Store -2 into UnwindHelp on function entry. We have to scan forwards past
4433 // other frame setup instructions.
4434 MachineBasicBlock &MBB = MF.front();
4435 auto MBBI = MBB.begin();
4436 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
4437 ++MBBI;
4438
4440 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
4441 UnwindHelpFI)
4442 .addImm(-2);
4443}
4444
4446 MachineFunction &MF, RegScavenger *RS) const {
4447 auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
4448
4449 if (STI.is32Bit() && MF.hasEHFunclets())
4451 // We have emitted prolog and epilog. Don't need stack pointer saving
4452 // instruction any more.
4453 if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
4454 MI->eraseFromParent();
4455 X86FI->setStackPtrSaveMI(nullptr);
4456 }
4457}
4458
4460 MachineFunction &MF) const {
4461 // 32-bit functions have to restore stack pointers when control is transferred
4462 // back to the parent function. These blocks are identified as eh pads that
4463 // are not funclet entries.
4464 bool IsSEH = isAsynchronousEHPersonality(
4466 for (MachineBasicBlock &MBB : MF) {
4467 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
4468 if (NeedsRestore)
4470 /*RestoreSP=*/IsSEH);
4471 }
4472}
4473
4474// Compute the alignment gap between current SP after spilling FP/BP and the
4475// next properly aligned stack offset.
4477 const TargetRegisterClass *RC,
4478 unsigned NumSpilledRegs) {
4480 unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
4481 Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
4482 unsigned AlignedSize = alignTo(AllocSize, StackAlign);
4483 return AlignedSize - AllocSize;
4484}
4485
4486void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
4488 Register FP, Register BP,
4489 int SPAdjust) const {
4490 assert(FP.isValid() || BP.isValid());
4491
4492 MachineBasicBlock *MBB = BeforeMI->getParent();
4493 DebugLoc DL = BeforeMI->getDebugLoc();
4494
4495 // Spill FP.
4496 if (FP.isValid()) {
4497 BuildMI(*MBB, BeforeMI, DL,
4498 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4499 .addReg(FP);
4500 }
4501
4502 // Spill BP.
4503 if (BP.isValid()) {
4504 BuildMI(*MBB, BeforeMI, DL,
4505 TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
4506 .addReg(BP);
4507 }
4508
4509 // Make sure SP is aligned.
4510 if (SPAdjust)
4511 emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
4512
4513 // Emit unwinding information.
4514 if (FP.isValid() && needsDwarfCFI(MF)) {
4515 // Emit .cfi_remember_state to remember old frame.
4516 unsigned CFIIndex =
4518 BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4519 .addCFIIndex(CFIIndex);
4520
4521 // Setup new CFA value with DW_CFA_def_cfa_expression:
4522 // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
4523 SmallString<64> CfaExpr;
4524 uint8_t buffer[16];
4525 int Offset = SPAdjust;
4526 if (BP.isValid())
4527 Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
4528 // If BeforeMI is a frame setup instruction, we need to adjust the position
4529 // and offset of the new cfi instruction.
4530 if (TII.isFrameSetup(*BeforeMI)) {
4531 Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
4532 BeforeMI = std::next(BeforeMI);
4533 }
4534 Register StackPtr = TRI->getStackRegister();
4535 if (STI.isTarget64BitILP32())
4537 unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
4538 CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
4539 CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
4540 CfaExpr.push_back(dwarf::DW_OP_deref);
4541 CfaExpr.push_back(dwarf::DW_OP_consts);
4542 CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
4543 CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
4544
4545 SmallString<64> DefCfaExpr;
4546 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
4547 DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
4548 DefCfaExpr.append(CfaExpr.str());
4549 BuildCFI(*MBB, BeforeMI, DL,
4550 MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
4552 }
4553}
4554
4555void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
4557 Register FP, Register BP,
4558 int SPAdjust) const {
4559 assert(FP.isValid() || BP.isValid());
4560
4561 // Adjust SP so it points to spilled FP or BP.
4562 MachineBasicBlock *MBB = AfterMI->getParent();
4563 MachineBasicBlock::iterator Pos = std::next(AfterMI);
4564 DebugLoc DL = AfterMI->getDebugLoc();
4565 if (SPAdjust)
4566 emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
4567
4568 // Restore BP.
4569 if (BP.isValid()) {
4570 BuildMI(*MBB, Pos, DL,
4571 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
4572 }
4573
4574 // Restore FP.
4575 if (FP.isValid()) {
4576 BuildMI(*MBB, Pos, DL,
4577 TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
4578
4579 // Emit unwinding information.
4580 if (needsDwarfCFI(MF)) {
4581 // Restore original frame with .cfi_restore_state.
4582 unsigned CFIIndex =
4584 BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
4585 .addCFIIndex(CFIIndex);
4586 }
4587 }
4588}
4589
4590void X86FrameLowering::saveAndRestoreFPBPUsingSP(
4592 MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
4593 assert(SpillFP || SpillBP);
4594
4595 Register FP, BP;
4596 const TargetRegisterClass *RC;
4597 unsigned NumRegs = 0;
4598
4599 if (SpillFP) {
4600 FP = TRI->getFrameRegister(MF);
4601 if (STI.isTarget64BitILP32())
4603 RC = TRI->getMinimalPhysRegClass(FP);
4604 ++NumRegs;
4605 }
4606 if (SpillBP) {
4607 BP = TRI->getBaseRegister();
4608 if (STI.isTarget64BitILP32())
4609 BP = Register(getX86SubSuperRegister(BP, 64));
4610 RC = TRI->getMinimalPhysRegClass(BP);
4611 ++NumRegs;
4612 }
4613 int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
4614
4615 spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
4616 restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
4617}
4618
4619bool X86FrameLowering::skipSpillFPBP(
4621 if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
4622 // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
4623 // SaveRbx = COPY RBX
4624 // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
4625 // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
4626 // We should skip this instruction sequence.
4627 int FI;
4628 Register Reg;
4629 while (!(MI->getOpcode() == TargetOpcode::COPY &&
4630 MI->getOperand(1).getReg() == X86::RBX) &&
4631 !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
4632 ++MI;
4633 return true;
4634 }
4635 return false;
4636}
4637
4639 const TargetRegisterInfo *TRI, bool &AccessFP,
4640 bool &AccessBP) {
     // Report whether MI has a use operand or a def operand of FP and/or BP.
     //
     // AccessFP and AccessBP are always reset first and then set
     // individually; the return value is true when either one is set. A
     // register that tests false (`if (FP)` / `if (BP)`) is never queried.
     //
     // NOTE(review): the first line of the signature is not present in this
     // listing; per the file's declaration index it reads
     //   static bool isFPBPAccess(const MachineInstr &MI, Register FP,
     //                            Register BP, ...
     // The trailing bool arguments passed to findRegisterUseOperandIdx /
     // findRegisterDefOperandIdx are option flags of those MachineInstr
     // queries; see their declarations for the exact meaning.
4641 AccessFP = AccessBP = false;
4642 if (FP) {
     // Both queries return -1 when no matching operand exists.
4643 if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
4644 MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4645 AccessFP = true;
4646 }
4647 if (BP) {
4648 if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
4649 MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4650 AccessBP = true;
4651 }
4652 return AccessFP || AccessBP;
4653}
4654
4655// Invoke instruction has been lowered to normal function call. We try to figure
4656// out if MI comes from Invoke.
4657// Do we have any better method?
//
// Decision order:
//  - MI is not a call: false.
//  - The caller reports that MI lies between EH labels: true.
//  - MI's parent block has no EH-pad successor: false.
//  - Otherwise: true only if no further call is found by the scan below.
4658static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
4659 if (!MI.isCall())
4660 return false;
4661 if (InsideEHLabels)
4662 return true;
4663
4664 const MachineBasicBlock *MBB = MI.getParent();
4665 if (!MBB->hasEHPadSuccessor())
4666 return false;
4667
4668 // Check if there is another call instruction from MI to the end of MBB.
     // NOTE(review): the line declaring MBBI and ME is not present in this
     // listing; presumably MBBI starts at MI and ME is the end of MBB, as
     // the comment above suggests -- confirm. The scan starts at the
     // instruction after MBBI's initial position.
4670 for (++MBBI; MBBI != ME; ++MBBI)
4671 if (MBBI->isCall())
4672 return false;
4673 return true;
4674}
4675
4676/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
4677/// interfered stack access in the range, usually generated by register spill.
///
/// The check only runs when the range is non-empty and the relevant register
/// is being spilled: BP if the function has a base pointer, FP otherwise.
/// Every instruction visited from \p KillMI up to, but not including,
/// \p DefMI that has a frame-index operand is reported through the
/// MCContext error handler. The walk does not stop at the first report.
///
/// NOTE(review): the first parameter line of the signature is not present in
/// this listing; the body reads MF and DefMI from it.
4678void X86FrameLowering::checkInterferedAccess(
4680 MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
4681 bool SpillBP) const {
4682 if (DefMI == KillMI)
4683 return;
     // With a base pointer only a BP spill matters; without one, only an FP
     // spill matters.
4684 if (TRI->hasBasePointer(MF)) {
4685 if (!SpillBP)
4686 return;
4687 } else {
4688 if (!SpillFP)
4689 return;
4690 }
4691
     // KillMI is a reverse iterator, so incrementing moves toward the start
     // of the block, i.e. from KillMI toward DefMI.
4692 auto MI = KillMI;
4693 while (MI != DefMI) {
     // A frame-index operand means this instruction refers to a stack object.
4694 if (any_of(MI->operands(),
4695 [](const MachineOperand &MO) { return MO.isFI(); }))
4696 MF.getContext().reportError(SMLoc(),
4697 "Interference usage of base pointer/frame "
4698 "pointer.");
4699 MI++;
4700 }
4701}
4702
4703/// If a function uses base pointer and the base pointer is clobbered by inline
4704/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
4705/// contains a garbage value.
4706/// For example if a 32b x86 function uses base pointer esi, and esi is
4707/// clobbered by the following inline asm
4708/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
4709/// We need to save esi before the asm and restore it after the asm.
4710///
4711/// The problem can also occur to the frame pointer if there is a function call,
4712/// and the callee uses a different calling convention and clobbers the fp.
4713///
4714/// Because normal frame objects (spill slots) are accessed through the fp/bp
4715/// register, we can't spill fp/bp to normal spill slots.
4716///
4717/// FIXME: There are 2 possible enhancements:
4718/// 1. In many cases there are different physical registers not clobbered by
4719/// inline asm, we can use one of them as base pointer. Or use a virtual
4720/// register as base pointer and let RA allocate a physical register to it.
4721/// 2. If no other instruction accesses the stack through fp/bp from the
4722/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
4723/// skip the save and restore operations.
///
/// Implementation outline: each basic block is scanned backwards, starting
/// at the instruction before its first terminator. For every instruction
/// that accesses FP or BP, a range [DefMI, KillMI] is grown toward the start
/// of the block while either register is live or still being accessed, and
/// saveAndRestoreFPBPUsingSP is called for that range.
///
/// NOTE(review): several lines of this function are not present in this
/// listing -- the signature, the declarations of TFI and X86FI, and one
/// line of the skip condition. The comments below describe only the code
/// that is visible.
4725 Register FP, BP;
4727 if (TFI.hasFP(MF))
4728 FP = TRI->getFrameRegister(MF);
4729 if (TRI->hasBasePointer(MF))
4730 BP = TRI->getBaseRegister();
4731
4732 // Currently only inline asm and function calls can clobber fp/bp. So we can
4733 // do some quick test and return early.
4734 if (!MF.hasInlineAsm()) {
     // Without inline asm only calls remain: drop a register that the
     // function info says no call clobbers.
4736 if (!X86FI->getFPClobberedByCall())
4737 FP = 0;
4738 if (!X86FI->getBPClobberedByCall())
4739 BP = 0;
4740 }
4741 if (!FP && !BP)
4742 return;
4743
4744 for (MachineBasicBlock &MBB : MF) {
4745 bool InsideEHLabels = false;
4746 auto MI = MBB.rbegin(), ME = MBB.rend();
4747 auto TermMI = MBB.getFirstTerminator();
     // Nothing precedes the first terminator in this block: nothing to scan.
4748 if (TermMI == MBB.begin())
4749 continue;
     // Start the backward scan at the instruction before the first terminator.
4750 MI = *(std::prev(TermMI));
4751
4752 while (MI != ME) {
4753 // Skip frame setup/destroy instructions.
4754 // Skip Invoke (call inside try block) instructions.
4755 // Skip instructions handled by target.
4756 if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
4758 isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
4759 ++MI;
4760 continue;
4761 }
4762
     // Each EH_LABEL flips the flag, so it is set between one EH_LABEL and
     // the next one encountered by this scan.
4763 if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
4764 InsideEHLabels = !InsideEHLabels;
4765 ++MI;
4766 continue;
4767 }
4768
4769 bool AccessFP, AccessBP;
4770 // Check if fp or bp is used in MI.
4771 if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
4772 ++MI;
4773 continue;
4774 }
4775
4776 // Look for the range [DefMI, KillMI] in which fp or bp is defined and
4777 // used.
4778 bool FPLive = false, BPLive = false;
4779 bool SpillFP = false, SpillBP = false;
     // KillMI stays at the first instruction visited, which is the latest in
     // program order. DefMI follows the scan toward the start of the block.
4780 auto DefMI = MI, KillMI = MI;
4781 do {
4782 SpillFP |= AccessFP;
4783 SpillBP |= AccessBP;
4784
4785 // Maintain FPLive and BPLive.
     // In this backward scan a def clears the live flag and a use sets it.
     // The use check comes second, so an instruction that both defines and
     // uses the register leaves it marked live.
4786 if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
4787 FPLive = false;
4788 if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
4789 FPLive = true;
4790 if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
4791 BPLive = false;
4792 if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
4793 BPLive = true;
4794
     // Record the instruction just processed as DefMI, then step backwards.
4795 DefMI = MI++;
     // isFPBPAccess is evaluated only when neither register is live; it then
     // refreshes AccessFP/AccessBP for the next iteration.
4796 } while ((MI != ME) &&
4797 (FPLive || BPLive ||
4798 isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
4799
4800 // Don't need to save/restore if FP is accessed through llvm.frameaddress.
4801 if (FPLive && !SpillBP)
4802 continue;
4803
4804 // If the bp is clobbered by a call, we should save and restore outside of
4805 // the frame setup instructions.
4806 if (KillMI->isCall() && DefMI != ME) {
4807 auto FrameSetup = std::next(DefMI);
4808 // Look for frame setup instruction toward the start of the BB.
4809 // If we reach another call instruction, it means no frame setup
4810 // instruction for the current call instruction.
4811 while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
4812 !FrameSetup->isCall())
4813 ++FrameSetup;
4814 // If a frame setup instruction is found, we need to find out the
4815 // corresponding frame destroy instruction.
     // Only done when the frame setup has a non-zero frame size or a
     // non-zero frame adjustment.
4816 if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
4817 (TII.getFrameSize(*FrameSetup) ||
4818 TII.getFrameAdjustment(*FrameSetup))) {
     // Decrementing a reverse iterator moves toward the end of the block.
     // This loop has no bound check; it relies on a frame instruction
     // existing at or after the call.
4819 while (!TII.isFrameInstr(*KillMI))
4820 --KillMI;
     // Widen the range to begin at the frame setup and resume the outer
     // scan just before it.
4821 DefMI = FrameSetup;
4822 MI = DefMI;
4823 ++MI;
4824 }
4825 }
4826
4827 checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);
4828
4829 // Call target function to spill and restore FP and BP registers.
4830 saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
4831 }
4832 }
4833}
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const uint64_t kSplitStackAvailable
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool isTailCallOpcode(unsigned Opc)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static bool is64Bit(const char *name)
static unsigned calculateSetFPREG(uint64_t SPAdjust)
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary)
GetScratchRegister - Get a temp register for performing work in the segmented stack and the Erlang/Hi...
static unsigned getADDriOpcode(bool IsLP64)
static unsigned getPUSH2Opcode(const X86Subtarget &ST)
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm)
static unsigned getLEArOpcode(bool IsLP64)
static unsigned getSUBriOpcode(bool IsLP64)
static bool flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB)
Check if the flags need to be preserved before the terminators.
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, const TargetRegisterInfo *TRI, bool &AccessFP, bool &AccessBP)
static const TargetRegisterClass * getCalleeSavedSpillRC(MCRegister Reg, const X86Subtarget &STI, const TargetRegisterInfo &TRI)
static bool isOpcodeRep(unsigned Opcode)
Return true if an opcode is part of the REP group of instructions.
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm)
static bool isEAXLiveIn(MachineBasicBlock &MBB)
static int computeFPBPAlignmentGap(MachineFunction &MF, const TargetRegisterClass *RC, unsigned NumSpilledRegs)
static unsigned getADDrrOpcode(bool IsLP64)
static bool HasNestArgument(const MachineFunction *MF)
static unsigned getPOPOpcode(const X86Subtarget &ST)
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels)
static unsigned getPOP2Opcode(const X86Subtarget &ST)
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD, const StringRef LiteralName)
Lookup an ERTS parameter in the !hipe.literals named metadata node.
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, MachineBasicBlock::const_iterator MBBI)
static unsigned getSUBrrOpcode(bool IsLP64)
static unsigned getPUSHOpcode(const X86Subtarget &ST)
constexpr uint64_t MaxSPChunk
static const unsigned FramePtr
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
reverse_iterator rend() const
Definition ArrayRef.h:133
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
reverse_iterator rbegin() const
Definition ArrayRef.h:132
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:317
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:324
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
static BranchProbability getOne()
static BranchProbability getZero()
The CalleeSavedInfo class tracks the information needed to locate where a callee saved register is in t...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
A debug info location.
Definition DebugLoc.h:124
unsigned size() const
Definition DenseMap.h:174
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
size_t arg_size() const
Definition Function.h:901
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition Function.h:689
const Argument * const_arg_iterator
Definition Function.h:74
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
Module * getParent()
Get the module that this global value is contained inside of...
bool usesWindowsCFI() const
Definition MCAsmInfo.h:674
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition MCDwarf.h:622
static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int64_t Size, SMLoc Loc={})
A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE.
Definition MCDwarf.h:736
static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_restore says that the rule for Register is now the same as it was at the beginning of the functi...
Definition MCDwarf.h:696
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:615
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:657
static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc={})
.cfi_remember_state Save all current rules for all registers.
Definition MCDwarf.h:716
OpType getOperation() const
Definition MCDwarf.h:804
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:630
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:727
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition MCDwarf.h:638
static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc={})
.cfi_restore_state Restore the previously saved state.
Definition MCDwarf.h:721
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
MCSection * getCompactUnwindSection() const
MCRegAliasIterator enumerates all registers aliasing Reg.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
virtual int64_t getDwarfRegNum(MCRegister Reg, bool isEH) const
Map a target register to an equivalent dwarf register number.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Metadata node.
Definition Metadata.h:1069
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< livein_iterator > liveins() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Live
Register is known to be (at least partially) live.
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
LLVM_ABI void ensureMaxAlignment(Align Alignment)
Make sure the function is at least Align bytes aligned.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasOpaqueSPAdjustment() const
Returns true if the function contains opaque dynamic stack adjustments.
void setCVBytesOfCalleeSavedRegisters(unsigned S)
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment, TargetStackID::Value StackID=TargetStackID::Default)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
bool hasCopyImplyingStackAdjustment() const
Returns true if the function contains operations which will lower down to instructions which manipula...
bool hasStackObjects() const
Return true if there are any stack objects in this function.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
void setOffsetAdjustment(int64_t Adj)
Set the correction for frame offsets.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const std::vector< MCCFIInstruction > & getFrameInstructions() const
Returns a reference to a list of cfi instructions in the function's prologue.
bool hasInlineAsm() const
Returns true if the function contains any inline assembly.
void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair, unsigned SubReg=0)
Create a substitution between one <instr,operand> value to a different, new value.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_front(MachineBasicBlock *MBB)
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, raw_ostream *OS=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const std::vector< LandingPadInfo > & getLandingPads() const
Return a reference to the landing pad info for the current function.
BasicBlockListType::iterator iterator
bool shouldSplitStack() const
Should we be emitting segmented stack stuff for the function.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getNumOperands() const
Returns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI unsigned getDebugInstrNum()
Fetch the instruction number of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI bool isLiveIn(Register Reg) const
NamedMDNode * getNamedMetadata(StringRef Name) const
Return the first NamedMDNode in the module with the specified name.
Definition Module.cpp:301
WinX64EHUnwindMode getWinX64EHUnwindMode() const
Get how unwind information should be generated for x64 Windows.
Definition Module.cpp:960
unsigned getCodeViewFlag() const
Returns the CodeView Version by checking module flags.
Definition Module.cpp:607
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
A tuple of MDNodes.
Definition Metadata.h:1749
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int getOffsetOfLocalArea() const
getOffsetOfLocalArea - This method returns the offset of the local area from the stack pointer on ent...
TargetFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl=Align(1), bool StackReal=true)
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
const Triple & getTargetTriple() const
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
SwiftAsyncFramePointerMode SwiftAsyncFramePointer
Control when and how the Swift async frame pointer bit should be set.
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual Register getFrameRegister(const MachineFunction &MF) const =0
Debug information queries.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
bool isUEFI() const
Tests whether the OS is UEFI.
Definition Triple.h:696
bool isOSWindows() const
Tests whether the OS is Windows.
Definition Triple.h:699
Value wrapper in the Metadata hierarchy.
Definition Metadata.h:459
Value * getValue() const
Definition Metadata.h:499
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void spillFPBP(MachineFunction &MF) const override
If a function uses base pointer and the base pointer is clobbered by inline asm, RA doesn't detect th...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
bool needsFrameIndexResolution(const MachineFunction &MF) const override
X86FrameLowering(const X86Subtarget &STI, MaybeAlign StackAlignOverride)
const X86RegisterInfo * TRI
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFPImpl(const MachineFunction &MF) const override
hasFPImpl - Return true if the specified function should have a dedicated frame pointer register.
MachineBasicBlock::iterator restoreWin32EHStackPointers(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool RestoreSP=false) const
Sets up EBP and optionally ESI based on the incoming EBP value.
int getInitialCFAOffset(const MachineFunction &MF) const override
Return initial CFA offset value i.e.
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a prologue for the target.
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, std::optional< MachineFunction::DebugInstrOperandPair > InstrNum=std::nullopt) const
Emit target stack probe code.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsPrologue) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int64_t AddOffset, bool doMergeWithPrevious) const
Equivalent to: mergeSPUpdates(MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; }...
StackOffset getFrameIndexReferenceSP(const MachineFunction &MF, int FI, Register &SPReg, int Adjustment) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe inline-stub with the actual probe code inline.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
const X86InstrInfo & TII
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &DL, int64_t NumBytes, bool InEpilogue) const
Emit a series of instructions to increment / decrement the stack pointer by a constant value.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as an epilogue for the target.
bool Is64Bit
Is64Bit implies that x86_64 instructions are available.
Register getInitialCFARegister(const MachineFunction &MF) const override
Return the initial CFA register value, i.e. the one valid at the beginning of the function (before any stack operations).
bool Uses64BitFramePtr
True if the 64-bit frame or stack pointer should be used.
unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Adjust the prologue to have the function use segmented stacks.
DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override
Return the frame base information to be encoded in the DWARF subprogram debug info.
void emitCalleeSavedFrameMovesFullCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const override
Emits Dwarf Info specifying offsets of callee saved registers and frame pointer.
int getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, Register &SPReg) const
bool canUseLEAForSPInEpilogue(const MachineFunction &MF) const
Check that LEA can be used on SP in an epilogue sequence for MF.
bool stackProbeFunctionModifiesSP() const override
Does the stack probe function call return with a modified stack pointer?
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitPrologue/emitEpilogue - These methods insert prologue and epilogue code into the function.
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
StackOffset getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, Register &FrameReg, bool IgnoreSPUpdates) const override
Same as getFrameIndexReference, except that the stack pointer (as opposed to the frame pointer) will ...
void restoreWinEHStackPointersInParent(MachineFunction &MF) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Erlang programs may need a special prologue to handle the stack size they might need at runtime.
const X86Subtarget & STI
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
bool isCandidateForPush2Pop2(Register Reg) const
void setRestoreBasePointer(const MachineFunction *MF)
DenseMap< int, unsigned > & getWinEHXMMSlotInfo()
MachineInstr * getStackPtrSaveMI() const
AMXProgModelEnum getAMXProgModel() const
void setStackPtrSaveMI(MachineInstr *MI)
void setCalleeSavedFrameSize(unsigned bytes)
const X86TargetLowering * getTargetLowering() const override
bool isTargetWindowsCoreCLR() const
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition ARMWinEH.h:200
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ X86_INTR
x86 hardware interrupt context.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ X86_FastCall
'fast' analog of X86_StdCall.
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
@ DwarfCFI
DWARF-like instruction based exceptions.
Definition CodeGen.h:55
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:546
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
static bool isFuncletReturnInstr(const MachineInstr &MI)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
@ DeploymentBased
Determine whether to set the bit statically or dynamically based on the deployment target.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr RegState getDefRegState(bool B)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition LEB128.h:24
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
LLVM_ABI void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition LEB128.h:79
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, Register Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
constexpr RegState getUndefRegState(bool B)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Pair of physical register and lane mask.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
union llvm::TargetFrameLowering::DwarfFrameBase::@004076321055032247336074224075335064105264310375 Location
enum llvm::TargetFrameLowering::DwarfFrameBase::FrameBaseKind Kind
SmallVector< WinEHTryBlockMapEntry, 4 > TryBlockMap
SmallVector< WinEHHandlerType, 1 > HandlerArray