#define DEBUG_TYPE "x86-cf-opt"
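
// This pass optimizes call sequences on x86: it converts the MOVs that
// store function arguments into the outgoing-argument stack area into PUSH
// instructions. The push encoding is much smaller than a stack-relative mov,
// and the pushes make the reserved call frame unnecessary, e.g.:
//   movl $42, (%esp)          pushl $42
//   call f              =>    call f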
static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);
namespace {

class X86CallFrameOptimizationImpl {
public:
  bool runOnMachineFunction(MachineFunction &MF);

private:
  // Information we know about a particular call site.
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    // Iterator referring to the frame setup instruction.
    MachineBasicBlock::iterator FrameSetup;

    // Actual call instruction.
    MachineInstr *Call = nullptr;

    // A copy of the stack pointer.
    MachineInstr *SPCopy = nullptr;

    // The total displacement of all passed parameters.
    int64_t ExpectedDist = 0;

    // The sequence of storing instructions used to pass the parameters.
    SmallVector<MachineInstr *, 4> ArgStoreVector;

    // True if this call site has no stack parameters.
    bool NoStackParams = false;

    // True if this call site can use push instructions.
    bool UsePush = false;
  };

  typedef SmallVector<CallContext, 8> ContextVector;
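
  // How the pieces fit together: runOnMachineFunction() bails unless
  // isLegal() holds, builds one CallContext per call-frame setup with
  // collectCallInfo(), gates the whole function on isProfitable(), and
  // finally rewrites each push-eligible sequence via adjustCallSequence().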
  bool isLegal(MachineFunction &MF);

  bool isProfitable(MachineFunction &MF, ContextVector &CallSeqVector);

  void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, CallContext &Context);

  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                   Register Reg);
  enum InstClassification { Convert, Skip, Exit };

  InstClassification classifyInstruction(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const X86RegisterInfo &RegInfo,
                                         const DenseSet<MCRegister> &UsedRegs);
  const X86InstrInfo *TII = nullptr;
  const X86FrameLowering *TFL = nullptr;
  const X86Subtarget *STI = nullptr;
  MachineRegisterInfo *MRI = nullptr;

  unsigned SlotSize = 0;
  unsigned Log2SlotSize = 0;
};
class X86CallFrameOptimizationLegacy : public MachineFunctionPass {
public:
  static char ID;

  X86CallFrameOptimizationLegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
};

} // end anonymous namespace
char X86CallFrameOptimizationLegacy::ID = 0;

INITIALIZE_PASS(X86CallFrameOptimizationLegacy, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)
// This checks whether the transformation is legal. It also returns false in
// cases where the transformation is potentially legal, but we don't even
// want to try it.
bool X86CallFrameOptimizationImpl::isLegal(MachineFunction &MF) {
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // in the compact unwind encoding that Darwin uses, so bail if there is
  // any danger of one being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;
  // Check that every call-frame setup/destroy pair opens and closes within
  // the same basic block and that frames are not nested; SP adjustment
  // breaks otherwise. Also bail if any call adjusts the stack by at least
  // the stack-probe size, since we would then have to synthesize extra
  // stack-probe calls for the argument memory.
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
          return false;
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}
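
// A worked example of the size heuristic below, assuming a 32-bit target
// (SlotSize = 4) and a 16-byte stack alignment: a call passing three stack
// arguments has ExpectedDist = 12. Pushes cost 3 bytes for the add after
// the call, plus 3 for the realigning sub (12 is not 16-aligned), while
// saving roughly 3 bytes per converted mov (9 in total) - a net win of
// 3 bytes.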
// Check whether this transformation is profitable for a particular
// function, in terms of code size.
bool X86CallFrameOptimizationImpl::isProfitable(MachineFunction &MF,
                                                ContextVector &CallSeqVector) {
  // This transformation is always a win when we do not expect to have
  // a reserved call frame. Under other circumstances, it may be either
  // a win or a loss, and requires a heuristic.
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  Align StackAlign = TFL->getStackAlign();

  int64_t Advantage = 0;
  for (const auto &CC : CallSeqVector) {
    // Call sites where no parameters are passed on the stack do not affect
    // the cost, since they need no stack adjustment.
    if (CC.NoStackParams)
      continue;

    if (!CC.UsePush) {
      // If we don't use pushes for a particular call site, we pay for not
      // having a reserved call frame with an additional sub/add esp pair,
      // roughly 3 bytes each.
      Advantage -= 6;
    } else {
      // We can use pushes. First, account for the fixed costs: we'll need
      // an add after the call...
      Advantage -= 3;
      // ...and, if the adjustment does not match the stack alignment, a sub
      // before it.
      if (!isAligned(StackAlign, CC.ExpectedDist))
        Advantage -= 3;
      // Each push saves roughly 3 bytes over the mov it replaces.
      Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
    }
  }

  return Advantage >= 0;
}
bool X86CallFrameOptimizationImpl::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (!isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;

  for (const auto &CC : CallSeqVector)
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }

  return Changed;
}
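
// Why classifyInstruction() below must reject an instruction that defs a
// register used by an already-collected argument store: the movs are
// replaced by pushes in reverse order, all inserted just before the call,
// so in
//   movl %edx, (%esp);  movl $0, %edx;  call f
// the "pushl %edx" would be emitted after the redefinition of %edx and
// would push the wrong value.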
X86CallFrameOptimizationImpl::InstClassification
X86CallFrameOptimizationImpl::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, const DenseSet<MCRegister> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack and
  // special cases of constant stores.
  switch (MI->getOpcode()) {
  case X86::AND16mi:
  case X86::AND32mi:
  case X86::AND64mi32: {
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi:
  case X86::OR32mi:
  case X86::OR64mi32: {
    const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // To cover more cases, tolerate any other instruction that is not a call,
  // does not store to memory, does not def or use the stack pointer, and
  // does not def any register that was used by a preceding argument store.
  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg.isPhysical())
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef())
      for (MCRegister U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
  }

  return Skip;
}
void X86CallFrameOptimizationImpl::collectCallInfo(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence.
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on the number
  // of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters.
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }
  // Skip over DEBUG_VALUEs. For globals in PIC mode, we can have some LEAs
  // here as well; skip them too.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  Register StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register here. If present, use that virtual register as the stack
  // pointer instead, and remember the COPY so we can skip it while scanning
  // the rest of the call sequence. The COPY can appear anywhere up to the
  // call, so use the call as the search boundary.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }
  // Scan the call setup sequence for the pattern we expect.
  Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<MCRegister> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit;
       ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
441 "Negative stack displacement when passing parameters");
444 if (StackDisp & (SlotSize - 1))
446 StackDisp >>= Log2SlotSize;
449 "Function call has more parameters than the stack is adjusted for.");
452 if (
Context.ArgStoreVector[StackDisp] !=
nullptr)
454 Context.ArgStoreVector[StackDisp] = &*
I;
456 for (
const MachineOperand &MO :
I->uses()) {
479 for (; MMI != MME; ++MMI,
Context.ExpectedDist += SlotSize)
484 if (MMI ==
Context.ArgStoreVector.begin())
489 for (; MMI != MME; ++MMI)
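
// adjustCallSequence() then performs the rewrite. Illustrative result for a
// 32-bit call passing two stack arguments (pushes are emitted in reverse
// offset order, so the stack layout is preserved):
//   movl %eax, (%esp)             pushl $7
//   movl $7, 4(%esp)       =>     pushl %eax
//   call f                        call f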
void X86CallFrameOptimizationImpl::adjustCallSequence(
    MachineFunction &MF, const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call. Do not remove
  // the FrameSetup instruction, but adjust the parameters; PEI will end up
  // finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  const DebugLoc &DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();

  // Now, iterate through the vector in reverse order, and replace the store
  // to stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    const MachineOperand &PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi:
    case X86::AND32mi:
    case X86::AND64mi32:
    case X86::OR16mi:
    case X86::OR32mi:
    case X86::OR64mi32:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                 .add(PushOp)
                 .getInstr();
      Push->cloneMemRefs(MF, *Store);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      Register Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of
      // the push into the instruction.
      bool SlowPUSHrmm = STI->slowTwoMemOps();

      // Check that this is legal, and that the push isn't clobbered by a
      // different memory operation between the load and the push.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).getInstr();

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));
        Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
        Push->cloneMemRefs(MF, *Store);
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequence. There
  // should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}
MachineInstr *X86CallFrameOptimizationImpl::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, Register Reg) {
  // Do an extremely restricted version of load folding: the pushed value
  // must come from a virtual register...
  if (!Reg.isVirtual())
    return nullptr;

  // Since Reg is virtual and still unallocated here, it has a single def.
  MachineInstr &DefMI = *MRI->getVRegDef(Reg);

  // ...the push must be its only non-debug use...
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  // ...the def must be a plain load in the same block as the call
  // sequence...
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // ...and nothing between the load and the frame setup may clobber memory.
  for (MachineBasicBlock::iterator I = DefMI, E = FrameSetup; I != E; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}
FunctionPass *llvm::createX86CallFrameOptimizationLegacyPass() {
  return new X86CallFrameOptimizationLegacy();
}

bool X86CallFrameOptimizationLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  X86CallFrameOptimizationImpl Impl;
  return Impl.runOnMachineFunction(MF);
}
PreservedAnalyses
X86CallFrameOptimizationPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  X86CallFrameOptimizationImpl Impl;
  bool Changed = Impl.runOnMachineFunction(MF);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
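
// For debugging or benchmarking, the whole transformation can be switched
// off with the -no-x86-call-frame-opt flag defined at the top of this file.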