//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that optimizes call sequences on x86.
// Currently, it converts movs of function parameters onto the stack into
// pushes. This is beneficial for two main reasons:
// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
// 2) It is possible to push memory arguments directly. So, if the
//    transformation is performed pre-reg-alloc, it can help relieve
//    register pressure.
//
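// For illustration (a hypothetical example, not taken from the rest of this
// file), a 32-bit call sequence such as:
//
//   movl $42, 4(%esp)
//   movl %eax, (%esp)
//   calll foo
//
// may be rewritten by this pass into:
//
//   pushl $42
//   pushl %eax
//   calll foo
//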
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "x86-cf-opt"

static cl::opt<bool>
    NoX86CFOpt("no-x86-call-frame-opt",
               cl::desc("Avoid optimizing x86 call frames for size"),
               cl::init(false), cl::Hidden);

namespace {

class X86CallFrameOptimization : public MachineFunctionPass {
public:
  X86CallFrameOptimization() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;

private:
  // Information we know about a particular call site
  struct CallContext {
    CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

    // Iterator referring to the frame setup instruction
    MachineBasicBlock::iterator FrameSetup;

    // Actual call instruction
    MachineInstr *Call = nullptr;

    // A copy of the stack pointer
    MachineInstr *SPCopy = nullptr;

    // The total displacement of all passed parameters
    int64_t ExpectedDist = 0;

    // The sequence of storing instructions used to pass the parameters
    SmallVector<MachineInstr *, 4> ArgStoreVector;

    // True if this call site has no stack parameters
    bool NoStackParams = false;

    // True if this call site can use push instructions
    bool UsePush = false;
  };

  typedef SmallVector<CallContext, 8> ContextVector;

  bool isLegal(MachineFunction &MF);

  bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);

  void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, CallContext &Context);

  void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

  MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
                                   unsigned Reg);

  enum InstClassification { Convert, Skip, Exit };

  InstClassification classifyInstruction(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         const X86RegisterInfo &RegInfo,
                                         DenseSet<unsigned int> &UsedRegs);

  StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

  const X86InstrInfo *TII = nullptr;
  const X86FrameLowering *TFL = nullptr;
  const X86Subtarget *STI = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  unsigned SlotSize = 0;
  unsigned Log2SlotSize = 0;
};

} // end anonymous namespace
char X86CallFrameOptimization::ID = 0;
INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
                "X86 Call Frame Optimization", false, false)

// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
// we don't even want to try.
bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
  if (NoX86CFOpt.getValue())
    return false;

  // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
  // in the compact unwind encoding that Darwin uses. So, bail if there
  // is a danger of that being generated.
  if (STI->isTargetDarwin() &&
      (!MF.getLandingPads().empty() ||
       (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
    return false;

  // It is not valid to change the stack pointer outside the prolog/epilog
  // on 64-bit Windows.
  if (STI->isTargetWin64())
    return false;

  // You would expect straight-line code between call-frame setup and
  // call-frame destroy. You would be wrong. There are circumstances (e.g.
  // CMOV_GR8 expansion of a select that feeds a function call!) where we can
  // end up with the setup and the destroy in different basic blocks.
  // This is bad, and breaks SP adjustment.
  // So, check that all of the frames in the function are closed inside
  // the same block, and, for good measure, that there are no nested frames.
  //
  // If any call allocates more argument stack memory than the stack
  // probe size, don't do this optimization. Otherwise, this pass
  // would need to synthesize additional stack probe calls to allocate
  // memory for arguments.
  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
  unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
  bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
  unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
  for (MachineBasicBlock &BB : MF) {
    bool InsideFrameSequence = false;
    for (MachineInstr &MI : BB) {
      if (MI.getOpcode() == FrameSetupOpcode) {
        if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
          return false;
        if (InsideFrameSequence)
          return false;
        InsideFrameSequence = true;
      } else if (MI.getOpcode() == FrameDestroyOpcode) {
        if (!InsideFrameSequence)
          return false;
        InsideFrameSequence = false;
      }
    }

    if (InsideFrameSequence)
      return false;
  }

  return true;
}

// Check whether this transformation is profitable for a particular
// function - in terms of code size.
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
                                            ContextVector &CallSeqVector) {
  // This transformation is always a win when we do not expect to have
  // a reserved call frame. Under other circumstances, it may be either
  // a win or a loss, and requires a heuristic.
  bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
  if (CannotReserveFrame)
    return true;

  Align StackAlign = TFL->getStackAlign();

  int64_t Advantage = 0;
  for (auto CC : CallSeqVector) {
    // Call sites where no parameters are passed on the stack
    // do not affect the cost, since there needs to be no
    // stack adjustment.
    if (CC.NoStackParams)
      continue;

    if (!CC.UsePush) {
      // If we don't use pushes for a particular call site,
      // we pay for not having a reserved call frame with an
      // additional sub/add esp pair. The cost is ~3 bytes per instruction,
      // depending on the size of the constant.
      // TODO: Callee-pop functions should have a smaller penalty, because
      // an add is needed even with a reserved call frame.
      Advantage -= 6;
    } else {
      // We can use pushes. First, account for the fixed costs.
      // We'll need an add after the call.
      Advantage -= 3;
      // If we have to realign the stack, we'll also need a sub before the
      // pushes.
      if (!isAligned(StackAlign, CC.ExpectedDist))
        Advantage -= 3;
      // Now, for each push, we save ~3 bytes. For small constants, we actually
      // save more (up to 5 bytes), but 3 should be a good approximation.
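      // As a rough worked example (illustrative numbers, not from the original
      // comments): a 32-bit call passing three 4-byte stack arguments scores
      // -3 for the add after the call and +9 for the three movs that become
      // pushes, i.e. a net advantage of 6 bytes.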
      Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
    }
  }

  return Advantage >= 0;
}

bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TFL = STI->getFrameLowering();
  MRI = &MF.getRegInfo();

  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
  SlotSize = RegInfo.getSlotSize();
  assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
  Log2SlotSize = Log2_32(SlotSize);

  if (skipFunction(MF.getFunction()) || !isLegal(MF))
    return false;

  unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

  bool Changed = false;

  ContextVector CallSeqVector;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
      if (MI.getOpcode() == FrameSetupOpcode) {
        CallContext Context;
        collectCallInfo(MF, MBB, MI, Context);
        CallSeqVector.push_back(Context);
      }

  if (!isProfitable(MF, CallSeqVector))
    return false;

  for (auto CC : CallSeqVector) {
    if (CC.UsePush) {
      adjustCallSequence(MF, CC);
      Changed = true;
    }
  }

  return Changed;
}

X86CallFrameOptimization::InstClassification
X86CallFrameOptimization::classifyInstruction(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const X86RegisterInfo &RegInfo, DenseSet<unsigned int> &UsedRegs) {
  if (MI == MBB.end())
    return Exit;

  // The instructions we actually care about are movs onto the stack or special
  // cases of constant-stores to stack
  switch (MI->getOpcode()) {
  case X86::AND16mi8:
  case X86::AND32mi8:
  case X86::AND64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == 0 ? Convert : Exit;
  }
  case X86::OR16mi8:
  case X86::OR32mi8:
  case X86::OR64mi8: {
    MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands);
    return ImmOp.getImm() == -1 ? Convert : Exit;
  }
  case X86::MOV32mi:
  case X86::MOV32mr:
  case X86::MOV64mi32:
  case X86::MOV64mr:
    return Convert;
  }

  // Not all calling conventions have only stack MOVs between the stack
  // adjust and the call.

  // We want to tolerate other instructions, to cover more cases.
  // In particular:
  // a) PCrel calls, where we expect an additional COPY of the basereg.
  // b) Passing frame-index addresses.
  // c) Calling conventions that have inreg parameters. These generate
  //    both copies and movs into registers.
  // To avoid creating lots of special cases, allow any instruction
  // that does not write into memory, does not def or use the stack
  // pointer, and does not def any register that was used by a preceding
  // push.
  // (Reading from memory is allowed, even if referenced through a
  // frame index, since these will get adjusted properly in PEI)

  // The reason for the last condition is that the pushes can't replace
  // the movs in place, because the order must be reversed.
  // So if we have a MOV32mr that uses EDX, then an instruction that defs
  // EDX, and then the call, after the transformation the push will use
  // the modified version of EDX, and not the original one.
  // Since we are still in SSA form at this point, we only need to
  // make sure we don't clobber any *physical* registers that were
  // used by an earlier mov that will become a push.
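  // As a concrete illustration of that last point (hypothetical sequence):
  //   movl %edx, 4(%esp)   ; a store that will become a push of %edx
  //   movl $0, %edx        ; defs EDX, which is already in UsedRegs -> Exit
  //   calll foo
  // The push would be emitted next to the call, i.e. after the def of EDX,
  // so it would push the clobbered value rather than the original one.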

  if (MI->isCall() || MI->mayStore())
    return Exit;

  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isPhysicalRegister(Reg))
      continue;
    if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
      return Exit;
    if (MO.isDef()) {
      for (unsigned int U : UsedRegs)
        if (RegInfo.regsOverlap(Reg, U))
          return Exit;
    }
  }

  return Skip;
}

void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
                                               MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator I,
                                               CallContext &Context) {
  // Check that this particular call sequence is amenable to the
  // transformation.
  const X86RegisterInfo &RegInfo =
      *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());

  // We expect to enter this at the beginning of a call sequence
  assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
  MachineBasicBlock::iterator FrameSetup = I++;
  Context.FrameSetup = FrameSetup;

  // How much do we adjust the stack? This puts an upper bound on
  // the number of parameters actually passed on it.
  unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

  // A zero adjustment means no stack parameters
  if (!MaxAdjust) {
    Context.NoStackParams = true;
    return;
  }

  // Skip over DEBUG_VALUE.
  // For globals in PIC mode, we can have some LEAs here. Skip them as well.
  // TODO: Extend this to something that covers more cases.
  while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
    ++I;

  Register StackPtr = RegInfo.getStackRegister();
  auto StackPtrCopyInst = MBB.end();
  // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
  // register. If it's there, use that virtual register as stack pointer
  // instead. Also, we need to locate this instruction so that we can later
  // safely ignore it while doing the conservative processing of the call
  // chain. The COPY can be located anywhere between the call-frame setup
  // instruction and its first use. We use the call instruction as a boundary
  // because it is usually cheaper to check if an instruction is a call than
  // checking if an instruction uses a register.
  for (auto J = I; !J->isCall(); ++J)
    if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
        J->getOperand(1).getReg() == StackPtr) {
      StackPtrCopyInst = J;
      Context.SPCopy = &*J++;
      StackPtr = Context.SPCopy->getOperand(0).getReg();
      break;
    }

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case - a sequence of store instructions that
  // push a sequence of stack-slot-aligned values onto the stack, with
  // no gaps between them.
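  // For illustration, on 32-bit targets the sequence we are after looks
  // roughly like (hypothetical example):
  //   ADJCALLSTACKDOWN32 8, ...
  //   movl %eax, (%esp)
  //   movl $42, 4(%esp)
  //   calll foo
  //   ADJCALLSTACKUP32 8, ...
  // with the stores filling consecutive stack slots from offset 0 upwards,
  // each slot written exactly once.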
  if (MaxAdjust > 4)
    Context.ArgStoreVector.resize(MaxAdjust, nullptr);

  DenseSet<unsigned int> UsedRegs;

  for (InstClassification Classification = Skip; Classification != Exit; ++I) {
    // If this is the COPY of the stack pointer, it's ok to ignore.
    if (I == StackPtrCopyInst)
      continue;
    Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
    if (Classification != Convert)
      continue;
    // We know the instruction has a supported store opcode.
    // We only want movs of the form:
    // mov imm/reg, k(%StackPtr)
    // If we run into something else, bail.
    // Note that AddrBaseReg may, counter to its name, not be a register,
    // but rather a frame index.
    // TODO: Support the fi case. This should probably work now that we
    // have the infrastructure to track the stack pointer within a call
    // sequence.
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
    assert(StackDisp >= 0 &&
           "Negative stack displacement when passing parameters");

    // We really don't want to consider the unaligned case.
    if (StackDisp & (SlotSize - 1))
      return;
    StackDisp >>= Log2SlotSize;

    assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
           "Function call has more parameters than the stack is adjusted for.");

    // If the same stack slot is being filled twice, something's fishy.
    if (Context.ArgStoreVector[StackDisp] != nullptr)
      return;
    Context.ArgStoreVector[StackDisp] = &*I;

    for (const MachineOperand &MO : I->uses()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Register::isPhysicalRegister(Reg))
        UsedRegs.insert(Reg);
    }
  }

  --I;

  // We now expect the end of the sequence. If we stopped early,
  // or reached the end of the block without finding a call, bail.
  if (I == MBB.end() || !I->isCall())
    return;

  Context.Call = &*I;
  if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
    return;

  // Now, go through the vector, and see that we don't have any gaps,
  // but only a series of storing instructions.
  auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
  for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
    if (*MMI == nullptr)
      break;

  // If the call had no parameters, do nothing
  if (MMI == Context.ArgStoreVector.begin())
    return;

  // We are either at the last parameter, or a gap.
  // Make sure it's not a gap
  for (; MMI != MME; ++MMI)
    if (*MMI != nullptr)
      return;

  Context.UsePush = true;
}

void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
                                                  const CallContext &Context) {
  // Ok, we can in fact do the transformation for this call.
  // Do not remove the FrameSetup instruction, but adjust the parameters.
  // PEI will end up finalizing the handling of this.
  MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
  MachineBasicBlock &MBB = *(FrameSetup->getParent());
  TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

  DebugLoc DL = FrameSetup->getDebugLoc();
  bool Is64Bit = STI->is64Bit();
  // Now, iterate through the vector in reverse order, and replace the stores
  // to the stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
  // replace uses.
  for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
    MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
    MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands);
    MachineBasicBlock::iterator Push = nullptr;
    unsigned PushOpcode;
    switch (Store->getOpcode()) {
    default:
      llvm_unreachable("Unexpected Opcode!");
    case X86::AND16mi8:
    case X86::AND32mi8:
    case X86::AND64mi8:
    case X86::OR16mi8:
    case X86::OR32mi8:
    case X86::OR64mi8:
    case X86::MOV32mi:
    case X86::MOV64mi32:
      PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
      // If the operand is a small (8-bit) immediate, we can use a
      // PUSH instruction with a shorter encoding.
      // Note that isImm() may fail even though this is a MOVmi, because
      // the operand can also be a symbol.
      if (PushOp.isImm()) {
        int64_t Val = PushOp.getImm();
        if (isInt<8>(Val))
          PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
      }
      Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
      Push->cloneMemRefs(MF, *Store);
      break;
    case X86::MOV32mr:
    case X86::MOV64mr: {
      Register Reg = PushOp.getReg();

      // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
      // in preparation for the PUSH64. The upper 32 bits can be undef.
      if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
        Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
        Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
        BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
            .addReg(UndefReg)
            .add(PushOp)
            .addImm(X86::sub_32bit);
      }

      // If PUSHrmm is not slow on this target, try to fold the source of the
      // push into the instruction.
      bool SlowPUSHrmm = STI->slowTwoMemOps();

      // Check that this is legal to fold. Right now, we're extremely
      // conservative about that.
      MachineInstr *DefMov = nullptr;
      if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
        PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));

        unsigned NumOps = DefMov->getDesc().getNumOperands();
        for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
          Push->addOperand(DefMov->getOperand(i));
        Push->cloneMergedMemRefs(MF, {&*DefMov, &*Store});

        DefMov->eraseFromParent();
      } else {
        PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
        Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
                   .addReg(Reg)
                   .getInstr();
        Push->cloneMemRefs(MF, *Store);
      }
      break;
    }
    }

    // For debugging, when using SP-based CFA, we need to adjust the CFA
    // offset after each push.
    // TODO: This is needed only if we require precise CFA.
    if (!TFL->hasFP(MF))
      TFL->BuildCFI(
          MBB, std::next(Push), DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));

    MBB.erase(Store);
  }

  // The stack-pointer copy is no longer used in the call sequences.
  // There should not be any other users, but we can't commit to that, so:
  if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
    Context.SPCopy->eraseFromParent();

  // Once we've done this, we need to make sure PEI doesn't assume a reserved
  // frame.
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  FuncInfo->setHasPushSequences(true);
}

MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
    MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
  // Do an extremely restricted form of load folding.
  // ISel will often create patterns like:
  // movl 4(%edi), %eax
  // movl 8(%edi), %ecx
  // movl 12(%edi), %edx
  // movl %edx, 8(%esp)
  // movl %ecx, 4(%esp)
  // movl %eax, (%esp)
  // call
  // Get rid of those with prejudice.
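  // If the folding succeeds, the loads are absorbed into the pushes, so the
  // sequence above may end up as (illustrative result):
  // pushl 12(%edi)
  // pushl 8(%edi)
  // pushl 4(%edi)
  // call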
  if (!Register::isVirtualRegister(Reg))
    return nullptr;

  // Make sure this is the only use of Reg.
  if (!MRI->hasOneNonDBGUse(Reg))
    return nullptr;

  MachineInstr &DefMI = *MRI->getVRegDef(Reg);

  // Make sure the def is a MOV from memory.
  // If the def is in another block, give up.
  if ((DefMI.getOpcode() != X86::MOV32rm &&
       DefMI.getOpcode() != X86::MOV64rm) ||
      DefMI.getParent() != FrameSetup->getParent())
    return nullptr;

  // Make sure we don't have any instructions between DefMI and the
  // push that make folding the load illegal.
  for (MachineBasicBlock::iterator I = DefMI; I != FrameSetup; ++I)
    if (I->isLoadFoldBarrier())
      return nullptr;

  return &DefMI;
}

FunctionPass *llvm::createX86CallFrameOptimization() {
  return new X86CallFrameOptimization();
}