LLVM 17.0.0git
X86CallFrameOptimization.cpp
Go to the documentation of this file.
1//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pass that optimizes call sequences on x86.
10// Currently, it converts movs of function parameters onto the stack into
11// pushes. This is beneficial for two main reasons:
12// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
13// 2) It is possible to push memory arguments directly. So, if the
14// transformation is performed pre-reg-alloc, it can help relieve
15// register pressure.
16//
17//===----------------------------------------------------------------------===//
18
20#include "X86.h"
21#include "X86FrameLowering.h"
22#include "X86InstrInfo.h"
24#include "X86RegisterInfo.h"
25#include "X86Subtarget.h"
26#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/StringRef.h"
39#include "llvm/IR/DebugLoc.h"
40#include "llvm/IR/Function.h"
41#include "llvm/MC/MCDwarf.h"
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "x86-cf-opt"
53
// Hidden boolean command-line option, off by default. isLegal() returns false
// when it is set, so the pass then leaves every function untouched.
54static cl::opt<bool>
55 NoX86CFOpt("no-x86-call-frame-opt",
56 cl::desc("Avoid optimizing x86 call frames for size"),
57 cl::init(false), cl::Hidden);
58
59namespace {
60
// Machine-function pass that turns the stores which place call arguments on
// the stack into push instructions. For each function it first checks
// isLegal(), then gathers a CallContext per call-frame setup instruction,
// asks isProfitable() about the whole set, and finally rewrites every call
// site whose context has UsePush set.
61class X86CallFrameOptimization : public MachineFunctionPass {
62public:
63 X86CallFrameOptimization() : MachineFunctionPass(ID) { }
64
// Pass entry point; returns true if at least one call sequence was rewritten.
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
// Pass identifier handed to the MachineFunctionPass constructor above.
67 static char ID;
68
69private:
70 // Information we know about a particular call site
71 struct CallContext {
// ArgStoreVector starts with four null entries; collectCallInfo grows it when
// the frame holds more than four slots.
72 CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}
73
74 // Iterator referring to the frame setup instruction
// NOTE(review): the FrameSetup member declaration is not visible in this
// listing, although the constructor above initializes it.
76
77 // Actual call instruction
78 MachineInstr *Call = nullptr;
79
80 // A copy of the stack pointer
// (null when no such COPY was found in the call sequence)
81 MachineInstr *SPCopy = nullptr;
82
83 // The total displacement of all passed parameters
// (in bytes; collectCallInfo adds SlotSize per contiguous filled slot)
84 int64_t ExpectedDist = 0;
85
86 // The sequence of storing instructions used to pass the parameters
// Indexed by stack slot (displacement divided by the slot size); a null entry
// means no store to that slot has been seen.
87 SmallVector<MachineInstr *, 4> ArgStoreVector;
88
89 // True if this call site has no stack parameters
90 bool NoStackParams = false;
91
92 // True if this call site can use push instructions
93 bool UsePush = false;
94 };
95
// One CallContext per call-frame setup instruction found in the function.
96 typedef SmallVector<CallContext, 8> ContextVector;
97
// Returns false when the transformation must not, or should not, be tried.
98 bool isLegal(MachineFunction &MF);
99
// Code-size heuristic over all collected call sites.
100 bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);
101
// Fills Context for the call sequence that starts at the frame setup I.
102 void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
103 MachineBasicBlock::iterator I, CallContext &Context);
104
// Replaces the argument stores recorded in Context with pushes.
105 void adjustCallSequence(MachineFunction &MF, const CallContext &Context);
106
// Returns the load defining Reg if it can be folded into the push, else null.
107 MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
108 Register Reg);
109
// Result of classifyInstruction:
//   Convert - a store opcode that is a candidate for conversion to a push,
//   Skip    - an instruction the scan may step over,
//   Exit    - the scan of the call sequence stops here.
110 enum InstClassification { Convert, Skip, Exit };
111
// NOTE(review): the middle of this parameter list is not visible in this
// listing; the definition's body also refers to MI and RegInfo.
112 InstClassification classifyInstruction(MachineBasicBlock &MBB,
115 DenseSet<unsigned int> &UsedRegs);
116
117 StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
118
// Per-function state, refreshed at the top of runOnMachineFunction.
119 const X86InstrInfo *TII = nullptr;
120 const X86FrameLowering *TFL = nullptr;
121 const X86Subtarget *STI = nullptr;
122 MachineRegisterInfo *MRI = nullptr;
// Stack slot size taken from the register info, and its base-2 logarithm.
123 unsigned SlotSize = 0;
124 unsigned Log2SlotSize = 0;
125};
126
127} // end anonymous namespace
// Definition of the static pass identifier declared in the class.
128char X86CallFrameOptimization::ID = 0;
// Invokes INITIALIZE_PASS for this pass with DEBUG_TYPE as the argument
// string and "X86 Call Frame Optimization" as the name; both trailing flags
// are false.
129INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE,
130 "X86 Call Frame Optimization", false, false)
131
132// This checks whether the transformation is legal.
133// Also returns false in cases where it's potentially legal, but
134// we don't even want to try.
135bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
136 if (NoX86CFOpt.getValue())
137 return false;
138
139 // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
140 // in the compact unwind encoding that Darwin uses. So, bail if there
141 // is a danger of that being generated.
142 if (STI->isTargetDarwin() &&
143 (!MF.getLandingPads().empty() ||
144 (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
145 return false;
146
147 // It is not valid to change the stack pointer outside the prolog/epilog
148 // on 64-bit Windows.
149 if (STI->isTargetWin64())
150 return false;
151
152 // You would expect straight-line code between call-frame setup and
153 // call-frame destroy. You would be wrong. There are circumstances (e.g.
154 // CMOV_GR8 expansion of a select that feeds a function call!) where we can
155 // end up with the setup and the destroy in different basic blocks.
156 // This is bad, and breaks SP adjustment.
157 // So, check that all of the frames in the function are closed inside
158 // the same block, and, for good measure, that there are no nested frames.
159 //
160 // If any call allocates more argument stack memory than the stack
161 // probe size, don't do this optimization. Otherwise, this pass
162 // would need to synthesize additional stack probe calls to allocate
163 // memory for arguments.
164 unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
165 unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
166 bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
167 unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
168 for (MachineBasicBlock &BB : MF) {
169 bool InsideFrameSequence = false;
170 for (MachineInstr &MI : BB) {
171 if (MI.getOpcode() == FrameSetupOpcode) {
172 if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
173 return false;
174 if (InsideFrameSequence)
175 return false;
176 InsideFrameSequence = true;
177 } else if (MI.getOpcode() == FrameDestroyOpcode) {
178 if (!InsideFrameSequence)
179 return false;
180 InsideFrameSequence = false;
181 }
182 }
183
184 if (InsideFrameSequence)
185 return false;
186 }
187
188 return true;
189}
190
191// Check whether this transformation is profitable for a particular
192// function - in terms of code size.
193bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
194 ContextVector &CallSeqVector) {
195 // This transformation is always a win when we do not expect to have
196 // a reserved call frame. Under other circumstances, it may be either
197 // a win or a loss, and requires a heuristic.
198 bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
199 if (CannotReserveFrame)
200 return true;
201
202 Align StackAlign = TFL->getStackAlign();
203
204 int64_t Advantage = 0;
205 for (const auto &CC : CallSeqVector) {
206 // Call sites where no parameters are passed on the stack
207 // do not affect the cost, since there needs to be no
208 // stack adjustment.
209 if (CC.NoStackParams)
210 continue;
211
212 if (!CC.UsePush) {
213 // If we don't use pushes for a particular call site,
214 // we pay for not having a reserved call frame with an
215 // additional sub/add esp pair. The cost is ~3 bytes per instruction,
216 // depending on the size of the constant.
217 // TODO: Callee-pop functions should have a smaller penalty, because
218 // an add is needed even with a reserved call frame.
219 Advantage -= 6;
220 } else {
221 // We can use pushes. First, account for the fixed costs.
222 // We'll need a add after the call.
223 Advantage -= 3;
224 // If we have to realign the stack, we'll also need a sub before
225 if (!isAligned(StackAlign, CC.ExpectedDist))
226 Advantage -= 3;
227 // Now, for each push, we save ~3 bytes. For small constants, we actually,
228 // save more (up to 5 bytes), but 3 should be a good approximation.
229 Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
230 }
231 }
232
233 return Advantage >= 0;
234}
235
236bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
237 STI = &MF.getSubtarget<X86Subtarget>();
238 TII = STI->getInstrInfo();
239 TFL = STI->getFrameLowering();
240 MRI = &MF.getRegInfo();
241
242 const X86RegisterInfo &RegInfo =
243 *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
244 SlotSize = RegInfo.getSlotSize();
245 assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
246 Log2SlotSize = Log2_32(SlotSize);
247
248 if (skipFunction(MF.getFunction()) || !isLegal(MF))
249 return false;
250
251 unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
252
253 bool Changed = false;
254
255 ContextVector CallSeqVector;
256
257 for (auto &MBB : MF)
258 for (auto &MI : MBB)
259 if (MI.getOpcode() == FrameSetupOpcode) {
260 CallContext Context;
261 collectCallInfo(MF, MBB, MI, Context);
262 CallSeqVector.push_back(Context);
263 }
264
265 if (!isProfitable(MF, CallSeqVector))
266 return false;
267
268 for (const auto &CC : CallSeqVector) {
269 if (CC.UsePush) {
270 adjustCallSequence(MF, CC);
271 Changed = true;
272 }
273 }
274
275 return Changed;
276}
277
// Classifies the instruction at MI for the scan of a call sequence:
//   Convert - a store opcode that is a candidate for becoming a push,
//   Skip    - an instruction the scan may step over,
//   Exit    - the scan must stop here: end of block, a call, any other
//             store, any access to the stack pointer, or a def of a
//             physical register that overlaps one in UsedRegs.
// NOTE(review): the parameter list of this definition is not visible in this
// listing; the body refers to MBB, MI, RegInfo and UsedRegs.
278X86CallFrameOptimization::InstClassification
279X86CallFrameOptimization::classifyInstruction(
282 if (MI == MBB.end())
283 return Exit;
284
285 // The instructions we actually care about are movs onto the stack or special
286 // cases of constant-stores to stack
287 switch (MI->getOpcode()) {
288 case X86::AND16mi8:
289 case X86::AND32mi8:
290 case X86::AND64mi8: {
// An AND with immediate 0 always leaves 0 in memory, so it acts as a
// constant store; any other immediate ends the scan.
291 const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
292 return ImmOp.getImm() == 0 ? Convert : Exit;
293 }
294 case X86::OR16mi8:
295 case X86::OR32mi8:
296 case X86::OR64mi8: {
// Likewise, an OR with immediate -1 always leaves all-ones in memory.
297 const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
298 return ImmOp.getImm() == -1 ? Convert : Exit;
299 }
300 case X86::MOV32mi:
301 case X86::MOV32mr:
302 case X86::MOV64mi32:
303 case X86::MOV64mr:
304 return Convert;
305 }
306
307 // Not all calling conventions have only stack MOVs between the stack
308 // adjust and the call.
309
310 // We want to tolerate other instructions, to cover more cases.
311 // In particular:
312 // a) PCrel calls, where we expect an additional COPY of the basereg.
313 // b) Passing frame-index addresses.
314 // c) Calling conventions that have inreg parameters. These generate
315 // both copies and movs into registers.
316 // To avoid creating lots of special cases, allow any instruction
317 // that does not write into memory, does not def or use the stack
318 // pointer, and does not def any register that was used by a preceding
319 // push.
320 // (Reading from memory is allowed, even if referenced through a
321 // frame index, since these will get adjusted properly in PEI)
322
323 // The reason for the last condition is that the pushes can't replace
324 // the movs in place, because the order must be reversed.
325 // So if we have a MOV32mr that uses EDX, then an instruction that defs
326 // EDX, and then the call, after the transformation the push will use
327 // the modified version of EDX, and not the original one.
328 // Since we are still in SSA form at this point, we only need to
329 // make sure we don't clobber any *physical* registers that were
330 // used by an earlier mov that will become a push.
331
// Calls and any remaining stores end the scan.
332 if (MI->isCall() || MI->mayStore())
333 return Exit;
334
335 for (const MachineOperand &MO : MI->operands()) {
336 if (!MO.isReg())
337 continue;
338 Register Reg = MO.getReg();
// Virtual registers are never a problem here (see the SSA remark above).
339 if (!Reg.isPhysical())
340 continue;
// Any use or def that overlaps the stack pointer ends the scan.
341 if (RegInfo.regsOverlap(Reg, RegInfo.getStackRegister()))
342 return Exit;
// So does a def that overlaps a register read by an earlier candidate store.
343 if (MO.isDef()) {
344 for (unsigned int U : UsedRegs)
345 if (RegInfo.regsOverlap(Reg, U))
346 return Exit;
347 }
348 }
349
350 return Skip;
351}
352
// Inspects the call sequence that begins at the frame-setup instruction I and
// records what it finds in Context: the frame-setup iterator, an optional
// COPY of the stack pointer, the store that fills each argument slot, the
// call instruction, and the byte size of the contiguous argument area
// (ExpectedDist). Context.UsePush is set to true only when every check
// passes; each early return leaves it unchanged.
// NOTE(review): part of the parameter list is not visible in this listing;
// the body refers to MBB and I in addition to MF and Context.
353void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
356 CallContext &Context) {
357 // Check that this particular call sequence is amenable to the
358 // transformation.
359 const X86RegisterInfo &RegInfo =
360 *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
361
362 // We expect to enter this at the beginning of a call sequence
363 assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
// Remember the setup instruction and step I past it.
364 MachineBasicBlock::iterator FrameSetup = I++;
365 Context.FrameSetup = FrameSetup;
366
367 // How much do we adjust the stack? This puts an upper bound on
368 // the number of parameters actually passed on it.
369 unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;
370
371 // A zero adjustment means no stack parameters
372 if (!MaxAdjust) {
373 Context.NoStackParams = true;
374 return;
375 }
376
377 // Skip over DEBUG_VALUE.
378 // For globals in PIC mode, we can have some LEAs here. Skip them as well.
379 // TODO: Extend this to something that covers more cases.
380 while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
381 ++I;
382
383 Register StackPtr = RegInfo.getStackRegister();
// MBB.end() serves as the "no copy found" marker.
384 auto StackPtrCopyInst = MBB.end();
385 // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
386 // register. If it's there, use that virtual register as stack pointer
387 // instead. Also, we need to locate this instruction so that we can later
388 // safely ignore it while doing the conservative processing of the call chain.
389 // The COPY can be located anywhere between the call-frame setup
390 // instruction and its first use. We use the call instruction as a boundary
391 // because it is usually cheaper to check if an instruction is a call than
392 // checking if an instruction uses a register.
393 for (auto J = I; !J->isCall(); ++J)
394 if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
395 J->getOperand(1).getReg() == StackPtr) {
396 StackPtrCopyInst = J;
397 Context.SPCopy = &*J++;
// From here on, stores are matched against the copy's destination register.
398 StackPtr = Context.SPCopy->getOperand(0).getReg();
399 break;
400 }
401
402 // Scan the call setup sequence for the pattern we're looking for.
403 // We only handle a simple case - a sequence of store instructions that
404 // push a sequence of stack-slot-aligned values onto the stack, with
405 // no gaps between them.
// The vector is constructed with four entries; grow it if more slots exist.
406 if (MaxAdjust > 4)
407 Context.ArgStoreVector.resize(MaxAdjust, nullptr);
408
// Physical registers read by the candidate stores seen so far.
409 DenseSet<unsigned int> UsedRegs;
410
// Note that ++I also runs on the iteration that yields Exit, so I ends up one
// past the instruction that stopped the scan; it is stepped back below.
411 for (InstClassification Classification = Skip; Classification != Exit; ++I) {
412 // If this is the COPY of the stack pointer, it's ok to ignore.
413 if (I == StackPtrCopyInst)
414 continue;
415 Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
416 if (Classification != Convert)
417 continue;
418 // We know the instruction has a supported store opcode.
419 // We only want movs of the form:
420 // mov imm/reg, k(%StackPtr)
421 // If we run into something else, bail.
422 // Note that AddrBaseReg may, counter to its name, not be a register,
423 // but rather a frame index.
424 // TODO: Support the fi case. This should probably work now that we
425 // have the infrastructure to track the stack pointer within a call
426 // sequence.
427 if (!I->getOperand(X86::AddrBaseReg).isReg() ||
428 (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
429 !I->getOperand(X86::AddrScaleAmt).isImm() ||
430 (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
431 (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
432 (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
433 !I->getOperand(X86::AddrDisp).isImm())
434 return;
435
436 int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
437 assert(StackDisp >= 0 &&
438 "Negative stack displacement when passing parameters");
439
440 // We really don't want to consider the unaligned case.
441 if (StackDisp & (SlotSize - 1))
442 return;
// Convert the byte displacement into a slot index.
443 StackDisp >>= Log2SlotSize;
444
445 assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
446 "Function call has more parameters than the stack is adjusted for.");
447
448 // If the same stack slot is being filled twice, something's fishy.
449 if (Context.ArgStoreVector[StackDisp] != nullptr)
450 return;
451 Context.ArgStoreVector[StackDisp] = &*I;
452
// Record the physical registers this store reads, so that later defs of them
// stop the scan (see classifyInstruction).
453 for (const MachineOperand &MO : I->uses()) {
454 if (!MO.isReg())
455 continue;
456 Register Reg = MO.getReg();
457 if (Reg.isPhysical())
458 UsedRegs.insert(Reg);
459 }
460 }
461
// Undo the final loop increment so I refers to the instruction that caused
// the Exit classification.
462 --I;
463
464 // We now expect the end of the sequence. If we stopped early,
465 // or reached the end of the block without finding a call, bail.
466 if (I == MBB.end() || !I->isCall())
467 return;
468
469 Context.Call = &*I;
// The call must be immediately followed by the call-frame destroy.
470 if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
471 return;
472
473 // Now, go through the vector, and see that we don't have any gaps,
474 // but only a series of storing instructions.
// ExpectedDist accumulates SlotSize bytes for every leading filled slot.
475 auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
476 for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
477 if (*MMI == nullptr)
478 break;
479
480 // If the call had no parameters, do nothing
481 if (MMI == Context.ArgStoreVector.begin())
482 return;
483
484 // We are either at the last parameter, or a gap.
485 // Make sure it's not a gap
486 for (; MMI != MME; ++MMI)
487 if (*MMI != nullptr)
488 return;
489
490 Context.UsePush = true;
491}
492
// Rewrites one call site so that its argument stores become pushes. The
// pushes are inserted immediately before Context.Call, walking the slots from
// the highest index down to slot 0, and each original store is erased.
// runOnMachineFunction calls this only for contexts whose UsePush is set.
// The frame-setup instruction is kept and its frame adjustment is set to
// Context.ExpectedDist.
493void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
494 const CallContext &Context) {
495 // Ok, we can in fact do the transformation for this call.
496 // Do not remove the FrameSetup instruction, but adjust the parameters.
497 // PEI will end up finalizing the handling of this.
498 MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
499 MachineBasicBlock &MBB = *(FrameSetup->getParent());
500 TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);
501
502 const DebugLoc &DL = FrameSetup->getDebugLoc();
503 bool Is64Bit = STI->is64Bit();
504 // Now, iterate through the vector in reverse order, and replace the store to
505 // stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
506 // replace uses.
507 for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
508 MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx];
// The value being stored is the operand right after the memory reference.
509 const MachineOperand &PushOp = Store->getOperand(X86::AddrNumOperands);
510 MachineBasicBlock::iterator Push = nullptr;
511 unsigned PushOpcode;
512 switch (Store->getOpcode()) {
513 default:
514 llvm_unreachable("Unexpected Opcode!");
// Immediate (or symbol) stores, including the AND-0 / OR-(-1) forms accepted
// by classifyInstruction, become immediate pushes.
515 case X86::AND16mi8:
516 case X86::AND32mi8:
517 case X86::AND64mi8:
518 case X86::OR16mi8:
519 case X86::OR32mi8:
520 case X86::OR64mi8:
521 case X86::MOV32mi:
522 case X86::MOV64mi32:
523 PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
524 // If the operand is a small (8-bit) immediate, we can use a
525 // PUSH instruction with a shorter encoding.
526 // Note that isImm() may fail even though this is a MOVmi, because
527 // the operand can also be a symbol.
528 if (PushOp.isImm()) {
529 int64_t Val = PushOp.getImm();
530 if (isInt<8>(Val))
531 PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
532 }
533 Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
534 Push->cloneMemRefs(MF, *Store);
535 break;
536 case X86::MOV32mr:
537 case X86::MOV64mr: {
538 Register Reg = PushOp.getReg();
539
540 // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
541 // in preparation for the PUSH64. The upper 32 bits can be undef.
542 if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
543 Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
544 Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
545 BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
546 BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
547 .addReg(UndefReg)
548 .add(PushOp)
549 .addImm(X86::sub_32bit);
550 }
551
552 // If PUSHrmm is not slow on this target, try to fold the source of the
553 // push into the instruction.
554 bool SlowPUSHrmm = STI->slowTwoMemOps();
555
556 // Check that this is legal to fold. Right now, we're extremely
557 // conservative about that.
558 MachineInstr *DefMov = nullptr;
559 if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
560 PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
561 Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));
562
// Copy the load's trailing X86::AddrNumOperands operands (its memory
// reference) onto the push, then drop the now-redundant load.
563 unsigned NumOps = DefMov->getDesc().getNumOperands();
564 for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
565 Push->addOperand(DefMov->getOperand(i));
566 Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
567 DefMov->eraseFromParent();
568 } else {
569 PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
570 Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
571 .addReg(Reg)
572 .getInstr();
573 Push->cloneMemRefs(MF, *Store);
574 }
575 break;
576 }
577 }
578
579 // For debugging, when using SP-based CFA, we need to adjust the CFA
580 // offset after each push.
581 // TODO: This is needed only if we require precise CFA.
582 if (!TFL->hasFP(MF))
583 TFL->BuildCFI(
584 MBB, std::next(Push), DL,
585 MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
586
// The push has replaced the original store.
587 MBB.erase(Store);
588 }
589
590 // The stack-pointer copy is no longer used in the call sequences.
591 // There should not be any other users, but we can't commit to that, so:
592 if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
593 Context.SPCopy->eraseFromParent();
594
595 // Once we've done this, we need to make sure PEI doesn't assume a reserved
596 // frame.
// NOTE(review): the line that declares FuncInfo is not visible in this
// listing.
598 FuncInfo->setHasPushSequences(true);
599}
600
601MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
602 MachineBasicBlock::iterator FrameSetup, Register Reg) {
603 // Do an extremely restricted form of load folding.
604 // ISel will often create patterns like:
605 // movl 4(%edi), %eax
606 // movl 8(%edi), %ecx
607 // movl 12(%edi), %edx
608 // movl %edx, 8(%esp)
609 // movl %ecx, 4(%esp)
610 // movl %eax, (%esp)
611 // call
612 // Get rid of those with prejudice.
613 if (!Reg.isVirtual())
614 return nullptr;
615
616 // Make sure this is the only use of Reg.
617 if (!MRI->hasOneNonDBGUse(Reg))
618 return nullptr;
619
620 MachineInstr &DefMI = *MRI->getVRegDef(Reg);
621
622 // Make sure the def is a MOV from memory.
623 // If the def is in another block, give up.
624 if ((DefMI.getOpcode() != X86::MOV32rm &&
625 DefMI.getOpcode() != X86::MOV64rm) ||
626 DefMI.getParent() != FrameSetup->getParent())
627 return nullptr;
628
629 // Make sure we don't have any instructions between DefMI and the
630 // push that make folding the load illegal.
631 for (MachineBasicBlock::iterator I = DefMI; I != FrameSetup; ++I)
632 if (I->isLoadFoldBarrier())
633 return nullptr;
634
635 return &DefMI;
636}
637
639 return new X86CallFrameOptimization();
640}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseSet and SmallDenseSet classes.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
LLVMContext & Context
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
static cl::opt< bool > NoX86CFOpt("no-x86-call-frame-opt", cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden)
#define DEBUG_TYPE
A debug info location.
Definition: DebugLoc.h:33
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment)
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:554
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
DataType & getValue()
Definition: CommandLine.h:1341
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
@ AddrScaleAmt
Definition: X86BaseInfo.h:33
@ AddrSegmentReg
AddrSegmentReg - The operand # of the segment in the memory operand.
Definition: X86BaseInfo.h:38
@ AddrIndexReg
Definition: X86BaseInfo.h:34
@ AddrNumOperands
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:41
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:373
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
FunctionPass * createX86CallFrameOptimization()
Return a pass that optimizes the code-size of x86 call sequences.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39