1//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PPC implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCFrameLowering.h"
15#include "PPCInstrBuilder.h"
16#include "PPCInstrInfo.h"
18#include "PPCSubtarget.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/Function.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "framelowering"
34STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36STATISTIC(NumPrologProbed, "Number of prologues probed");
37
38static cl::opt<bool>
39EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40 cl::desc("Enable spills in prologue to vector registers."),
41 cl::init(false), cl::Hidden);
42
43static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44 if (STI.isAIXABI())
45 return STI.isPPC64() ? 16 : 8;
46 // SVR4 ABI:
47 return STI.isPPC64() ? 16 : 4;
48}
49
50static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51 if (STI.isAIXABI())
52 return STI.isPPC64() ? 40 : 20;
53 return STI.isELFv2ABI() ? 24 : 40;
54}
55
56static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57 // First slot in the general register save area.
58 return STI.isPPC64() ? -8U : -4U;
59}
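// The offset is returned as a negative value cast to unsigned because the
// slot sits below the entry-time stack pointer, i.e. at the top of this
// function's own frame within the general register save area.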
60
61static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62 if (STI.isAIXABI() || STI.isPPC64())
63 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64
65 // 32-bit SVR4 ABI:
66 return 8;
67}
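// For reference, the expression above yields 24 bytes for 32-bit AIX, 48
// bytes for 64-bit AIX and 64-bit ELFv1, 32 bytes for 64-bit ELFv2, and 8
// bytes for 32-bit SVR4.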
68
69static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70 // Third slot in the general purpose register save area.
71 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72 return -12U;
73
74 // Second slot in the general purpose register save area.
75 return STI.isPPC64() ? -16U : -8U;
76}
77
78static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80}
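// Taken together, the helpers above reflect the fixed linkage-area layout:
// on 64-bit targets the back chain is at SP+0, the CR save word at SP+8, the
// saved LR at SP+16, and the TOC save slot at SP+40 (AIX/ELFv1) or SP+24
// (ELFv2); 32-bit AIX places the CR, LR and TOC slots at SP+4, SP+8 and
// SP+20 respectively.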
81
82PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84 STI.getPlatformStackAlignment(), 0),
85 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86 TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88 LinkageSize(computeLinkageSize(Subtarget)),
89 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90 CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91
92// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94 unsigned &NumEntries) const {
95
96// Floating-point register save area offsets.
97#define CALLEE_SAVED_FPRS \
98 {PPC::F31, -8}, \
99 {PPC::F30, -16}, \
100 {PPC::F29, -24}, \
101 {PPC::F28, -32}, \
102 {PPC::F27, -40}, \
103 {PPC::F26, -48}, \
104 {PPC::F25, -56}, \
105 {PPC::F24, -64}, \
106 {PPC::F23, -72}, \
107 {PPC::F22, -80}, \
108 {PPC::F21, -88}, \
109 {PPC::F20, -96}, \
110 {PPC::F19, -104}, \
111 {PPC::F18, -112}, \
112 {PPC::F17, -120}, \
113 {PPC::F16, -128}, \
114 {PPC::F15, -136}, \
115 {PPC::F14, -144}
116
117// 32-bit general purpose register save area offsets shared by ELF and
118// AIX. AIX has an extra CSR with r13.
119#define CALLEE_SAVED_GPRS32 \
120 {PPC::R31, -4}, \
121 {PPC::R30, -8}, \
122 {PPC::R29, -12}, \
123 {PPC::R28, -16}, \
124 {PPC::R27, -20}, \
125 {PPC::R26, -24}, \
126 {PPC::R25, -28}, \
127 {PPC::R24, -32}, \
128 {PPC::R23, -36}, \
129 {PPC::R22, -40}, \
130 {PPC::R21, -44}, \
131 {PPC::R20, -48}, \
132 {PPC::R19, -52}, \
133 {PPC::R18, -56}, \
134 {PPC::R17, -60}, \
135 {PPC::R16, -64}, \
136 {PPC::R15, -68}, \
137 {PPC::R14, -72}
138
139// 64-bit general purpose register save area offsets.
140#define CALLEE_SAVED_GPRS64 \
141 {PPC::X31, -8}, \
142 {PPC::X30, -16}, \
143 {PPC::X29, -24}, \
144 {PPC::X28, -32}, \
145 {PPC::X27, -40}, \
146 {PPC::X26, -48}, \
147 {PPC::X25, -56}, \
148 {PPC::X24, -64}, \
149 {PPC::X23, -72}, \
150 {PPC::X22, -80}, \
151 {PPC::X21, -88}, \
152 {PPC::X20, -96}, \
153 {PPC::X19, -104}, \
154 {PPC::X18, -112}, \
155 {PPC::X17, -120}, \
156 {PPC::X16, -128}, \
157 {PPC::X15, -136}, \
158 {PPC::X14, -144}
159
160// Vector register save area offsets.
161#define CALLEE_SAVED_VRS \
162 {PPC::V31, -16}, \
163 {PPC::V30, -32}, \
164 {PPC::V29, -48}, \
165 {PPC::V28, -64}, \
166 {PPC::V27, -80}, \
167 {PPC::V26, -96}, \
168 {PPC::V25, -112}, \
169 {PPC::V24, -128}, \
170 {PPC::V23, -144}, \
171 {PPC::V22, -160}, \
172 {PPC::V21, -176}, \
173 {PPC::V20, -192}
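// Each slot is as wide as the register it holds, hence the strides above:
// 8 bytes per FPR and 64-bit GPR, 4 bytes per 32-bit GPR, and 16 bytes per
// vector register.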
174
175 // Note that the offsets here overlap, but this is fixed up in
176 // processFunctionBeforeFrameFinalized.
177
178 static const SpillSlot ELFOffsets32[] = {
179 CALLEE_SAVED_FPRS,
180 CALLEE_SAVED_GPRS32,
181
182 // CR save area offset. We map each of the nonvolatile CR fields
183 // to the slot for CR2, which is the first of the nonvolatile CR
184 // fields to be assigned, so that we only allocate one save slot.
185 // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186 {PPC::CR2, -4},
187
188 // VRSAVE save area offset.
189 {PPC::VRSAVE, -4},
190
191 CALLEE_SAVED_VRS,
192
193 // SPE register save area (overlaps Vector save area).
194 {PPC::S31, -8},
195 {PPC::S30, -16},
196 {PPC::S29, -24},
197 {PPC::S28, -32},
198 {PPC::S27, -40},
199 {PPC::S26, -48},
200 {PPC::S25, -56},
201 {PPC::S24, -64},
202 {PPC::S23, -72},
203 {PPC::S22, -80},
204 {PPC::S21, -88},
205 {PPC::S20, -96},
206 {PPC::S19, -104},
207 {PPC::S18, -112},
208 {PPC::S17, -120},
209 {PPC::S16, -128},
210 {PPC::S15, -136},
211 {PPC::S14, -144}};
212
213 static const SpillSlot ELFOffsets64[] = {
214 CALLEE_SAVED_FPRS,
215 CALLEE_SAVED_GPRS64,
216
217 // VRSAVE save area offset.
218 {PPC::VRSAVE, -4},
219 CALLEE_SAVED_VRS,
220 };
221
222 static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223 CALLEE_SAVED_GPRS32,
224 // Add AIX's extra CSR.
225 {PPC::R13, -76},
226 CALLEE_SAVED_VRS};
227
228 static const SpillSlot AIXOffsets64[] = {
229 CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230
231 if (Subtarget.is64BitELFABI()) {
232 NumEntries = std::size(ELFOffsets64);
233 return ELFOffsets64;
234 }
235
236 if (Subtarget.is32BitELFABI()) {
237 NumEntries = std::size(ELFOffsets32);
238 return ELFOffsets32;
239 }
240
241 assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242
243 if (Subtarget.isPPC64()) {
244 NumEntries = std::size(AIXOffsets64);
245 return AIXOffsets64;
246 }
247
248 NumEntries = std::size(AIXOffsets32);
249 return AIXOffsets32;
250}
251
252static bool spillsCR(const MachineFunction &MF) {
253 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254 return FuncInfo->isCRSpilled();
255}
256
257static bool hasSpills(const MachineFunction &MF) {
258 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259 return FuncInfo->hasSpills();
260}
261
262static bool hasNonRISpills(const MachineFunction &MF) {
263 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264 return FuncInfo->hasNonRISpills();
265}
266
267/// MustSaveLR - Return true if this function requires that we save the LR
268/// register onto the stack in the prolog and restore it in the epilog of the
269/// function.
270static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272
273 // We need a save/restore of LR if there is any def of LR (which is
274 // defined by calls, including the PIC setup sequence), or if there is
275 // some use of the LR stack slot (e.g. for builtin_return_address).
276 // (LR comes in 32 and 64 bit versions.)
277 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278 return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279}
280
281/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282/// call frame size. Update the MachineFunction object with the stack size.
283uint64_t
284PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285 bool UseEstimate) const {
286 unsigned NewMaxCallFrameSize = 0;
287 uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288 &NewMaxCallFrameSize);
289 MF.getFrameInfo().setStackSize(FrameSize);
290 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291 return FrameSize;
292}
293
294/// determineFrameLayout - Determine the size of the frame and maximum call
295/// frame size.
296uint64_t
297PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298 bool UseEstimate,
299 unsigned *NewMaxCallFrameSize) const {
300 const MachineFrameInfo &MFI = MF.getFrameInfo();
301 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302
303 // Get the number of bytes to allocate from the FrameInfo
304 uint64_t FrameSize =
305 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306
307 // Get stack alignments. The frame must be aligned to the greatest of these:
308 Align TargetAlign = getStackAlign(); // alignment required per the ABI
309 Align MaxAlign = MFI.getMaxAlign(); // alignment required by data in frame
310 Align Alignment = std::max(TargetAlign, MaxAlign);
311
312 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313
314 unsigned LR = RegInfo->getRARegister();
315 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317 !MFI.adjustsStack() && // No calls.
318 !MustSaveLR(MF, LR) && // No need to save LR.
319 !FI->mustSaveTOC() && // No need to save TOC.
320 !RegInfo->hasBasePointer(MF); // No special alignment.
321
322 // Note: for PPC32 SVR4ABI, we can still generate stackless
323 // code if all local vars are reg-allocated.
324 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
325
326 // Check whether we can skip adjusting the stack pointer (by using red zone)
327 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
328 // No need for frame
329 return 0;
330 }
331
332 // Get the maximum call frame size of all the calls.
333 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
334
335 // Maximum call frame needs to be at least big enough for linkage area.
336 unsigned minCallFrameSize = getLinkageSize();
337 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
338
339 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
340 // that allocations will be aligned.
341 if (MFI.hasVarSizedObjects())
342 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
343
344 // Update the new max call frame size if the caller passes in a valid pointer.
345 if (NewMaxCallFrameSize)
346 *NewMaxCallFrameSize = maxCallFrameSize;
347
348 // Include call frame size in total.
349 FrameSize += maxCallFrameSize;
350
351 // Make sure the frame is aligned.
352 FrameSize = alignTo(FrameSize, Alignment);
353
354 return FrameSize;
355}
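// Illustrative example (64-bit ELFv2, 16-byte stack alignment): a function
// that makes calls, has 40 bytes of local objects, and whose largest call
// frame does not exceed the 32-byte linkage area gets maxCallFrameSize = 32
// and FrameSize = alignTo(40 + 32, 16) = 80 bytes.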
356
357// hasFP - Return true if the specified function actually has a dedicated frame
358// pointer register.
359bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
360 const MachineFrameInfo &MFI = MF.getFrameInfo();
361 // FIXME: This is pretty much broken by design: hasFP() might be called really
362 // early, before the stack layout was calculated and thus hasFP() might return
363 // true or false here depending on the time of call.
364 return (MFI.getStackSize()) && needsFP(MF);
365}
366
367// needsFP - Return true if the specified function should have a dedicated frame
368// pointer register. This is true if the function has variable sized allocas or
369// if frame pointer elimination is disabled.
370bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
371 const MachineFrameInfo &MFI = MF.getFrameInfo();
372
373 // Naked functions have no stack frame pushed, so we don't have a frame
374 // pointer.
375 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
376 return false;
377
378 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
379 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
380 MF.exposesReturnsTwice() ||
381 (MF.getTarget().Options.GuaranteedTailCallOpt &&
382 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
383}
384
385void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
386 bool is31 = needsFP(MF);
387 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
388 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
389
390 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
391 bool HasBP = RegInfo->hasBasePointer(MF);
392 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
393 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
394
395 for (MachineBasicBlock &MBB : MF)
396 for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
397 --MBBI;
398 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
399 MachineOperand &MO = MBBI->getOperand(I);
400 if (!MO.isReg())
401 continue;
402
403 switch (MO.getReg()) {
404 case PPC::FP:
405 MO.setReg(FPReg);
406 break;
407 case PPC::FP8:
408 MO.setReg(FP8Reg);
409 break;
410 case PPC::BP:
411 MO.setReg(BPReg);
412 break;
413 case PPC::BP8:
414 MO.setReg(BP8Reg);
415 break;
416
417 }
418 }
419 }
420}
421
422/* This function will do the following:
423 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
424 respectively (defaults recommended by the ABI) and return true
425 - If MBB is not an entry block, initialize the register scavenger and look
426 for available registers.
427 - If the defaults (R0/R12) are available, return true
428 - If TwoUniqueRegsRequired is set to true, it looks for two unique
429 registers. Otherwise, look for a single available register.
430 - If the required registers are found, set SR1 and SR2 and return true.
431 - If the required registers are not found, set SR2 or both SR1 and SR2 to
432 PPC::NoRegister and return false.
433
434 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
435 is not set, this function will attempt to find two different registers, but
436 still return true if only one register is available (and set SR1 == SR2).
437*/
438bool
439PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
440 bool UseAtEnd,
441 bool TwoUniqueRegsRequired,
442 Register *SR1,
443 Register *SR2) const {
444 RegScavenger RS;
445 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
446 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
447
448 // Set the defaults for the two scratch registers.
449 if (SR1)
450 *SR1 = R0;
451
452 if (SR2) {
453 assert (SR1 && "Asking for the second scratch register but not the first?");
454 *SR2 = R12;
455 }
456
457 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
458 if ((UseAtEnd && MBB->isReturnBlock()) ||
459 (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
460 return true;
461
462 RS.enterBasicBlock(*MBB);
463
464 if (UseAtEnd && !MBB->empty()) {
465 // The scratch register will be used at the end of the block, so must
466 // consider all registers used within the block
467
468 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469 // If no terminator, back iterator up to previous instruction.
470 if (MBBI == MBB->end())
471 MBBI = std::prev(MBBI);
472
473 if (MBBI != MBB->begin())
474 RS.forward(MBBI);
475 }
476
477 // If the two registers are available, we're all good.
478 // Note that we only return here if both R0 and R12 are available because
479 // although the function may not require two unique registers, it may benefit
480 // from having two so we should try to provide them.
481 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
482 return true;
483
484 // Get the list of callee-saved registers for the target.
485 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
486 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
487
488 // Get all the available registers in the block.
489 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
490 &PPC::GPRCRegClass);
491
492 // We shouldn't use callee-saved registers as scratch registers as they may be
493 // available when looking for a candidate block for shrink wrapping but not
494 // available when the actual prologue/epilogue is being emitted because they
495 // were added as live-in to the prologue block by PrologueEpilogueInserter.
496 for (int i = 0; CSRegs[i]; ++i)
497 BV.reset(CSRegs[i]);
498
499 // Set the first scratch register to the first available one.
500 if (SR1) {
501 int FirstScratchReg = BV.find_first();
502 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
503 }
504
505 // If there is another one available, set the second scratch register to that.
506 // Otherwise, set it to either PPC::NoRegister if this function requires two
507 // or to whatever SR1 is set to if this function doesn't require two.
508 if (SR2) {
509 int SecondScratchReg = BV.find_next(*SR1);
510 if (SecondScratchReg != -1)
511 *SR2 = SecondScratchReg;
512 else
513 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
514 }
515
516 // Now that we've done our best to provide both registers, double check
517 // whether we were unable to provide enough.
518 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
519 return false;
520
521 return true;
522}
523
524// We need a scratch register for spilling LR and for spilling CR. By default,
525// we use two scratch registers to hide latency. However, if only one scratch
526// register is available, we can adjust for that by not overlapping the spill
527// code. However, if we need to realign the stack (i.e. have a base pointer)
528// and the stack frame is large, we need two scratch registers.
529// Stack probing also requires two scratch registers: one to hold the old SP
530// and one to handle a large frame or a large probe size.
531bool
532PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
533 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
534 MachineFunction &MF = *(MBB->getParent());
535 bool HasBP = RegInfo->hasBasePointer(MF);
536 unsigned FrameSize = determineFrameLayout(MF);
537 int NegFrameSize = -FrameSize;
538 bool IsLargeFrame = !isInt<16>(NegFrameSize);
539 MachineFrameInfo &MFI = MF.getFrameInfo();
540 Align MaxAlign = MFI.getMaxAlign();
541 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
542 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
543
544 return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
545 TLI.hasInlineStackProbe(MF);
546}
547
548bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
549 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
550
551 return findScratchRegister(TmpMBB, false,
552 twoUniqueScratchRegsRequired(TmpMBB));
553}
554
555bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
556 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
557
558 return findScratchRegister(TmpMBB, true);
559}
560
561bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
562 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
563 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
564
565 // Abort if there is no register info or function info.
566 if (!RegInfo || !FI)
567 return false;
568
569 // Only move the stack update on ELFv2 ABI and PPC64.
570 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
571 return false;
572
573 // Check the frame size first and return false if it does not fit the
574 // requirements.
575 // We need a non-zero frame size as well as a frame that will fit in the red
576 // zone. This is because by moving the stack pointer update we are now storing
577 // to the red zone until the stack pointer is updated. If we get an interrupt
578 // inside the prologue but before the stack update we now have a number of
579 // stores to the red zone and those stores must all fit.
580 MachineFrameInfo &MFI = MF.getFrameInfo();
581 unsigned FrameSize = MFI.getStackSize();
582 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
583 return false;
584
585 // Frame pointers and base pointers complicate matters so don't do anything
586 // if we have them. For example having a frame pointer will sometimes require
587 // a copy of r1 into r31 and that makes keeping track of updates to r1 more
588 // difficult. Similar situation exists with setjmp.
589 if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
590 return false;
591
592 // Calls to fast_cc functions use different rules for passing parameters on
593 // the stack from the ABI and using PIC base in the function imposes
594 // similar restrictions to using the base pointer. It is not generally safe
595 // to move the stack pointer update in these situations.
596 if (FI->hasFastCall() || FI->usesPICBase())
597 return false;
598
599 // Finally we can move the stack update if we do not require register
600 // scavenging. Register scavenging can introduce more spills and so
601 // may make the frame size larger than we have computed.
602 return !RegInfo->requiresFrameIndexScavenging(MF);
603}
604
605void PPCFrameLowering::emitPrologue(MachineFunction &MF,
606 MachineBasicBlock &MBB) const {
607 MachineBasicBlock::iterator MBBI = MBB.begin();
608 MachineFrameInfo &MFI = MF.getFrameInfo();
609 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
610 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
611 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
612
613 MachineModuleInfo &MMI = MF.getMMI();
614 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
615 DebugLoc dl;
616 // AIX assembler does not support cfi directives.
617 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
618
619 const bool HasFastMFLR = Subtarget.hasFastMFLR();
620
621 // Get processor type.
622 bool isPPC64 = Subtarget.isPPC64();
623 // Get the ABI.
624 bool isSVR4ABI = Subtarget.isSVR4ABI();
625 bool isELFv2ABI = Subtarget.isELFv2ABI();
626 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
627
628 // Work out frame sizes.
629 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
630 int64_t NegFrameSize = -FrameSize;
631 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
632 llvm_unreachable("Unhandled stack size!");
633
634 if (MFI.isFrameAddressTaken())
635 replaceFPWithRealFP(MF);
636
637 // Check if the link register (LR) must be saved.
638 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
639 bool MustSaveLR = FI->mustSaveLR();
640 bool MustSaveTOC = FI->mustSaveTOC();
641 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
642 bool MustSaveCR = !MustSaveCRs.empty();
643 // Do we have a frame pointer and/or base pointer for this function?
644 bool HasFP = hasFP(MF);
645 bool HasBP = RegInfo->hasBasePointer(MF);
646 bool HasRedZone = isPPC64 || !isSVR4ABI;
647 bool HasROPProtect = Subtarget.hasROPProtect();
648 bool HasPrivileged = Subtarget.hasPrivileged();
649
650 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
651 Register BPReg = RegInfo->getBaseRegister(MF);
652 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
653 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
654 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
655 Register ScratchReg;
656 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
657 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
658 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
659 : PPC::MFLR );
660 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
661 : PPC::STW );
662 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
663 : PPC::STWU );
664 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
665 : PPC::STWUX);
666 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
667 : PPC::OR );
668 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
669 : PPC::SUBFC);
670 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
671 : PPC::SUBFIC);
672 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
673 : PPC::MFCR);
674 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
675 const MCInstrDesc &HashST =
676 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
677 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
678
679 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
680 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
681 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
682 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
683 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
684 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
685
686 // Using the same bool variable as below to suppress compiler warnings.
687 bool SingleScratchReg = findScratchRegister(
688 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
689 assert(SingleScratchReg &&
690 "Required number of registers not available in this block");
691
692 SingleScratchReg = ScratchReg == TempReg;
693
694 int64_t LROffset = getReturnSaveOffset();
695
696 int64_t FPOffset = 0;
697 if (HasFP) {
698 MachineFrameInfo &MFI = MF.getFrameInfo();
699 int FPIndex = FI->getFramePointerSaveIndex();
700 assert(FPIndex && "No Frame Pointer Save Slot!");
701 FPOffset = MFI.getObjectOffset(FPIndex);
702 }
703
704 int64_t BPOffset = 0;
705 if (HasBP) {
706 MachineFrameInfo &MFI = MF.getFrameInfo();
707 int BPIndex = FI->getBasePointerSaveIndex();
708 assert(BPIndex && "No Base Pointer Save Slot!");
709 BPOffset = MFI.getObjectOffset(BPIndex);
710 }
711
712 int64_t PBPOffset = 0;
713 if (FI->usesPICBase()) {
714 MachineFrameInfo &MFI = MF.getFrameInfo();
715 int PBPIndex = FI->getPICBasePointerSaveIndex();
716 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
717 PBPOffset = MFI.getObjectOffset(PBPIndex);
718 }
719
720 // Get stack alignments.
721 Align MaxAlign = MFI.getMaxAlign();
722 if (HasBP && MaxAlign > 1)
723 assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
724
725 // Frames of 32KB & larger require special handling because they cannot be
726 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
727 bool isLargeFrame = !isInt<16>(NegFrameSize);
728
729 // Check if we can move the stack update instruction (stdu) down the prologue
730 // past the callee saves. Hopefully this will avoid the situation where the
731 // saves are waiting for the update on the store with update to complete.
732 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
733 bool MovingStackUpdateDown = false;
734
735 // Check if we can move the stack update.
736 if (stackUpdateCanBeMoved(MF)) {
737 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
738 for (CalleeSavedInfo CSI : Info) {
739 // If the callee saved register is spilled to a register instead of the
740 // stack then the spill no longer uses the stack pointer.
741 // This can lead to two consequences:
742 // 1) We no longer need to update the stack because the function does not
743 // spill any callee saved registers to stack.
744 // 2) We have a situation where we still have to update the stack pointer
745 // even though some registers are spilled to other registers. In
746 // this case the current code moves the stack update to an incorrect
747 // position.
748 // In either case we should abort moving the stack update operation.
749 if (CSI.isSpilledToReg()) {
750 StackUpdateLoc = MBBI;
751 MovingStackUpdateDown = false;
752 break;
753 }
754
755 int FrIdx = CSI.getFrameIdx();
756 // If the frame index is not negative the callee saved info belongs to a
757 // stack object that is not a fixed stack object. We ignore non-fixed
758 // stack objects because we won't move the stack update pointer past them.
759 if (FrIdx >= 0)
760 continue;
761
762 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
763 StackUpdateLoc++;
764 MovingStackUpdateDown = true;
765 } else {
766 // We need all of the Frame Indices to meet these conditions.
767 // If they do not, abort the whole operation.
768 StackUpdateLoc = MBBI;
769 MovingStackUpdateDown = false;
770 break;
771 }
772 }
773
774 // If the operation was not aborted then update the object offset.
775 if (MovingStackUpdateDown) {
776 for (CalleeSavedInfo CSI : Info) {
777 int FrIdx = CSI.getFrameIdx();
778 if (FrIdx < 0)
779 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
780 }
781 }
782 }
783
784 // Where in the prologue we move the CR fields depends on how many scratch
785 // registers we have and whether we need to save the link register. This
786 // lambda avoids duplicating the logic in two places.
787 auto BuildMoveFromCR = [&]() {
788 if (isELFv2ABI && MustSaveCRs.size() == 1) {
789 // In the ELFv2 ABI, we are not required to save all CR fields.
790 // If only one CR field is clobbered, it is more efficient to use
791 // mfocrf to selectively save just that field, because mfocrf has
792 // shorter latency than mfcr.
793 assert(isPPC64 && "V2 ABI is 64-bit only.");
794 MachineInstrBuilder MIB =
795 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
796 MIB.addReg(MustSaveCRs[0], RegState::Kill);
797 } else {
798 MachineInstrBuilder MIB =
799 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
800 for (unsigned CRfield : MustSaveCRs)
801 MIB.addReg(CRfield, RegState::ImplicitKill);
802 }
803 };
804
805 // If we need to spill the CR and the LR but we don't have two separate
806 // registers available, we must spill them one at a time
807 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
808 BuildMoveFromCR();
809 BuildMI(MBB, MBBI, dl, StoreWordInst)
810 .addReg(TempReg, getKillRegState(true))
811 .addImm(CRSaveOffset)
812 .addReg(SPReg);
813 }
814
815 if (MustSaveLR)
816 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
817
818 if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
819 BuildMoveFromCR();
820
821 if (HasRedZone) {
822 if (HasFP)
823 BuildMI(MBB, MBBI, dl, StoreInst)
824 .addReg(FPReg)
825 .addImm(FPOffset)
826 .addReg(SPReg);
827 if (FI->usesPICBase())
828 BuildMI(MBB, MBBI, dl, StoreInst)
829 .addReg(PPC::R30)
830 .addImm(PBPOffset)
831 .addReg(SPReg);
832 if (HasBP)
833 BuildMI(MBB, MBBI, dl, StoreInst)
834 .addReg(BPReg)
835 .addImm(BPOffset)
836 .addReg(SPReg);
837 }
838
839 // Generate the instruction to store the LR. In the case where ROP protection
840 // is required the register holding the LR should not be killed as it will be
841 // used by the hash store instruction.
842 auto SaveLR = [&](int64_t Offset) {
843 assert(MustSaveLR && "LR is not required to be saved!");
844 BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
845 .addReg(ScratchReg, getKillRegState(!HasROPProtect))
846 .addImm(Offset)
847 .addReg(SPReg);
848
849 // Add the ROP protection Hash Store instruction.
850 // NOTE: This is technically a violation of the ABI. The hash can be saved
851 // up to 512 bytes into the Protected Zone. This can be outside of the
852 // initial 288 byte volatile program storage region in the Protected Zone.
853 // However, this restriction will be removed in an upcoming revision of the
854 // ABI.
855 if (HasROPProtect) {
856 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
857 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
858 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
859 "ROP hash save offset out of range.");
860 assert(((ImmOffset & 0x7) == 0) &&
861 "ROP hash save offset must be 8 byte aligned.");
862 BuildMI(MBB, StackUpdateLoc, dl, HashST)
863 .addReg(ScratchReg, getKillRegState(true))
864 .addImm(ImmOffset)
865 .addReg(SPReg);
866 }
867 };
868
869 if (MustSaveLR && HasFastMFLR)
870 SaveLR(LROffset);
871
872 if (MustSaveCR &&
873 !(SingleScratchReg && MustSaveLR)) {
874 assert(HasRedZone && "A red zone is always available on PPC64");
875 BuildMI(MBB, MBBI, dl, StoreWordInst)
876 .addReg(TempReg, getKillRegState(true))
877 .addImm(CRSaveOffset)
878 .addReg(SPReg);
879 }
880
881 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
882 if (!FrameSize) {
883 if (MustSaveLR && !HasFastMFLR)
884 SaveLR(LROffset);
885 return;
886 }
887
888 // Adjust stack pointer: r1 += NegFrameSize.
889 // If there is a preferred stack alignment, align R1 now
890
891 if (HasBP && HasRedZone) {
892 // Save a copy of r1 as the base pointer.
893 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
894 .addReg(SPReg)
895 .addReg(SPReg);
896 }
897
898 // Have we generated a STUX instruction to claim stack frame? If so,
899 // the negated frame size will be placed in ScratchReg.
900 bool HasSTUX =
901 (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
902 (HasBP && MaxAlign > 1) || isLargeFrame;
903
904 // If we use STUX to update the stack pointer, we need the two scratch
905 // registers TempReg and ScratchReg, we have to save LR here which is stored
906 // in ScratchReg.
907 // If the offset can not be encoded into the store instruction, we also have
908 // to save LR here.
909 if (MustSaveLR && !HasFastMFLR &&
910 (HasSTUX || !isInt<16>(FrameSize + LROffset)))
911 SaveLR(LROffset);
912
913 // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the POWER
914 // ABI requires the backchain pointer to always be stored at *SP, so the
915 // mandatory STU(X) instruction touches the newly allocated stack.
916 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
917 // To be consistent with other targets, a pseudo instruction is emitted and
918 // will be later expanded in `inlineStackProbe`.
919 BuildMI(MBB, MBBI, dl,
920 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
921 : PPC::PROBED_STACKALLOC_32))
922 .addDef(TempReg)
923 .addDef(ScratchReg) // ScratchReg stores the old sp.
924 .addImm(NegFrameSize);
925 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
926 // update the ScratchReg to meet the assumption that ScratchReg contains
927 // the NegFrameSize. This solution is rather tricky.
928 if (!HasRedZone) {
929 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
930 .addReg(ScratchReg)
931 .addReg(SPReg);
932 }
933 } else {
934 // This condition must be kept in sync with canUseAsPrologue.
935 if (HasBP && MaxAlign > 1) {
936 if (isPPC64)
937 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
938 .addReg(SPReg)
939 .addImm(0)
940 .addImm(64 - Log2(MaxAlign));
941 else // PPC32...
942 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
943 .addReg(SPReg)
944 .addImm(0)
945 .addImm(32 - Log2(MaxAlign))
946 .addImm(31);
947 if (!isLargeFrame) {
948 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
949 .addReg(ScratchReg, RegState::Kill)
950 .addImm(NegFrameSize);
951 } else {
952 assert(!SingleScratchReg && "Only a single scratch reg available");
953 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
954 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
955 .addReg(ScratchReg, RegState::Kill)
956 .addReg(TempReg, RegState::Kill);
957 }
958
959 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
960 .addReg(SPReg, RegState::Kill)
961 .addReg(SPReg)
962 .addReg(ScratchReg);
963 } else if (!isLargeFrame) {
964 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
965 .addReg(SPReg)
966 .addImm(NegFrameSize)
967 .addReg(SPReg);
968 } else {
969 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
970 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
971 .addReg(SPReg, RegState::Kill)
972 .addReg(SPReg)
973 .addReg(ScratchReg);
974 }
975 }
976
977 // Save the TOC register after the stack pointer update if a prologue TOC
978 // save is required for the function.
979 if (MustSaveTOC) {
980 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
981 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
982 .addReg(TOCReg, getKillRegState(true))
983 .addImm(TOCSaveOffset)
984 .addReg(SPReg);
985 }
986
987 if (!HasRedZone) {
988 assert(!isPPC64 && "A red zone is always available on PPC64");
989 if (HasSTUX) {
990 // The negated frame size is in ScratchReg, and the SPReg has been
991 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
992 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
993 // the stack frame (i.e. the old SP), ideally, we would put the old
994 // SP into a register and use it as the base for the stores. The
995 // problem is that the only available register may be ScratchReg,
996 // which could be R0, and R0 cannot be used as a base address.
997
998 // First, set ScratchReg to the old SP. This may need to be modified
999 // later.
1000 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1001 .addReg(ScratchReg, RegState::Kill)
1002 .addReg(SPReg);
1003
1004 if (ScratchReg == PPC::R0) {
1005 // R0 cannot be used as a base register, but it can be used as an
1006 // index in a store-indexed.
1007 int LastOffset = 0;
1008 if (HasFP) {
1009 // R0 += (FPOffset-LastOffset).
1010 // Need addic, since addi treats R0 as 0.
1011 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1012 .addReg(ScratchReg)
1013 .addImm(FPOffset-LastOffset);
1014 LastOffset = FPOffset;
1015 // Store FP into *R0.
1016 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1017 .addReg(FPReg, RegState::Kill) // Save FP.
1018 .addReg(PPC::ZERO)
1019 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1020 }
1021 if (FI->usesPICBase()) {
1022 // R0 += (PBPOffset-LastOffset).
1023 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1024 .addReg(ScratchReg)
1025 .addImm(PBPOffset-LastOffset);
1026 LastOffset = PBPOffset;
1027 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1028 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1029 .addReg(PPC::ZERO)
1030 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1031 }
1032 if (HasBP) {
1033 // R0 += (BPOffset-LastOffset).
1034 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1035 .addReg(ScratchReg)
1036 .addImm(BPOffset-LastOffset);
1037 LastOffset = BPOffset;
1038 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1039 .addReg(BPReg, RegState::Kill) // Save BP.
1040 .addReg(PPC::ZERO)
1041 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1042 // BP = R0-LastOffset
1043 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1044 .addReg(ScratchReg, RegState::Kill)
1045 .addImm(-LastOffset);
1046 }
1047 } else {
1048 // ScratchReg is not R0, so use it as the base register. It is
1049 // already set to the old SP, so we can use the offsets directly.
1050
1051 // Now that the stack frame has been allocated, save all the necessary
1052 // registers using ScratchReg as the base address.
1053 if (HasFP)
1054 BuildMI(MBB, MBBI, dl, StoreInst)
1055 .addReg(FPReg)
1056 .addImm(FPOffset)
1057 .addReg(ScratchReg);
1058 if (FI->usesPICBase())
1059 BuildMI(MBB, MBBI, dl, StoreInst)
1060 .addReg(PPC::R30)
1061 .addImm(PBPOffset)
1062 .addReg(ScratchReg);
1063 if (HasBP) {
1064 BuildMI(MBB, MBBI, dl, StoreInst)
1065 .addReg(BPReg)
1066 .addImm(BPOffset)
1067 .addReg(ScratchReg);
1068 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1069 .addReg(ScratchReg, RegState::Kill)
1070 .addReg(ScratchReg);
1071 }
1072 }
1073 } else {
1074 // The frame size is a known 16-bit constant (fitting in the immediate
1075 // field of STWU). To be here we have to be compiling for PPC32.
1076 // Since the SPReg has been decreased by FrameSize, add it back to each
1077 // offset.
1078 if (HasFP)
1079 BuildMI(MBB, MBBI, dl, StoreInst)
1080 .addReg(FPReg)
1081 .addImm(FrameSize + FPOffset)
1082 .addReg(SPReg);
1083 if (FI->usesPICBase())
1084 BuildMI(MBB, MBBI, dl, StoreInst)
1085 .addReg(PPC::R30)
1086 .addImm(FrameSize + PBPOffset)
1087 .addReg(SPReg);
1088 if (HasBP) {
1089 BuildMI(MBB, MBBI, dl, StoreInst)
1090 .addReg(BPReg)
1091 .addImm(FrameSize + BPOffset)
1092 .addReg(SPReg);
1093 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1094 .addReg(SPReg)
1095 .addImm(FrameSize);
1096 }
1097 }
1098 }
1099
1100 // Save the LR now.
1101 if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1102 SaveLR(LROffset + FrameSize);
1103
1104 // Add Call Frame Information for the instructions we generated above.
1105 if (needsCFI) {
1106 unsigned CFIIndex;
1107
1108 if (HasBP) {
1109 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1110 // because if the stack needed aligning then CFA won't be at a fixed
1111 // offset from FP/SP.
1112 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1113 CFIIndex = MF.addFrameInst(
1114 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1115 } else {
1116 // Adjust the definition of CFA to account for the change in SP.
1117 assert(NegFrameSize);
1118 CFIIndex = MF.addFrameInst(
1119 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1120 }
1121 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1122 .addCFIIndex(CFIIndex);
1123
1124 if (HasFP) {
1125 // Describe where FP was saved, at a fixed offset from CFA.
1126 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1127 CFIIndex = MF.addFrameInst(
1128 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1129 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1130 .addCFIIndex(CFIIndex);
1131 }
1132
1133 if (FI->usesPICBase()) {
1134 // Describe where FP was saved, at a fixed offset from CFA.
1135 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1136 CFIIndex = MF.addFrameInst(
1137 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1138 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1139 .addCFIIndex(CFIIndex);
1140 }
1141
1142 if (HasBP) {
1143 // Describe where BP was saved, at a fixed offset from CFA.
1144 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1145 CFIIndex = MF.addFrameInst(
1146 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1147 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148 .addCFIIndex(CFIIndex);
1149 }
1150
1151 if (MustSaveLR) {
1152 // Describe where LR was saved, at a fixed offset from CFA.
1153 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1154 CFIIndex = MF.addFrameInst(
1155 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1156 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1157 .addCFIIndex(CFIIndex);
1158 }
1159 }
1160
1161 // If there is a frame pointer, copy R1 into R31
1162 if (HasFP) {
1163 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1164 .addReg(SPReg)
1165 .addReg(SPReg);
1166
1167 if (!HasBP && needsCFI) {
1168 // Change the definition of CFA from SP+offset to FP+offset, because SP
1169 // will change at every alloca.
1170 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1171 unsigned CFIIndex = MF.addFrameInst(
1172 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1173
1174 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1175 .addCFIIndex(CFIIndex);
1176 }
1177 }
1178
1179 if (needsCFI) {
1180 // Describe where callee saved registers were saved, at fixed offsets from
1181 // CFA.
1182 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1183 for (const CalleeSavedInfo &I : CSI) {
1184 Register Reg = I.getReg();
1185 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1186
1187 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1188 // subregisters of CR2. We just need to emit a move of CR2.
1189 if (PPC::CRBITRCRegClass.contains(Reg))
1190 continue;
1191
1192 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1193 continue;
1194
1195 // For SVR4, don't emit a move for the CR spill slot if we haven't
1196 // spilled CRs.
1197 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1198 && !MustSaveCR)
1199 continue;
1200
1201 // For 64-bit SVR4 when we have spilled CRs, the spill location
1202 // is SP+8, not a frame-relative slot.
1203 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1204 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1205 // the whole CR word. In the ELFv2 ABI, every CR that was
1206 // actually saved gets its own CFI record.
1207 Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1208 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1209 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1210 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1211 .addCFIIndex(CFIIndex);
1212 continue;
1213 }
1214
1215 if (I.isSpilledToReg()) {
1216 unsigned SpilledReg = I.getDstReg();
1217 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1218 nullptr, MRI->getDwarfRegNum(Reg, true),
1219 MRI->getDwarfRegNum(SpilledReg, true)));
1220 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1221 .addCFIIndex(CFIRegister);
1222 } else {
1223 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1224 // We have changed the object offset above but we do not want to change
1225 // the actual offsets in the CFI instruction so we have to undo the
1226 // offset change here.
1227 if (MovingStackUpdateDown)
1228 Offset -= NegFrameSize;
1229
1230 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1231 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1232 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1233 .addCFIIndex(CFIIndex);
1234 }
1235 }
1236 }
1237}
1238
1239void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1240 MachineBasicBlock &PrologMBB) const {
1241 bool isPPC64 = Subtarget.isPPC64();
1242 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1243 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1244 MachineFrameInfo &MFI = MF.getFrameInfo();
1245 MachineModuleInfo &MMI = MF.getMMI();
1246 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1247 // AIX assembler does not support cfi directives.
1248 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1249 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1250 int Opc = MI.getOpcode();
1251 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1252 });
1253 if (StackAllocMIPos == PrologMBB.end())
1254 return;
1255 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1256 MachineBasicBlock *CurrentMBB = &PrologMBB;
1257 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1258 MachineInstr &MI = *StackAllocMIPos;
1259 int64_t NegFrameSize = MI.getOperand(2).getImm();
1260 unsigned ProbeSize = TLI.getStackProbeSize(MF);
1261 int64_t NegProbeSize = -(int64_t)ProbeSize;
1262 assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1263 int64_t NumBlocks = NegFrameSize / NegProbeSize;
1264 int64_t NegResidualSize = NegFrameSize % NegProbeSize;
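 // For example, with a 4096-byte probe size and a 10000-byte frame,
 // NegFrameSize is -10000 and NegProbeSize is -4096, so NumBlocks = 2 and
 // NegResidualSize = -1808 (integer division and remainder truncate toward
 // zero).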
1265 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1266 Register ScratchReg = MI.getOperand(0).getReg();
1267 Register FPReg = MI.getOperand(1).getReg();
1268 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1269 bool HasBP = RegInfo->hasBasePointer(MF);
1270 Register BPReg = RegInfo->getBaseRegister(MF);
1271 Align MaxAlign = MFI.getMaxAlign();
1272 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1273 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1274 // Subroutines to generate .cfi_* directives.
1275 auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1276 MachineBasicBlock::iterator MBBI, Register Reg) {
1277 unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1278 unsigned CFIIndex = MF.addFrameInst(
1279 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1280 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1281 .addCFIIndex(CFIIndex);
1282 };
1283 auto buildDefCFA = [&](MachineBasicBlock &MBB,
1284 MachineBasicBlock::iterator MBBI, Register Reg,
1285 int Offset) {
1286 unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1287 unsigned CFIIndex = MBB.getParent()->addFrameInst(
1288 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1289 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1290 .addCFIIndex(CFIIndex);
1291 };
1292 // Subroutine to determine if we can use the Imm as part of d-form.
1293 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
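 // The Imm % 4 requirement comes from the DS instruction form used by
 // std/stdu, whose 16-bit displacement must be a multiple of 4; stwu (D-form)
 // has no such restriction, so the check is simply conservative on 32-bit.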
1294 // Subroutine to materialize the Imm into TempReg.
1295 auto MaterializeImm = [&](MachineBasicBlock &MBB,
1296 MachineBasicBlock::iterator MBBI, int64_t Imm,
1297 Register &TempReg) {
1298 assert(isInt<32>(Imm) && "Unhandled imm");
1299 if (isInt<16>(Imm))
1300 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1301 .addImm(Imm);
1302 else {
1303 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1304 .addImm(Imm >> 16);
1305 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1306 .addReg(TempReg)
1307 .addImm(Imm & 0xFFFF);
1308 }
1309 };
1310 // Subroutine to store frame pointer and decrease stack pointer by probe size.
1311 auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1312 MachineBasicBlock::iterator MBBI, int64_t NegSize,
1313 Register NegSizeReg, bool UseDForm,
1314 Register StoreReg) {
1315 if (UseDForm)
1316 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1317 .addReg(StoreReg)
1318 .addImm(NegSize)
1319 .addReg(SPReg);
1320 else
1321 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1322 .addReg(StoreReg)
1323 .addReg(SPReg)
1324 .addReg(NegSizeReg);
1325 };
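 // Each st(w|d)u(x) emitted here both decrements SP and stores the back-chain
 // word at the new SP, so every block it allocates is also touched (probed)
 // as part of the allocation itself.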
1326 // Used to probe stack when realignment is required.
1327 // Note that, per the ABI's requirement, *sp must always equal the value of
1328 // the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
1329 // Following is pseudo code:
1330 // final_sp = (sp & align) + negframesize;
1331 // neg_gap = final_sp - sp;
1332 // while (neg_gap < negprobesize) {
1333 // stdu fp, negprobesize(sp);
1334 // neg_gap -= negprobesize;
1335 // }
1336 // stdux fp, sp, neg_gap
1337 //
1338 // When HasBP && HasRedzone, the back-chain pointer is already saved in BPReg
1339 // before the probe code, so we don't need to save it again; that frees one
1340 // additional register, which can be used to materialize the probe size when
1341 // an x-form update is needed. Otherwise we cannot materialize the probe
1342 // size, so only the d-form can be used for now.
1343 //
1344 // The allocations are:
1345 // if (HasBP && HasRedzone) {
1346 // r0: materialize the probesize if needed so that we can use xform.
1347 // r12: `neg_gap`
1348 // } else {
1349 // r0: back-chain pointer
1350 // r12: `neg_gap`.
1351 // }
1352 auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1353 MachineBasicBlock::iterator MBBI,
1354 Register ScratchReg, Register TempReg) {
1355 assert(HasBP && "The function is supposed to have base pointer when its "
1356 "stack is realigned.");
1357 assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1358
1359 // FIXME: We can eliminate this limitation if we get more information about
1360 // which parts of the red zone are already used. Used red-zone space can be
1361 // treated as probed, but there might be `holes' in the probed red zone,
1362 // which could complicate the implementation.
1363 assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1364 "Probe size should be larger or equal to the size of red-zone so "
1365 "that red-zone is not clobbered by probing.");
1366
1367 Register &FinalStackPtr = TempReg;
1368 // FIXME: We only support NegProbeSize materializable by DForm currently.
1369 // When HasBP && HasRedzone, we can use xform if we have an additional idle
1370 // register.
1371 NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1372 assert(isInt<16>(NegProbeSize) &&
1373 "NegProbeSize should be materializable by DForm");
1374 Register CRReg = PPC::CR0;
1375 // The emitted assembly is laid out roughly as follows:
1376 // bb.0:
1377 // ...
1378 // sub $scratchreg, $finalsp, r1
1379 // cmpdi $scratchreg, <negprobesize>
1380 // bge bb.2
1381 // bb.1:
1382 // stdu <backchain>, <negprobesize>(r1)
1383 // sub $scratchreg, $scratchreg, negprobesize
1384 // cmpdi $scratchreg, <negprobesize>
1385 // blt bb.1
1386 // bb.2:
1387 // stdux <backchain>, r1, $scratchreg
1388 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1389 MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1390 MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1391 MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1392 MF.insert(MBBInsertPoint, ProbeExitMBB);
1393 // bb.2
1394 {
1395 Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1396 allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1397 BackChainPointer);
1398 if (HasRedZone)
1399 // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1400 // to TempReg to satisfy it.
1401 BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1402 .addReg(BPReg)
1403 .addReg(BPReg);
1404 ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1405 ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1406 }
1407 // bb.0
1408 {
1409 BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1410 .addReg(SPReg)
1411 .addReg(FinalStackPtr);
1412 if (!HasRedZone)
1413 BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1414 BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1415 .addReg(ScratchReg)
1416 .addImm(NegProbeSize);
1417 BuildMI(&MBB, DL, TII.get(PPC::BCC))
1418 .addImm(PPC::PRED_GE)
1419 .addReg(CRReg)
1420 .addMBB(ProbeExitMBB);
1421 MBB.addSuccessor(ProbeLoopBodyMBB);
1422 MBB.addSuccessor(ProbeExitMBB);
1423 }
1424 // bb.1
1425 {
1426 Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1427 allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1428 0, true /*UseDForm*/, BackChainPointer);
1429 BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1430 ScratchReg)
1431 .addReg(ScratchReg)
1432 .addImm(-NegProbeSize);
1433 BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1434 CRReg)
1435 .addReg(ScratchReg)
1436 .addImm(NegProbeSize);
1437 BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1438 .addImm(PPC::PRED_LT)
1439 .addReg(CRReg)
1440 .addMBB(ProbeLoopBodyMBB);
1441 ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1442 ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1443 }
1444 // Update liveins.
1445 recomputeLiveIns(*ProbeLoopBodyMBB);
1446 recomputeLiveIns(*ProbeExitMBB);
1447 return ProbeExitMBB;
1448 };
1449 // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1450 // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
1451 // the offset subtracted from SP is determined by SP's runtime value.
1452 if (HasBP && MaxAlign > 1) {
1453 // Calculate final stack pointer.
1454 if (isPPC64)
1455 BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1456 .addReg(SPReg)
1457 .addImm(0)
1458 .addImm(64 - Log2(MaxAlign));
1459 else
1460 BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1461 .addReg(SPReg)
1462 .addImm(0)
1463 .addImm(32 - Log2(MaxAlign))
1464 .addImm(31);
1465 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1466 FPReg)
1467 .addReg(ScratchReg)
1468 .addReg(SPReg);
1469 MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1470 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1471 FPReg)
1472 .addReg(ScratchReg)
1473 .addReg(FPReg);
1474 CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1475 if (needsCFI)
1476 buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1477 } else {
1478 // Initialize current frame pointer.
1479 BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1480 // Use FPReg to calculate CFA.
1481 if (needsCFI)
1482 buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1483 // Probe residual part.
1484 if (NegResidualSize) {
1485 bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1486 if (!ResidualUseDForm)
1487 MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1488 allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1489 ResidualUseDForm, FPReg);
1490 }
1491 bool UseDForm = CanUseDForm(NegProbeSize);
1492 // If the number of blocks is small, just probe them directly.
1493 if (NumBlocks < 3) {
1494 if (!UseDForm)
1495 MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1496 for (int i = 0; i < NumBlocks; ++i)
1497 allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1498 FPReg);
1499 if (needsCFI) {
1500 // Restore using SPReg to calculate CFA.
1501 buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1502 }
1503 } else {
1504 // Since CTR is a volatile register and current shrinkwrap implementation
1505 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1506 // CTR loop to probe.
1507 // Calculate the trip count and store it in CTRReg.
1508 MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1509 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1510 .addReg(ScratchReg, RegState::Kill);
1511 if (!UseDForm)
1512 MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1513 // Create MBBs of the loop.
1514 MachineFunction::iterator MBBInsertPoint =
1515 std::next(CurrentMBB->getIterator());
1516 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1517 MF.insert(MBBInsertPoint, LoopMBB);
1518 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1519 MF.insert(MBBInsertPoint, ExitMBB);
1520 // Synthesize the loop body.
1521 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1522 UseDForm, FPReg);
1523 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1524 .addMBB(LoopMBB);
1525 LoopMBB->addSuccessor(ExitMBB);
1526 LoopMBB->addSuccessor(LoopMBB);
1527 // Synthesize the exit MBB.
1528 ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1529 std::next(MachineBasicBlock::iterator(MI)),
1530 CurrentMBB->end());
1531 ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1532 CurrentMBB->addSuccessor(LoopMBB);
1533 if (needsCFI) {
1534 // Restore using SPReg to calculate CFA.
1535 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1536 }
1537 // Update liveins.
1538 recomputeLiveIns(*LoopMBB);
1539 recomputeLiveIns(*ExitMBB);
1540 }
1541 }
1542 ++NumPrologProbed;
1543 MI.eraseFromParent();
1544}
1545
1546void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1547 MachineBasicBlock &MBB) const {
1548 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1549 DebugLoc dl;
1550
1551 if (MBBI != MBB.end())
1552 dl = MBBI->getDebugLoc();
1553
1554 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1555 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1556
1557 // Get alignment info so we know how to restore the SP.
1558 const MachineFrameInfo &MFI = MF.getFrameInfo();
1559
1560 // Get the number of bytes allocated from the FrameInfo.
1561 int64_t FrameSize = MFI.getStackSize();
1562
1563 // Get processor type.
1564 bool isPPC64 = Subtarget.isPPC64();
1565
1566 // Check if the link register (LR) has been saved.
1567 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1568 bool MustSaveLR = FI->mustSaveLR();
1569 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1570 bool MustSaveCR = !MustSaveCRs.empty();
1571 // Do we have a frame pointer and/or base pointer for this function?
1572 bool HasFP = hasFP(MF);
1573 bool HasBP = RegInfo->hasBasePointer(MF);
1574 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1575 bool HasROPProtect = Subtarget.hasROPProtect();
1576 bool HasPrivileged = Subtarget.hasPrivileged();
1577
1578 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1579 Register BPReg = RegInfo->getBaseRegister(MF);
1580 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1581 Register ScratchReg;
1582 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1583 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1584 : PPC::MTLR );
1585 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1586 : PPC::LWZ );
1587 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1588 : PPC::LIS );
1589 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1590 : PPC::OR );
1591 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1592 : PPC::ORI );
1593 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1594 : PPC::ADDI );
1595 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1596 : PPC::ADD4 );
1597 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1598 : PPC::LWZ);
1599 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1600 : PPC::MTOCRF);
1601 const MCInstrDesc &HashChk =
1602 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1603 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1604 int64_t LROffset = getReturnSaveOffset();
1605
1606 int64_t FPOffset = 0;
1607
1608 // Using the same bool variable as below to suppress compiler warnings.
1609 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1610 &TempReg);
1611 assert(SingleScratchReg &&
1612 "Could not find an available scratch register");
1613
1614 SingleScratchReg = ScratchReg == TempReg;
1615
1616 if (HasFP) {
1617 int FPIndex = FI->getFramePointerSaveIndex();
1618 assert(FPIndex && "No Frame Pointer Save Slot!");
1619 FPOffset = MFI.getObjectOffset(FPIndex);
1620 }
1621
1622 int64_t BPOffset = 0;
1623 if (HasBP) {
1624 int BPIndex = FI->getBasePointerSaveIndex();
1625 assert(BPIndex && "No Base Pointer Save Slot!");
1626 BPOffset = MFI.getObjectOffset(BPIndex);
1627 }
1628
1629 int64_t PBPOffset = 0;
1630 if (FI->usesPICBase()) {
1631 int PBPIndex = FI->getPICBasePointerSaveIndex();
1632 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1633 PBPOffset = MFI.getObjectOffset(PBPIndex);
1634 }
1635
1636 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1637
1638 if (IsReturnBlock) {
1639 unsigned RetOpcode = MBBI->getOpcode();
1640 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1641 RetOpcode == PPC::TCRETURNdi ||
1642 RetOpcode == PPC::TCRETURNai ||
1643 RetOpcode == PPC::TCRETURNri8 ||
1644 RetOpcode == PPC::TCRETURNdi8 ||
1645 RetOpcode == PPC::TCRETURNai8;
1646
1647 if (UsesTCRet) {
1648 int MaxTCRetDelta = FI->getTailCallSPDelta();
1649 MachineOperand &StackAdjust = MBBI->getOperand(1);
1650 assert(StackAdjust.isImm() && "Expecting immediate value.");
1651 // Adjust stack pointer.
1652 int StackAdj = StackAdjust.getImm();
1653 int Delta = StackAdj - MaxTCRetDelta;
1654 assert((Delta >= 0) && "Delta must be positive");
1655 if (MaxTCRetDelta>0)
1656 FrameSize += (StackAdj +Delta);
1657 else
1658 FrameSize += StackAdj;
1659 }
1660 }
1661
1662 // Frames of 32KB & larger require special handling because they cannot be
1663 // indexed into with a simple LD/LWZ immediate offset operand.
1664 bool isLargeFrame = !isInt<16>(FrameSize);
1665
1666 // On targets without red zone, the SP needs to be restored last, so that
1667 // all live contents of the stack frame are upwards of the SP. This means
1668 // that we cannot restore SP just now, since there may be more registers
1669 // to restore from the stack frame (e.g. R31). If the frame size is not
1670 // a simple immediate value, we will need a spare register to hold the
1671 // restored SP. If the frame size is known and small, we can simply adjust
1672 // the offsets of the registers to be restored, and still use SP to restore
1673 // them. In such case, the final update of SP will be to add the frame
1674 // size to it.
1675 // To simplify the code, set RBReg to the base register used to restore
1676 // values from the stack, and set SPAdd to the value that needs to be added
1677 // to the SP at the end. The default values are as if red zone was present.
1678 unsigned RBReg = SPReg;
1679 uint64_t SPAdd = 0;
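A simplified standalone sketch of that choice (not LLVM code; the enum, struct and parameter names are invented, and the real logic below also special-cases fastcc calls and setjmp):

#include <cstdint>

enum class RestoreBase { SP, FP };

struct RestorePlan {
  RestoreBase Base; // register used to address the saved values
  uint64_t SPAdd;   // amount still to be added to SP at the very end
};

RestorePlan planEpilogueRestore(bool HasRedZone, bool LargeFrame,
                                bool HasBasePointer, bool HasVarSizedObjects,
                                uint64_t FrameSize) {
  if (HasRedZone)
    return {RestoreBase::SP, 0};         // SP can be restored up front
  if (!LargeFrame && !HasBasePointer && !HasVarSizedObjects)
    return {RestoreBase::SP, FrameSize}; // bias the offsets, bump SP last
  return {RestoreBase::FP, 0};           // reload values relative to FP
}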
1680
1681 // Check if we can move the stack update instruction up the epilogue,
1682 // past the callee-save restores. This allows the move-to-LR instruction
1683 // to be executed before those restores, which means the restores can
1684 // hide the latency of the MTLR instruction.
1685 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1686 if (stackUpdateCanBeMoved(MF)) {
1687 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1688 for (CalleeSavedInfo CSI : Info) {
1689 // If the callee saved register is spilled to another register abort the
1690 // stack update movement.
1691 if (CSI.isSpilledToReg()) {
1692 StackUpdateLoc = MBBI;
1693 break;
1694 }
1695 int FrIdx = CSI.getFrameIdx();
1696 // If the frame index is not negative, the callee-saved info belongs to a
1697 // stack object that is not a fixed stack object. We ignore non-fixed
1698 // stack objects because we won't move the update of the stack pointer
1699 // past them.
1700 if (FrIdx >= 0)
1701 continue;
1702
1703 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1704 StackUpdateLoc--;
1705 else {
1706 // Abort the operation as we can't update all CSR restores.
1707 StackUpdateLoc = MBBI;
1708 break;
1709 }
1710 }
1711 }
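A simplified standalone sketch of the screening rule in the loop above (not LLVM code; the struct and field names are invented stand-ins for the CalleeSavedInfo and MachineFrameInfo queries):

#include <vector>

struct CSRSlot {
  bool SpilledToReg;  // restored from another register, not from memory
  int FrameIdx;       // negative for fixed stack objects
  bool IsFixedObject;
  long Offset;        // object offset relative to the incoming SP
};

// Returns true only if every callee-saved restore the update must pass is a
// load from a fixed, negative-offset slot; otherwise the SP update stays put.
bool canHoistStackUpdate(const std::vector<CSRSlot> &Slots) {
  for (const CSRSlot &S : Slots) {
    if (S.SpilledToReg)
      return false;  // restored from a register, not a load
    if (S.FrameIdx >= 0)
      continue;      // non-fixed object; the update is not moved past it
    if (!(S.IsFixedObject && S.Offset < 0))
      return false;  // cannot account for this restore
  }
  return true;
}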
1712
1713 if (FrameSize) {
1714 // In the prologue, the loaded (or persistent) stack pointer value is
1715 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
1716 // zone, add this offset back now.
1717
1718 // If the function has a base pointer, the stack pointer has been copied
1719 // to it so we can restore it by copying in the other direction.
1720 if (HasRedZone && HasBP) {
1721 BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1722 addReg(BPReg).
1723 addReg(BPReg);
1724 }
1725 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1726 // enabled (=> hasFastCall() == true), the fastcc call might contain a tail
1727 // call, which invalidates the stack pointer value in SP(0). So we use the
1728 // value of R31 in this case. A similar situation exists with setjmp.
1729 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1730 assert(HasFP && "Expecting a valid frame pointer.");
1731 if (!HasRedZone)
1732 RBReg = FPReg;
1733 if (!isLargeFrame) {
1734 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1735 .addReg(FPReg).addImm(FrameSize);
1736 } else {
1737 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1738 BuildMI(MBB, MBBI, dl, AddInst)
1739 .addReg(RBReg)
1740 .addReg(FPReg)
1741 .addReg(ScratchReg);
1742 }
1743 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1744 if (HasRedZone) {
1745 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1746 .addReg(SPReg)
1747 .addImm(FrameSize);
1748 } else {
1749 // Make sure that adding FrameSize will not overflow the max offset
1750 // size.
1751 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1752 "Local offsets should be negative");
1753 SPAdd = FrameSize;
1754 FPOffset += FrameSize;
1755 BPOffset += FrameSize;
1756 PBPOffset += FrameSize;
1757 }
1758 } else {
1759 // We don't want to use ScratchReg as a base register, because it
1760 // could happen to be R0. Use FP instead, but make sure to preserve it.
1761 if (!HasRedZone) {
1762 // If FP is not saved, copy it to ScratchReg.
1763 if (!HasFP)
1764 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1765 .addReg(FPReg)
1766 .addReg(FPReg);
1767 RBReg = FPReg;
1768 }
1769 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1770 .addImm(0)
1771 .addReg(SPReg);
1772 }
1773 }
1774 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1775 // If there is no red zone, ScratchReg may be needed for holding a useful
1776 // value (although not the base register). Make sure it is not overwritten
1777 // too early.
1778
1779 // If we need to restore both the LR and the CR and we only have one
1780 // available scratch register, we must do them one at a time.
1781 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1782 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1783 // is live here.
1784 assert(HasRedZone && "Expecting red zone");
1785 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1786 .addImm(CRSaveOffset)
1787 .addReg(SPReg);
1788 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1789 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1790 .addReg(TempReg, getKillRegState(i == e-1));
1791 }
1792
1793 // Delay restoring the LR if ScratchReg is needed. This is ok, since
1794 // LR is stored in the caller's stack frame. ScratchReg will be needed
1795 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1796 // a base register anyway, because it may happen to be R0.
1797 bool LoadedLR = false;
1798 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1799 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1800 .addImm(LROffset+SPAdd)
1801 .addReg(RBReg);
1802 LoadedLR = true;
1803 }
1804
1805 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1806 assert(RBReg == SPReg && "Should be using SP as a base register");
1807 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1808 .addImm(CRSaveOffset)
1809 .addReg(RBReg);
1810 }
1811
1812 if (HasFP) {
1813 // If there is red zone, restore FP directly, since SP has already been
1814 // restored. Otherwise, restore the value of FP into ScratchReg.
1815 if (HasRedZone || RBReg == SPReg)
1816 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1817 .addImm(FPOffset)
1818 .addReg(SPReg);
1819 else
1820 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1821 .addImm(FPOffset)
1822 .addReg(RBReg);
1823 }
1824
1825 if (FI->usesPICBase())
1826 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1827 .addImm(PBPOffset)
1828 .addReg(RBReg);
1829
1830 if (HasBP)
1831 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1832 .addImm(BPOffset)
1833 .addReg(RBReg);
1834
1835 // There is nothing more to be loaded from the stack, so now we can
1836 // restore SP: SP = RBReg + SPAdd.
1837 if (RBReg != SPReg || SPAdd != 0) {
1838 assert(!HasRedZone && "This should not happen with red zone");
1839 // If SPAdd is 0, generate a copy.
1840 if (SPAdd == 0)
1841 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1842 .addReg(RBReg)
1843 .addReg(RBReg);
1844 else
1845 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1846 .addReg(RBReg)
1847 .addImm(SPAdd);
1848
1849 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1850 if (RBReg == FPReg)
1851 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1852 .addReg(ScratchReg)
1853 .addReg(ScratchReg);
1854
1855 // Now load the LR from the caller's stack frame.
1856 if (MustSaveLR && !LoadedLR)
1857 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1858 .addImm(LROffset)
1859 .addReg(SPReg);
1860 }
1861
1862 if (MustSaveCR &&
1863 !(SingleScratchReg && MustSaveLR))
1864 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1865 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1866 .addReg(TempReg, getKillRegState(i == e-1));
1867
1868 if (MustSaveLR) {
1869 // If ROP protection is required, an extra instruction is added to compute a
1870 // hash and then compare it to the hash stored in the prologue.
1871 if (HasROPProtect) {
1872 const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1873 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1874 assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1875 "ROP hash check location offset out of range.");
1876 assert(((ImmOffset & 0x7) == 0) &&
1877 "ROP hash check location offset must be 8 byte aligned.");
1878 BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1879 .addReg(ScratchReg)
1880 .addImm(ImmOffset)
1881 .addReg(SPReg);
1882 }
1883 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1884 }
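The constraints asserted on the ROP-protection hash slot above can be captured by a tiny standalone checker (illustrative only; the function name is invented):

#include <cstdint>

// True when the offset lies in the asserted [-512, -8] range and is
// 8-byte aligned, so HASHCHK/HASHCHKP can address the slot.
bool isValidROPHashSlotOffset(int64_t ImmOffset) {
  return ImmOffset <= -8 && ImmOffset >= -512 && (ImmOffset & 0x7) == 0;
}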
1885
1886 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1887 // call optimization
1888 if (IsReturnBlock) {
1889 unsigned RetOpcode = MBBI->getOpcode();
1890 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1891 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1892 MF.getFunction().getCallingConv() == CallingConv::Fast) {
1893 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1894 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1895
1896 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1897 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1898 .addReg(SPReg).addImm(CallerAllocatedAmt);
1899 } else {
1900 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1901 .addImm(CallerAllocatedAmt >> 16);
1902 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1903 .addReg(ScratchReg, RegState::Kill)
1904 .addImm(CallerAllocatedAmt & 0xFFFF);
1905 BuildMI(MBB, MBBI, dl, AddInst)
1906 .addReg(SPReg)
1907 .addReg(FPReg)
1908 .addReg(ScratchReg);
1909 }
1910 } else {
1911 createTailCallBranchInstr(MBB);
1912 }
1913 }
1914}
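In the callee-pop path above, an amount that does not fit a 16-bit signed ADDI immediate is rebuilt with a LIS/ORI pair. A minimal standalone sketch of that composition (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

uint32_t composeLisOri(uint32_t Amount) {
  uint32_t High = Amount >> 16;   // LIS places this in the upper halfword
  uint32_t Low = Amount & 0xFFFF; // ORI ors in the lower halfword
  uint32_t Rebuilt = (High << 16) | Low;
  assert(Rebuilt == Amount && "LIS/ORI must reproduce the original value");
  return Rebuilt;
}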
1915
1916void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1917 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1918
1919 // If we got this far a first terminator should exist.
1920 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1921
1922 DebugLoc dl = MBBI->getDebugLoc();
1923 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1924
1925 // Create branch instruction for pseudo tail call return instruction.
1926 // The TCRETURNdi variants are direct calls. Valid targets for those are
1927 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1928 // since we can tail call external functions with PC-Rel (i.e. we don't need
1929 // to worry about different TOC pointers). Some of the external functions will
1930 // be MO_GlobalAddress while others like memcpy for example, are going to
1931 // be MO_ExternalSymbol.
1932 unsigned RetOpcode = MBBI->getOpcode();
1933 if (RetOpcode == PPC::TCRETURNdi) {
1934 MBBI = MBB.getLastNonDebugInstr();
1935 MachineOperand &JumpTarget = MBBI->getOperand(0);
1936 if (JumpTarget.isGlobal())
1937 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1938 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1939 else if (JumpTarget.isSymbol())
1940 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1941 addExternalSymbol(JumpTarget.getSymbolName());
1942 else
1943 llvm_unreachable("Expecting Global or External Symbol");
1944 } else if (RetOpcode == PPC::TCRETURNri) {
1945 MBBI = MBB.getLastNonDebugInstr();
1946 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1947 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1948 } else if (RetOpcode == PPC::TCRETURNai) {
1949 MBBI = MBB.getLastNonDebugInstr();
1950 MachineOperand &JumpTarget = MBBI->getOperand(0);
1951 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1952 } else if (RetOpcode == PPC::TCRETURNdi8) {
1953 MBBI = MBB.getLastNonDebugInstr();
1954 MachineOperand &JumpTarget = MBBI->getOperand(0);
1955 if (JumpTarget.isGlobal())
1956 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1957 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1958 else if (JumpTarget.isSymbol())
1959 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1960 addExternalSymbol(JumpTarget.getSymbolName());
1961 else
1962 llvm_unreachable("Expecting Global or External Symbol");
1963 } else if (RetOpcode == PPC::TCRETURNri8) {
1964 MBBI = MBB.getLastNonDebugInstr();
1965 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1966 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1967 } else if (RetOpcode == PPC::TCRETURNai8) {
1968 MBBI = MBB.getLastNonDebugInstr();
1969 MachineOperand &JumpTarget = MBBI->getOperand(0);
1970 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1971 }
1972}
1973
1974void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1975 BitVector &SavedRegs,
1976 RegScavenger *RS) const {
1977 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1978
1979 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1980
1981 // Do not explicitly save the callee saved VSRp registers.
1982 // The individual VSR subregisters will be saved instead.
1983 SavedRegs.reset(PPC::VSRp26);
1984 SavedRegs.reset(PPC::VSRp27);
1985 SavedRegs.reset(PPC::VSRp28);
1986 SavedRegs.reset(PPC::VSRp29);
1987 SavedRegs.reset(PPC::VSRp30);
1988 SavedRegs.reset(PPC::VSRp31);
1989
1990 // Save and clear the LR state.
1991 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1992 unsigned LR = RegInfo->getRARegister();
1993 FI->setMustSaveLR(MustSaveLR(MF, LR));
1994 SavedRegs.reset(LR);
1995
1996 // Save R31 if necessary
1997 int FPSI = FI->getFramePointerSaveIndex();
1998 const bool isPPC64 = Subtarget.isPPC64();
1999 MachineFrameInfo &MFI = MF.getFrameInfo();
2000
2001 // If the frame pointer save index hasn't been defined yet.
2002 if (!FPSI && needsFP(MF)) {
2003 // Find the fixed offset of the frame pointer save area.
2004 int FPOffset = getFramePointerSaveOffset();
2005 // Allocate the frame index for frame pointer save area.
2006 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2007 // Save the result.
2008 FI->setFramePointerSaveIndex(FPSI);
2009 }
2010
2011 int BPSI = FI->getBasePointerSaveIndex();
2012 if (!BPSI && RegInfo->hasBasePointer(MF)) {
2013 int BPOffset = getBasePointerSaveOffset();
2014 // Allocate the frame index for the base pointer save area.
2015 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2016 // Save the result.
2017 FI->setBasePointerSaveIndex(BPSI);
2018 }
2019
2020 // Reserve stack space for the PIC Base register (R30).
2021 // Only used in SVR4 32-bit.
2022 if (FI->usesPICBase()) {
2023 int PBPSI = MFI.CreateFixedObject(4, -8, true);
2024 FI->setPICBasePointerSaveIndex(PBPSI);
2025 }
2026
2027 // Make sure we don't explicitly spill r31, because, for example, we have
2028 // some inline asm which explicitly clobbers it, when we otherwise have a
2029 // frame pointer and are using r31's spill slot for the prologue/epilogue
2030 // code. Same goes for the base pointer and the PIC base register.
2031 if (needsFP(MF))
2032 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2033 if (RegInfo->hasBasePointer(MF))
2034 SavedRegs.reset(RegInfo->getBaseRegister(MF));
2035 if (FI->usesPICBase())
2036 SavedRegs.reset(PPC::R30);
2037
2038 // Reserve stack space to move the linkage area to in case of a tail call.
2039 int TCSPDelta = 0;
2040 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2041 (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2042 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2043 }
2044
2045 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2046 // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
2047 // object at the offset of the CR-save slot in the linkage area. The actual
2048 // save and restore of the condition register will be created as part of the
2049 // prologue and epilogue insertion, but the FixedStack object is needed to
2050 // keep the CalleeSavedInfo valid.
2051 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2052 SavedRegs.test(PPC::CR4))) {
2053 const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2054 const int64_t SpillOffset =
2055 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2056 int FrameIdx =
2057 MFI.CreateFixedObject(SpillSize, SpillOffset,
2058 /* IsImmutable */ true, /* IsAliased */ false);
2059 FI->setCRSpillFrameIndex(FrameIdx);
2060 }
2061}
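The CR spill-slot offset selected above can be written as a small standalone helper (illustrative only; the function name is invented):

#include <cstdint>

int64_t crSpillOffset(bool IsPPC64, bool IsAIX) {
  if (IsPPC64)
    return 8;            // 64-bit: CR save word in the linkage area
  return IsAIX ? 4 : -4; // 32-bit AIX vs. 32-bit ELF
}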
2062
2063void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2064 RegScavenger *RS) const {
2065 // Get callee saved register information.
2066 MachineFrameInfo &MFI = MF.getFrameInfo();
2067 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2068
2069 // If the function is shrink-wrapped and has a tail call, the tail call might
2070 // not be in the new RestoreBlock, so the real branch instruction won't be
2071 // generated by emitEpilogue(), because shrink wrapping has chosen a new
2072 // RestoreBlock. Handle this case here.
2073 if (MFI.getSavePoint() && MFI.hasTailCall()) {
2074 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2075 for (MachineBasicBlock &MBB : MF) {
2076 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2077 createTailCallBranchInstr(MBB);
2078 }
2079 }
2080
2081 // Early exit if no callee saved registers are modified!
2082 if (CSI.empty() && !needsFP(MF)) {
2083 addScavengingSpillSlot(MF, RS);
2084 return;
2085 }
2086
2087 unsigned MinGPR = PPC::R31;
2088 unsigned MinG8R = PPC::X31;
2089 unsigned MinFPR = PPC::F31;
2090 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2091
2092 bool HasGPSaveArea = false;
2093 bool HasG8SaveArea = false;
2094 bool HasFPSaveArea = false;
2095 bool HasVRSaveArea = false;
2096
2097 SmallVector<CalleeSavedInfo, 18> GPRegs;
2098 SmallVector<CalleeSavedInfo, 18> G8Regs;
2099 SmallVector<CalleeSavedInfo, 18> FPRegs;
2100 SmallVector<CalleeSavedInfo, 18> VRegs;
2101
2102 for (const CalleeSavedInfo &I : CSI) {
2103 Register Reg = I.getReg();
2105 (Reg != PPC::X2 && Reg != PPC::R2)) &&
2106 "Not expecting to try to spill R2 in a function that must save TOC");
2107 if (PPC::GPRCRegClass.contains(Reg)) {
2108 HasGPSaveArea = true;
2109
2110 GPRegs.push_back(I);
2111
2112 if (Reg < MinGPR) {
2113 MinGPR = Reg;
2114 }
2115 } else if (PPC::G8RCRegClass.contains(Reg)) {
2116 HasG8SaveArea = true;
2117
2118 G8Regs.push_back(I);
2119
2120 if (Reg < MinG8R) {
2121 MinG8R = Reg;
2122 }
2123 } else if (PPC::F8RCRegClass.contains(Reg)) {
2124 HasFPSaveArea = true;
2125
2126 FPRegs.push_back(I);
2127
2128 if (Reg < MinFPR) {
2129 MinFPR = Reg;
2130 }
2131 } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2132 PPC::CRRCRegClass.contains(Reg)) {
2133 ; // do nothing, as we already know whether CRs are spilled
2134 } else if (PPC::VRRCRegClass.contains(Reg) ||
2135 PPC::SPERCRegClass.contains(Reg)) {
2136 // Altivec and SPE are mutually exclusive, but have the same stack
2137 // alignment requirements, so overload the save area for both cases.
2138 HasVRSaveArea = true;
2139
2140 VRegs.push_back(I);
2141
2142 if (Reg < MinVR) {
2143 MinVR = Reg;
2144 }
2145 } else {
2146 llvm_unreachable("Unknown RegisterClass!");
2147 }
2148 }
2149
2150 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2151 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2152
2153 int64_t LowerBound = 0;
2154
2155 // Take into account stack space reserved for tail calls.
2156 int TCSPDelta = 0;
2157 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2158 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2159 LowerBound = TCSPDelta;
2160 }
2161
2162 // The Floating-point register save area is right below the back chain word
2163 // of the previous stack frame.
2164 if (HasFPSaveArea) {
2165 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2166 int FI = FPRegs[i].getFrameIdx();
2167
2168 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2169 }
2170
2171 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2172 }
2173
2174 // Check whether the frame pointer register is allocated. If so, make sure it
2175 // is spilled to the correct offset.
2176 if (needsFP(MF)) {
2177 int FI = PFI->getFramePointerSaveIndex();
2178 assert(FI && "No Frame Pointer Save Slot!");
2179 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2180 // FP is R31/X31, so no need to update MinGPR/MinG8R.
2181 HasGPSaveArea = true;
2182 }
2183
2184 if (PFI->usesPICBase()) {
2185 int FI = PFI->getPICBasePointerSaveIndex();
2186 assert(FI && "No PIC Base Pointer Save Slot!");
2187 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2188
2189 MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2190 HasGPSaveArea = true;
2191 }
2192
2193 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2194 if (RegInfo->hasBasePointer(MF)) {
2195 int FI = PFI->getBasePointerSaveIndex();
2196 assert(FI && "No Base Pointer Save Slot!");
2197 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2198
2199 Register BP = RegInfo->getBaseRegister(MF);
2200 if (PPC::G8RCRegClass.contains(BP)) {
2201 MinG8R = std::min<unsigned>(MinG8R, BP);
2202 HasG8SaveArea = true;
2203 } else if (PPC::GPRCRegClass.contains(BP)) {
2204 MinGPR = std::min<unsigned>(MinGPR, BP);
2205 HasGPSaveArea = true;
2206 }
2207 }
2208
2209 // General register save area starts right below the Floating-point
2210 // register save area.
2211 if (HasGPSaveArea || HasG8SaveArea) {
2212 // Move general register save area spill slots down, taking into account
2213 // the size of the Floating-point register save area.
2214 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2215 if (!GPRegs[i].isSpilledToReg()) {
2216 int FI = GPRegs[i].getFrameIdx();
2217 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2218 }
2219 }
2220
2221 // Move general register save area spill slots down, taking into account
2222 // the size of the Floating-point register save area.
2223 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2224 if (!G8Regs[i].isSpilledToReg()) {
2225 int FI = G8Regs[i].getFrameIdx();
2226 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2227 }
2228 }
2229
2230 unsigned MinReg =
2231 std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2232 TRI->getEncodingValue(MinG8R));
2233
2234 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2235 LowerBound -= (31 - MinReg + 1) * GPRegSize;
2236 }
2237
2238 // For 32-bit only, the CR save area is below the general register
2239 // save area. For 64-bit SVR4, the CR save area is addressed relative
2240 // to the stack pointer and hence does not need an adjustment here.
2241 // Only CR2 (the first nonvolatile spilled) has an associated frame
2242 // index so that we have a single uniform save area.
2243 if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2244 // Adjust the frame index of the CR spill slot.
2245 for (const auto &CSInfo : CSI) {
2246 if (CSInfo.getReg() == PPC::CR2) {
2247 int FI = CSInfo.getFrameIdx();
2248 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2249 break;
2250 }
2251 }
2252
2253 LowerBound -= 4; // The CR save area is always 4 bytes long.
2254 }
2255
2256 // Both Altivec and SPE have the same alignment and padding requirements
2257 // within the stack frame.
2258 if (HasVRSaveArea) {
2259 // Insert alignment padding; we need 16-byte alignment. Note: for a positive
2260 // number the alignment formula is y = (x + (n-1)) & ~(n-1). But since we are
2261 // using a negative number here (the stack grows downward), we should use the
2262 // formula y = x & ~(n-1), where x is the size before aligning, n is the
2263 // alignment size (n = 16 here) and y is the size after aligning.
2264 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2265 LowerBound &= ~(15);
2266
2267 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2268 int FI = VRegs[i].getFrameIdx();
2269
2270 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2271 }
2272 }
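A standalone sketch of that align-down step (illustrative only), using the y = x & ~(n - 1) form from the comment above:

#include <cassert>
#include <cstdint>

int64_t alignDown16(int64_t LowerBound) {
  assert(LowerBound <= 0 && "expected a non-positive offset");
  return LowerBound & ~int64_t(15);
}
// Example: alignDown16(-100) == -112, the closest 16-byte aligned value at or below -100.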
2273
2274 addScavengingSpillSlot(MF, RS);
2275}
2276
2277void
2278PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2279 RegScavenger *RS) const {
2280 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2281 // a large stack, which will require scavenging a register to materialize a
2282 // large offset.
2283
2284 // We need to have a scavenger spill slot for spills if the frame size is
2285 // large. In case there is no free register for large-offset addressing,
2286 // this slot is used for the necessary emergency spill. Also, we need the
2287 // slot for dynamic stack allocations.
2288
2289 // The scavenger might be invoked if the frame offset does not fit into
2290 // the 16-bit immediate. We don't know the complete frame size here
2291 // because we've not yet computed callee-saved register spills or the
2292 // needed alignment padding.
2293 unsigned StackSize = determineFrameLayout(MF, true);
2294 MachineFrameInfo &MFI = MF.getFrameInfo();
2295 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2296 (hasSpills(MF) && !isInt<16>(StackSize))) {
2297 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2298 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2299 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2300 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2301 unsigned Size = TRI.getSpillSize(RC);
2302 Align Alignment = TRI.getSpillAlign(RC);
2303 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2304
2305 // Might we have over-aligned allocas?
2306 bool HasAlVars =
2307 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2308
2309 // These kinds of spills might need two registers.
2310 if (spillsCR(MF) || HasAlVars)
2311 RS->addScavengingFrameIndex(
2312 MFI.CreateStackObject(Size, Alignment, false));
2313 }
2314}
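The condition guarding the scavenging slot above boils down to the following standalone predicate (illustrative only; parameter names are invented and mirror the checks above, with the 16-bit limit matching the signed D-form displacement):

#include <cstdint>
#include <limits>

bool needsScavengingSlot(bool HasVarSizedObjects, bool SpillsCR,
                         bool HasNonRISpills, bool HasSpills,
                         int64_t EstimatedStackSize) {
  bool FitsInDForm =
      EstimatedStackSize >= std::numeric_limits<int16_t>::min() &&
      EstimatedStackSize <= std::numeric_limits<int16_t>::max();
  return HasVarSizedObjects || SpillsCR || HasNonRISpills ||
         (HasSpills && !FitsInDForm);
}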
2315
2316 // This function checks if a callee-saved GPR can be spilled to a volatile
2317// vector register. This occurs for leaf functions when the option
2318// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2319// which were not spilled to vectors, return false so the target independent
2320// code can handle them by assigning a FrameIdx to a stack slot.
2321bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2322 MachineFunction &MF, const TargetRegisterInfo *TRI,
2323 std::vector<CalleeSavedInfo> &CSI) const {
2324
2325 if (CSI.empty())
2326 return true; // Early exit if no callee saved registers are modified!
2327
2328 // Early exit if we cannot spill GPRs to volatile vector registers.
2329 MachineFrameInfo &MFI = MF.getFrameInfo();
2330 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2331 return false;
2332
2333 // Build a BitVector of VSRs that can be used for spilling GPRs.
2334 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2335 BitVector BVCalleeSaved(TRI->getNumRegs());
2336 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2337 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2338 for (unsigned i = 0; CSRegs[i]; ++i)
2339 BVCalleeSaved.set(CSRegs[i]);
2340
2341 for (unsigned Reg : BVAllocatable.set_bits()) {
2342 // Set to 0 if the register is not a volatile VSX register, or if it is
2343 // used in the function.
2344 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2345 MF.getRegInfo().isPhysRegUsed(Reg))
2346 BVAllocatable.reset(Reg);
2347 }
2348
2349 bool AllSpilledToReg = true;
2350 unsigned LastVSRUsedForSpill = 0;
2351 for (auto &CS : CSI) {
2352 if (BVAllocatable.none())
2353 return false;
2354
2355 Register Reg = CS.getReg();
2356
2357 if (!PPC::G8RCRegClass.contains(Reg)) {
2358 AllSpilledToReg = false;
2359 continue;
2360 }
2361
2362 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2363 // into one VSR using the mtvsrdd instruction.
2364 if (LastVSRUsedForSpill != 0) {
2365 CS.setDstReg(LastVSRUsedForSpill);
2366 BVAllocatable.reset(LastVSRUsedForSpill);
2367 LastVSRUsedForSpill = 0;
2368 continue;
2369 }
2370
2371 unsigned VolatileVFReg = BVAllocatable.find_first();
2372 if (VolatileVFReg < BVAllocatable.size()) {
2373 CS.setDstReg(VolatileVFReg);
2374 LastVSRUsedForSpill = VolatileVFReg;
2375 } else {
2376 AllSpilledToReg = false;
2377 }
2378 }
2379 return AllSpilledToReg;
2380}
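Because mtvsrdd lets one volatile VSR hold two 64-bit GPRs, the reuse of LastVSRUsedForSpill above means N GPR spills consume only ceil(N / 2) VSRs from the allocatable pool. A trivial standalone helper stating that bound (illustrative only):

#include <cstddef>

std::size_t vsrsNeededForGPRSpills(std::size_t NumGPRs) {
  return (NumGPRs + 1) / 2; // each VSR holds up to two GPRs
}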
2381
2382bool PPCFrameLowering::spillCalleeSavedRegisters(
2383 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2384 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2385
2386 MachineFunction *MF = MBB.getParent();
2387 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2388 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2389 bool MustSaveTOC = FI->mustSaveTOC();
2390 DebugLoc DL;
2391 bool CRSpilled = false;
2392 MachineInstrBuilder CRMIB;
2393 BitVector Spilled(TRI->getNumRegs());
2394
2395 VSRContainingGPRs.clear();
2396
2397 // Map each VSR to the GPRs to be spilled into it. A single VSR can contain one
2398 // or two GPRs, so we need a table to record information for later save/restore.
2399 for (const CalleeSavedInfo &Info : CSI) {
2400 if (Info.isSpilledToReg()) {
2401 auto &SpilledVSR =
2402 VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2403 assert(SpilledVSR.second == 0 &&
2404 "Can't spill more than two GPRs into VSR!");
2405 if (SpilledVSR.first == 0)
2406 SpilledVSR.first = Info.getReg();
2407 else
2408 SpilledVSR.second = Info.getReg();
2409 }
2410 }
2411
2412 for (const CalleeSavedInfo &I : CSI) {
2413 Register Reg = I.getReg();
2414
2415 // CR2 through CR4 are the nonvolatile CR fields.
2416 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2417
2418 // Add the callee-saved register as live-in; it's killed at the spill.
2419 // Do not do this for callee-saved registers that are live-in to the
2420 // function because they will already be marked live-in and this will be
2421 // adding it for a second time. It is an error to add the same register
2422 // to the set more than once.
2423 const MachineRegisterInfo &MRI = MF->getRegInfo();
2424 bool IsLiveIn = MRI.isLiveIn(Reg);
2425 if (!IsLiveIn)
2426 MBB.addLiveIn(Reg);
2427
2428 if (CRSpilled && IsCRField) {
2429 CRMIB.addReg(Reg, RegState::ImplicitKill);
2430 continue;
2431 }
2432
2433 // The actual spill will happen in the prologue.
2434 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2435 continue;
2436
2437 // Insert the spill to the stack frame.
2438 if (IsCRField) {
2439 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2440 if (!Subtarget.is32BitELFABI()) {
2441 // The actual spill will happen at the start of the prologue.
2442 FuncInfo->addMustSaveCR(Reg);
2443 } else {
2444 CRSpilled = true;
2445 FuncInfo->setSpillsCR();
2446
2447 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
2448 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2449 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2450 .addReg(Reg, RegState::ImplicitKill);
2451
2452 MBB.insert(MI, CRMIB);
2453 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2454 .addReg(PPC::R12,
2455 getKillRegState(true)),
2456 I.getFrameIdx()));
2457 }
2458 } else {
2459 if (I.isSpilledToReg()) {
2460 unsigned Dst = I.getDstReg();
2461
2462 if (Spilled[Dst])
2463 continue;
2464
2465 if (VSRContainingGPRs[Dst].second != 0) {
2466 assert(Subtarget.hasP9Vector() &&
2467 "mtvsrdd is unavailable on pre-P9 targets.");
2468
2469 NumPESpillVSR += 2;
2470 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2471 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2472 .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2473 } else if (VSRContainingGPRs[Dst].second == 0) {
2474 assert(Subtarget.hasP8Vector() &&
2475 "Can't move GPR to VSR on pre-P8 targets.");
2476
2477 ++NumPESpillVSR;
2478 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2479 TRI->getSubReg(Dst, PPC::sub_64))
2480 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2481 } else {
2482 llvm_unreachable("More than two GPRs spilled to a VSR!");
2483 }
2484 Spilled.set(Dst);
2485 } else {
2486 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2487 // Use !IsLiveIn for the kill flag.
2488 // We do not want to kill registers that are live in this function
2489 // before their use because they will become undefined registers.
2490 // Functions without NoUnwind need to preserve the order of elements in
2491 // saved vector registers.
2492 if (Subtarget.needsSwapsForVSXMemOps() &&
2493 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2494 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2495 I.getFrameIdx(), RC, TRI);
2496 else
2497 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2498 TRI, Register());
2499 }
2500 }
2501 }
2502 return true;
2503}
2504
2505static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2506 bool CR4Spilled, MachineBasicBlock &MBB,
2507 MachineBasicBlock::iterator MI,
2508 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2509
2510 MachineFunction *MF = MBB.getParent();
2511 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2512 DebugLoc DL;
2513 unsigned MoveReg = PPC::R12;
2514
2515 // 32-bit: FP-relative
2516 MBB.insert(MI,
2517 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2518 CSI[CSIIndex].getFrameIdx()));
2519
2520 unsigned RestoreOp = PPC::MTOCRF;
2521 if (CR2Spilled)
2522 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2523 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2524
2525 if (CR3Spilled)
2526 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2527 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2528
2529 if (CR4Spilled)
2530 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2531 .addReg(MoveReg, getKillRegState(true)));
2532}
2533
2534MachineBasicBlock::iterator PPCFrameLowering::eliminateCallFramePseudoInstr(
2535 MachineFunction &MF, MachineBasicBlock &MBB,
2536 MachineBasicBlock::iterator I) const {
2537 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2538 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2539 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2540 // Add (actually subtract) back the amount the callee popped on return.
2541 if (int CalleeAmt = I->getOperand(1).getImm()) {
2542 bool is64Bit = Subtarget.isPPC64();
2543 CalleeAmt *= -1;
2544 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2545 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2546 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2547 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2548 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2549 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2550 const DebugLoc &dl = I->getDebugLoc();
2551
2552 if (isInt<16>(CalleeAmt)) {
2553 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2554 .addReg(StackReg, RegState::Kill)
2555 .addImm(CalleeAmt);
2556 } else {
2557 MachineBasicBlock::iterator MBBI = I;
2558 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2559 .addImm(CalleeAmt >> 16);
2560 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2561 .addReg(TmpReg, RegState::Kill)
2562 .addImm(CalleeAmt & 0xFFFF);
2563 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2564 .addReg(StackReg, RegState::Kill)
2565 .addReg(TmpReg);
2566 }
2567 }
2568 }
2569 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2570 return MBB.erase(I);
2571}
2572
2573static bool isCalleeSavedCR(unsigned Reg) {
2574 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2575}
2576
2577bool PPCFrameLowering::restoreCalleeSavedRegisters(
2578 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2579 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2580 MachineFunction *MF = MBB.getParent();
2581 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2582 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2583 bool MustSaveTOC = FI->mustSaveTOC();
2584 bool CR2Spilled = false;
2585 bool CR3Spilled = false;
2586 bool CR4Spilled = false;
2587 unsigned CSIIndex = 0;
2588 BitVector Restored(TRI->getNumRegs());
2589
2590 // Initialize insertion-point logic; we will be restoring in reverse
2591 // order of spill.
2592 MachineBasicBlock::iterator I = MI, BeforeI = I;
2593 bool AtStart = I == MBB.begin();
2594
2595 if (!AtStart)
2596 --BeforeI;
2597
2598 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2599 Register Reg = CSI[i].getReg();
2600
2601 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2602 continue;
2603
2604 // Restore of callee saved condition register field is handled during
2605 // epilogue insertion.
2606 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2607 continue;
2608
2609 if (Reg == PPC::CR2) {
2610 CR2Spilled = true;
2611 // The spill slot is associated only with CR2, which is the
2612 // first nonvolatile spilled. Save it here.
2613 CSIIndex = i;
2614 continue;
2615 } else if (Reg == PPC::CR3) {
2616 CR3Spilled = true;
2617 continue;
2618 } else if (Reg == PPC::CR4) {
2619 CR4Spilled = true;
2620 continue;
2621 } else {
2622 // On 32-bit ELF when we first encounter a non-CR register after seeing at
2623 // least one CR register, restore all spilled CRs together.
2624 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2625 bool is31 = needsFP(*MF);
2626 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2627 CSIIndex);
2628 CR2Spilled = CR3Spilled = CR4Spilled = false;
2629 }
2630
2631 if (CSI[i].isSpilledToReg()) {
2632 DebugLoc DL;
2633 unsigned Dst = CSI[i].getDstReg();
2634
2635 if (Restored[Dst])
2636 continue;
2637
2638 if (VSRContainingGPRs[Dst].second != 0) {
2639 assert(Subtarget.hasP9Vector());
2640 NumPEReloadVSR += 2;
2641 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2642 VSRContainingGPRs[Dst].second)
2643 .addReg(Dst);
2644 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2645 VSRContainingGPRs[Dst].first)
2646 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2647 } else if (VSRContainingGPRs[Dst].second == 0) {
2648 assert(Subtarget.hasP8Vector());
2649 ++NumPEReloadVSR;
2650 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2651 VSRContainingGPRs[Dst].first)
2652 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2653 } else {
2654 llvm_unreachable("More than two GPRs spilled to a VSR!");
2655 }
2656
2657 Restored.set(Dst);
2658
2659 } else {
2660 // Default behavior for non-CR saves.
2661 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2662
2663 // Functions without NoUnwind need to preserve the order of elements in
2664 // saved vector registers.
2665 if (Subtarget.needsSwapsForVSXMemOps() &&
2666 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2667 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2668 TRI);
2669 else
2670 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2671 Register());
2672
2673 assert(I != MBB.begin() &&
2674 "loadRegFromStackSlot didn't insert any code!");
2675 }
2676 }
2677
2678 // Insert in reverse order.
2679 if (AtStart)
2680 I = MBB.begin();
2681 else {
2682 I = BeforeI;
2683 ++I;
2684 }
2685 }
2686
2687 // If we haven't yet restored the CRs, do so now.
2688 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2689 assert(Subtarget.is32BitELFABI() &&
2690 "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2691 bool is31 = needsFP(*MF);
2692 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2693 }
2694
2695 return true;
2696}
2697
2698uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2699 return TOCSaveOffset;
2700}
2701
2702uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2703 return FramePointerSaveOffset;
2704}
2705
2706uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2707 return BasePointerSaveOffset;
2708}
2709
2710bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2711 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2712 return false;
2713 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2714}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define CALLEE_SAVED_VRS
static bool hasSpills(const MachineFunction &MF)
static unsigned computeCRSaveOffset(const PPCSubtarget &STI)
static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, unsigned CSIIndex)
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI)
static bool MustSaveLR(const MachineFunction &MF, unsigned LR)
MustSaveLR - Return true if this function requires that we save the LR register onto the stack in the...
#define CALLEE_SAVED_FPRS
static cl::opt< bool > EnablePEVectorSpills("ppc-enable-pe-vector-spills", cl::desc("Enable spills in prologue to vector registers."), cl::init(false), cl::Hidden)
#define CALLEE_SAVED_GPRS32
#define CALLEE_SAVED_GPRS64
static unsigned computeLinkageSize(const PPCSubtarget &STI)
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI)
static bool isCalleeSavedCR(unsigned Reg)
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI)
static bool hasNonRISpills(const MachineFunction &MF)
static bool spillsCR(const MachineFunction &MF)
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void buildDefCFAReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned Reg, const SystemZInstrInfo *ZII)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:467
static bool is64Bit(const char *name)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
bool test(unsigned Idx) const
Definition: BitVector.h:454
BitVector & reset()
Definition: BitVector.h:385
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:293
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:155
BitVector & set()
Definition: BitVector.h:344
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:301
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:181
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:133
size_type size() const
size - Returns the number of bits in this bitvector.
Definition: BitVector.h:152
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:644
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
An instruction for reading from memory.
Definition: Instructions.h:177
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset)
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:547
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register)
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:540
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset)
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:533
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset)
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:571
static MCCFIInstruction createRegister(MCSymbol *L, unsigned Register1, unsigned Register2)
.cfi_register Previous value of Register1 is saved in register Register2.
Definition: MCDwarf.h:586
const MCRegisterInfo * getRegisterInfo() const
Definition: MCContext.h:448
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
bool isReturnBlock() const
Convenience function that returns true if the block ends in a return instruction.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool adjustsStack() const
Return true if this function adjusts the stack – e.g., when calling another function.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
void setObjectOffset(int ObjectIdx, int64_t SPOffset)
Set the stack frame offset of the specified object.
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
MachineBasicBlock * getRestorePoint() const
uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineBasicBlock * getSavePoint() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
int64_t getOffset() const
Return the offset from the symbol in this operand.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
def_iterator def_begin(Register RegNo) const
static def_iterator def_end()
bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
bool needsFP(const MachineFunction &MF) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override
Check whether or not the given MBB can be used as a epilogue for the target.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override
Methods used by shrink wrapping to determine if MBB can be used for the function prologue/epilogue.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
void replaceFPWithRealFP(MachineFunction &MF) const
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
uint64_t determineFrameLayout(const MachineFunction &MF, bool UseEstimate=false, unsigned *NewMaxCallFrameSize=nullptr) const
Determine the frame layout but do not update the machine function.
void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const
PPCFrameLowering(const PPCSubtarget &STI)
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
This function will assign callee saved gprs to volatile vector registers for prologue spills when app...
uint64_t determineFrameLayoutAndUpdate(MachineFunction &MF, bool UseEstimate=false) const
Determine the frame layout and update the machine function.
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const override
Replace a StackProbe stub (if any) with the actual probe code inline.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register -- 64-bit SVR4 ABI only.
uint64_t getBasePointerSaveOffset() const
getBasePointerSaveOffset - Return the previous frame offset to save the base pointer.
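A minimal usage sketch of the offset getters above, assuming STI is a PPCSubtarget; the values themselves are ABI-dependent:
  // Sketch only: query the ABI-dependent save slots computed in the constructor.
  PPCFrameLowering PFL(STI);
  uint64_t LROff  = PFL.getReturnSaveOffset();       // LR save slot in the caller's frame
  uint64_t FPOff  = PFL.getFramePointerSaveOffset(); // frame pointer save slot
  uint64_t TOCOff = PFL.getTOCSaveOffset();          // TOC save slot
  unsigned Link   = PFL.getLinkageSize();            // size of the ABI linkage area
  (void)LROff; (void)FPOff; (void)TOCOff; (void)Link;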
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
const SmallVectorImpl< Register > & getMustSaveCRs() const
void addMustSaveCR(Register Reg)
void setPICBasePointerSaveIndex(int Idx)
unsigned getMinReservedArea() const
void setMustSaveLR(bool U)
MustSaveLR - This is set when the prolog/epilog inserter does its initial scan of the function.
void setFramePointerSaveIndex(int Idx)
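A minimal sketch of how prologue decisions are recorded in PPCFunctionInfo; MF is an assumed MachineFunction and FrameIdx a hypothetical frame index created elsewhere:
  // Sketch only: record what the prolog/epilog inserter decided to save.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  FI->setMustSaveLR(true);       // the prologue must spill LR
  FI->addMustSaveCR(PPC::CR2);   // CR2 is a callee-saved CR field
  if (!FI->getMustSaveCRs().empty())
    FI->setFramePointerSaveIndex(FrameIdx);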
bool hasBasePointer(const MachineFunction &MF) const
Register getBaseRegister(const MachineFunction &MF) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
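A minimal sketch of the base-pointer and callee-saved queries above, assuming Subtarget (a PPCSubtarget) and MF are in scope:
  // Sketch only: ask the PPC register info about the frame-related registers.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    Register BP = RegInfo->getBaseRegister(MF); // base pointer register for this function
    (void)BP;
  }
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // zero-terminated list
  (void)CSRegs;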
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:146
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
unsigned getRedZoneSize() const
Definition: PPCSubtarget.h:192
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
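A minimal sketch of the ABI dispatch these subtarget predicates enable; STI is an assumed PPCSubtarget:
  // Sketch only: typical ABI and pointer-width queries in frame lowering.
  unsigned PtrSize = STI.isPPC64() ? 8 : 4;
  bool LargeLinkage = STI.isAIXABI() || STI.is64BitELFABI();
  unsigned RedZone = STI.getRedZoneSize();
  const PPCInstrInfo *TII = STI.getInstrInfo();
  (void)PtrSize; (void)LargeLinkage; (void)RedZone; (void)TII;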
bool hasInlineStackProbe(const MachineFunction &MF) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
void enterBasicBlock(MachineBasicBlock &MBB)
Start tracking liveness from the beginning of basic block MBB.
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
BitVector getRegsAvailable(const TargetRegisterClass *RC)
Return a BitVector of all available registers in the given register class.
void forward()
Move the internal MBB iterator and update register states.
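A minimal sketch of a scratch-register search using the RegScavenger interface above, in the spirit of the backend's own search; MBB is an assumed MachineBasicBlock:
  // Sketch only: find free 32-bit GPRs at the end of the block.
  RegScavenger RS;
  RS.enterBasicBlock(MBB);
  if (!MBB.empty())
    RS.forward(std::prev(MBB.end()));  // advance liveness to the last instruction
  BitVector Avail = RS.getRegsAvailable(&PPC::GPRCRegClass);
  bool R12Free = !RS.isRegUsed(PPC::R12);
  (void)Avail; (void)R12Free;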
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
bool isPositionIndependent() const
TargetOptions Options
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
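A minimal sketch of how these target options are usually consulted; MF is an assumed MachineFunction:
  // Sketch only: option queries that influence frame-pointer and tail-call handling.
  const TargetMachine &TM = MF.getTarget();
  bool MustKeepFP = TM.Options.DisableFramePointerElim(MF);
  bool GuaranteedTCO = TM.Options.GuaranteedTailCallOpt;
  bool PIC = TM.isPositionIndependent();
  (void)MustKeepFP; (void)GuaranteedTCO; (void)PIC;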
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Kill
The last use of a register.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
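A minimal sketch combining BuildMI and addFrameReference to spill a GPR to a frame slot; MBB, MI, DL, TII and the frame index FrameIdx are assumed to be in scope and are not taken from this file:
  // Sketch only: store R31 to FrameIdx before instruction MI.
  addFrameReference(BuildMI(MBB, MI, DL, TII.get(PPC::STW))
                        .addReg(PPC::R31, getKillRegState(true)),
                    FrameIdx);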
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-ins for MBB.
Definition: LivePhysRegs.h:198
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:297
unsigned getKillRegState(bool B)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1809
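A minimal sketch of find_if over a callee-saved-info list; CSI is an assumed std::vector<CalleeSavedInfo>:
  // Sketch only: look for a particular callee-saved register in CSI.
  auto Found = llvm::find_if(CSI, [](const CalleeSavedInfo &Info) {
    return Info.getReg() == PPC::R31;
  });
  bool SpillsR31 = Found != CSI.end();
  (void)SpillsR31;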
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
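A minimal sketch of the alignment helpers above, rounding a raw frame size up to the stack alignment; the concrete sizes are illustrative only:
  // Sketch only: round 200 bytes up to a 16-byte stack alignment.
  Align StackAlign(16);
  uint64_t FrameSize = alignTo(200, StackAlign);   // 200 -> 208
  unsigned AlignShift = Log2(StackAlign);          // 16 -> 4
  bool IsPow2 = isPowerOf2_64(StackAlign.value()); // true: 16 is a power of two
  (void)FrameSize; (void)AlignShift; (void)IsPow2;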