LLVM 23.0.0git
MipsDelaySlotFiller.cpp
Go to the documentation of this file.
1//===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Simple pass to fill delay slots with useful instructions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Mips.h"
14#include "MipsInstrInfo.h"
15#include "MipsSubtarget.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/StringRef.h"
36#include "llvm/MC/MCInstrDesc.h"
43#include <cassert>
44#include <iterator>
45#include <memory>
46#include <utility>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "mips-delay-slot-filler"
51
52STATISTIC(FilledSlots, "Number of delay slots filled");
53STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
54 " are not NOP.");
55STATISTIC(R5900ShortLoopNops, "Number of delay slots left as NOP for R5900 "
56 "short loop fix");
57
59 "disable-mips-delay-filler",
60 cl::init(false),
61 cl::desc("Fill all delay slots with NOPs."),
63
65 "disable-mips-df-forward-search",
66 cl::init(true),
67 cl::desc("Disallow MIPS delay filler to search forward."),
69
71 "disable-mips-df-succbb-search",
72 cl::init(true),
73 cl::desc("Disallow MIPS delay filler to search successor basic blocks."),
75
77 "disable-mips-df-backward-search",
78 cl::init(false),
79 cl::desc("Disallow MIPS delay filler to search backward."),
81
83
84namespace {
85
86 using Iter = MachineBasicBlock::iterator;
87 using ReverseIter = MachineBasicBlock::reverse_iterator;
89
90 class RegDefsUses {
91 public:
92 RegDefsUses(const TargetRegisterInfo &TRI);
93
94 void init(const MachineInstr &MI);
95
96 /// This function sets all caller-saved registers in Defs.
97 void setCallerSaved(const MachineInstr &MI);
98
99 /// This function sets all unallocatable registers in Defs.
100 void setUnallocatableRegs(const MachineFunction &MF);
101
102 /// Set bits in Uses corresponding to MBB's live-out registers except for
103 /// the registers that are live-in to SuccBB.
104 void addLiveOut(const MachineBasicBlock &MBB,
105 const MachineBasicBlock &SuccBB);
106
107 bool update(const MachineInstr &MI, unsigned Begin, unsigned End);
108
109 private:
110 bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg,
111 bool IsDef) const;
112
113 /// Returns true if Reg or its alias is in RegSet.
114 bool isRegInSet(const BitVector &RegSet, unsigned Reg) const;
115
116 const TargetRegisterInfo &TRI;
117 BitVector Defs, Uses;
118 };
119
120 /// Base class for inspecting loads and stores.
121 class InspectMemInstr {
122 public:
123 InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {}
124 virtual ~InspectMemInstr() = default;
125
126 /// Return true if MI cannot be moved to delay slot.
127 bool hasHazard(const MachineInstr &MI);
128
129 protected:
130 /// Flags indicating whether loads or stores have been seen.
131 bool OrigSeenLoad = false;
132 bool OrigSeenStore = false;
133 bool SeenLoad = false;
134 bool SeenStore = false;
135
136 /// Memory instructions are not allowed to move to delay slot if this flag
137 /// is true.
138 bool ForbidMemInstr;
139
140 private:
141 virtual bool hasHazard_(const MachineInstr &MI) = 0;
142 };
143
144 /// This subclass rejects any memory instructions.
145 class NoMemInstr : public InspectMemInstr {
146 public:
147 NoMemInstr() : InspectMemInstr(true) {}
148
149 private:
150 bool hasHazard_(const MachineInstr &MI) override { return true; }
151 };
152
153 /// This subclass accepts loads from stacks and constant loads.
154 class LoadFromStackOrConst : public InspectMemInstr {
155 public:
156 LoadFromStackOrConst() : InspectMemInstr(false) {}
157
158 private:
159 bool hasHazard_(const MachineInstr &MI) override;
160 };
161
162 /// This subclass uses memory dependence information to determine whether a
163 /// memory instruction can be moved to a delay slot.
164 class MemDefsUses : public InspectMemInstr {
165 public:
166 explicit MemDefsUses(const MachineFrameInfo *MFI);
167
168 private:
169 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
170
171 bool hasHazard_(const MachineInstr &MI) override;
172
173 /// Update Defs and Uses. Return true if there exist dependences that
174 /// disqualify the delay slot candidate between V and values in Uses and
175 /// Defs.
176 bool updateDefsUses(ValueType V, bool MayStore);
177
178 /// Get the list of underlying objects of MI's memory operand.
179 bool getUnderlyingObjects(const MachineInstr &MI,
180 SmallVectorImpl<ValueType> &Objects) const;
181
182 const MachineFrameInfo *MFI;
183 SmallPtrSet<ValueType, 4> Uses, Defs;
184
185 /// Flags indicating whether loads or stores with no underlying objects have
186 /// been seen.
187 bool SeenNoObjLoad = false;
188 bool SeenNoObjStore = false;
189 };
190
191 class MipsDelaySlotFiller : public MachineFunctionPass {
192 public:
193 MipsDelaySlotFiller() : MachineFunctionPass(ID) {}
194
195 StringRef getPassName() const override { return "Mips Delay Slot Filler"; }
196
197 bool runOnMachineFunction(MachineFunction &F) override {
198 TM = &F.getTarget();
199 bool Changed = false;
200 for (MachineBasicBlock &MBB : F)
201 Changed |= runOnMachineBasicBlock(MBB);
202
203 // This pass invalidates liveness information when it reorders
204 // instructions to fill delay slot. Without this, -verify-machineinstrs
205 // will fail.
206 if (Changed)
207 F.getRegInfo().invalidateLiveness();
208
209 return Changed;
210 }
211
212 MachineFunctionProperties getRequiredProperties() const override {
213 return MachineFunctionProperties().setNoVRegs();
214 }
215
216 void getAnalysisUsage(AnalysisUsage &AU) const override {
217 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
219 }
220
221 static char ID;
222
223 private:
224 bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
225
226 Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
227 const DebugLoc &DL);
228
229 /// This function checks if it is valid to move Candidate to the delay slot
230 /// and returns true if it isn't. It also updates memory and register
231 /// dependence information.
232 bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU,
233 InspectMemInstr &IM) const;
234
235 /// This function searches range [Begin, End) for an instruction that can be
236 /// moved to the delay slot. Returns true on success.
237 template<typename IterTy>
238 bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
239 RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot,
240 IterTy &Filler) const;
241
242 /// This function searches in the backward direction for an instruction that
243 /// can be moved to the delay slot. Returns true on success.
244 bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const;
245
246 /// This function searches MBB in the forward direction for an instruction
247 /// that can be moved to the delay slot. Returns true on success.
248 bool searchForward(MachineBasicBlock &MBB, Iter Slot) const;
249
250 /// This function searches one of MBB's successor blocks for an instruction
251 /// that can be moved to the delay slot and inserts clones of the
252 /// instruction into the successor's predecessor blocks.
253 bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const;
254
255 /// Pick a successor block of MBB. Return NULL if MBB doesn't have a
256 /// successor block that is not a landing pad.
257 MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const;
258
259 /// This function analyzes MBB and returns an instruction with an unoccupied
260 /// slot that branches to Dst.
261 std::pair<MipsInstrInfo::BranchType, MachineInstr *>
262 getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const;
263
264 /// Examine Pred and see if it is possible to insert an instruction into
265 /// one of its branches delay slot or its end.
266 bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
267 RegDefsUses &RegDU, bool &HasMultipleSuccs,
268 BB2BrMap &BrMap) const;
269
270 bool terminateSearch(const MachineInstr &Candidate) const;
271
272 const TargetMachine *TM = nullptr;
273 };
274
275} // end anonymous namespace
276
277char MipsDelaySlotFiller::ID = 0;
278
279static bool hasUnoccupiedSlot(const MachineInstr *MI) {
280 return MI->hasDelaySlot() && !MI->isBundledWithSucc();
281}
282
283/// Check if a branch is a short backward loop that triggers the R5900 erratum.
284/// Quote from binutils-gdb/gas/config/tc-mips.c:
285///
286/// On the R5900 short loops need to be fixed by inserting a NOP in the
287/// branch delay slot.
288///
289/// The short loop bug under certain conditions causes loops to execute
290/// only once or twice. We must ensure that the assembler never
291/// generates loops that satisfy all of the following conditions:
292///
293/// - a loop consists of less than or equal to six instructions
294/// (including the branch delay slot);
295/// - a loop contains only one conditional branch instruction at the end
296/// of the loop;
297/// - a loop does not contain any other branch or jump instructions;
298/// - a branch delay slot of the loop is not NOP (EE 2.9 or later).
299///
300/// We need to do this because of a hardware bug in the R5900 chip.
302 const MachineBasicBlock &MBB) {
303 // Must be a conditional branch (not jump or indirect branch)
304 if (!MI->isBranch() || MI->isIndirectBranch())
305 return false;
306
307 // Check if this is a conditional branch by looking for an MBB operand
308 const MachineBasicBlock *TargetMBB = nullptr;
309 for (const MachineOperand &MO : MI->operands()) {
310 if (MO.isMBB()) {
311 TargetMBB = MO.getMBB();
312 break;
313 }
314 }
315
316 // Must have a target and must target the same basic block (backward branch)
317 if (!TargetMBB || TargetMBB != &MBB)
318 return false;
319
320 // Count instructions from the beginning of the block to the branch
321 // A short loop is 6 instructions or fewer (including branch + delay slot)
322 // The delay slot adds 1 more, so we check if instructions before branch <= 5
323 unsigned InstrCount = 0;
324 bool HasOtherBranch = false;
325
326 for (const MachineInstr &Instr : MBB) {
327 if (&Instr == MI)
328 break;
329
330 // Skip debug and pseudo instructions
331 if (Instr.isDebugInstr() || Instr.isTransient())
332 continue;
333
334 ++InstrCount;
335
336 // If there's another branch in the loop, the erratum doesn't apply
337 if (Instr.isBranch() || Instr.isCall()) {
338 HasOtherBranch = true;
339 break;
340 }
341 }
342
343 // If there's another branch/call in the loop, erratum doesn't apply
344 if (HasOtherBranch)
345 return false;
346
347 // Add 1 for the branch itself, +1 for delay slot = InstrCount + 2
348 // Erratum triggers when total <= 6, so InstrCount + 2 <= 6 => InstrCount <= 4
349 // But we're conservative: if InstrCount <= 5 (total <= 7), skip filling
350 // to match the exact condition from r5900check: offset -5 to -1 (2-6 instrs)
351 return InstrCount <= 5;
352}
353
354INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE,
355 "Fill delay slot for MIPS", false, false)
356
357/// This function inserts clones of Filler into predecessor blocks.
358static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
359 MachineFunction *MF = Filler->getParent()->getParent();
360
361 for (const auto &I : BrMap) {
362 if (I.second) {
363 MIBundleBuilder(I.second).append(MF->CloneMachineInstr(&*Filler));
364 ++UsefulSlots;
365 } else {
366 I.first->push_back(MF->CloneMachineInstr(&*Filler));
367 }
368 }
369}
370
371/// This function adds registers Filler defines to MBB's live-in register list.
372static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
373 for (const MachineOperand &MO : Filler->operands()) {
374 unsigned R;
375
376 if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
377 continue;
378
379#ifndef NDEBUG
380 const MachineFunction &MF = *MBB.getParent();
382 "Shouldn't move an instruction with unallocatable registers across "
383 "basic block boundaries.");
384#endif
385
386 if (!MBB.isLiveIn(R))
387 MBB.addLiveIn(R);
388 }
389}
390
391RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI)
392 : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {}
393
394void RegDefsUses::init(const MachineInstr &MI) {
395 // Add all register operands which are explicit and non-variadic.
396 update(MI, 0, MI.getDesc().getNumOperands());
397
398 // If MI is a call, add RA to Defs to prevent users of RA from going into
399 // delay slot.
400 if (MI.isCall())
401 Defs.set(Mips::RA);
402
403 // Add all implicit register operands of branch instructions except
404 // register AT.
405 if (MI.isBranch()) {
406 update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands());
407 Defs.reset(Mips::AT);
408 }
409}
410
411void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
412 assert(MI.isCall());
413
414 // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into
415 // the delay slot. The reason is that RA/RA_64 must not be changed
416 // in the delay slot so that the callee can return to the caller.
417 if (MI.definesRegister(Mips::RA, /*TRI=*/nullptr) ||
418 MI.definesRegister(Mips::RA_64, /*TRI=*/nullptr)) {
419 Defs.set(Mips::RA);
420 Defs.set(Mips::RA_64);
421 }
422
423 // If MI is a call, add all caller-saved registers to Defs.
424 BitVector CallerSavedRegs(TRI.getNumRegs(), true);
425
426 CallerSavedRegs.reset(Mips::ZERO);
427 CallerSavedRegs.reset(Mips::ZERO_64);
428
429 for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent());
430 *R; ++R)
431 for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
432 CallerSavedRegs.reset(*AI);
433
434 Defs |= CallerSavedRegs;
435}
436
437void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
438 BitVector AllocSet = TRI.getAllocatableSet(MF);
439
440 for (unsigned R : AllocSet.set_bits())
441 for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
442 AllocSet.set(*AI);
443
444 AllocSet.set(Mips::ZERO);
445 AllocSet.set(Mips::ZERO_64);
446
447 Defs |= AllocSet.flip();
448}
449
450void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
451 const MachineBasicBlock &SuccBB) {
452 for (const MachineBasicBlock *S : MBB.successors())
453 if (S != &SuccBB)
454 for (const auto &LI : S->liveins())
455 Uses.set(LI.PhysReg.id());
456}
457
458bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
459 BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs());
460 bool HasHazard = false;
461
462 for (unsigned I = Begin; I != End; ++I) {
463 const MachineOperand &MO = MI.getOperand(I);
464
465 if (MO.isReg() && MO.getReg()) {
466 if (checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef())) {
467 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found register hazard for operand "
468 << I << ": ";
469 MO.dump());
470 HasHazard = true;
471 }
472 }
473 }
474
475 Defs |= NewDefs;
476 Uses |= NewUses;
477
478 return HasHazard;
479}
480
481bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses,
482 unsigned Reg, bool IsDef) const {
483 if (IsDef) {
484 NewDefs.set(Reg);
485 // check whether Reg has already been defined or used.
486 return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg));
487 }
488
489 NewUses.set(Reg);
490 // check whether Reg has already been defined.
491 return isRegInSet(Defs, Reg);
492}
493
494bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
495 // Check Reg and all aliased Registers.
496 for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
497 if (RegSet.test(*AI))
498 return true;
499 return false;
500}
501
502bool InspectMemInstr::hasHazard(const MachineInstr &MI) {
503 if (!MI.mayStore() && !MI.mayLoad())
504 return false;
505
506 if (ForbidMemInstr)
507 return true;
508
509 OrigSeenLoad = SeenLoad;
510 OrigSeenStore = SeenStore;
511 SeenLoad |= MI.mayLoad();
512 SeenStore |= MI.mayStore();
513
514 // If MI is an ordered or volatile memory reference, disallow moving
515 // subsequent loads and stores to delay slot.
516 if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) {
517 ForbidMemInstr = true;
518 return true;
519 }
520
521 return hasHazard_(MI);
522}
523
524bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
525 if (MI.mayStore())
526 return true;
527
528 if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue())
529 return true;
530
531 if (const PseudoSourceValue *PSV =
532 (*MI.memoperands_begin())->getPseudoValue()) {
534 return false;
535 return !PSV->isConstant(nullptr) && !PSV->isStack();
536 }
537
538 return true;
539}
540
541MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
542 : InspectMemInstr(false), MFI(MFI_) {}
543
544bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
545 bool HasHazard = false;
546
547 // Check underlying object list.
549 if (getUnderlyingObjects(MI, Objs)) {
550 for (ValueType VT : Objs)
551 HasHazard |= updateDefsUses(VT, MI.mayStore());
552 return HasHazard;
553 }
554
555 // No underlying objects found.
556 HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore);
557 HasHazard |= MI.mayLoad() || OrigSeenStore;
558
559 SeenNoObjLoad |= MI.mayLoad();
560 SeenNoObjStore |= MI.mayStore();
561
562 return HasHazard;
563}
564
565bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
566 if (MayStore)
567 return !Defs.insert(V).second || Uses.count(V) || SeenNoObjStore ||
568 SeenNoObjLoad;
569
570 Uses.insert(V);
571 return Defs.count(V) || SeenNoObjStore;
572}
573
574bool MemDefsUses::
575getUnderlyingObjects(const MachineInstr &MI,
576 SmallVectorImpl<ValueType> &Objects) const {
577 if (!MI.hasOneMemOperand())
578 return false;
579
580 auto & MMO = **MI.memoperands_begin();
581
582 if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) {
583 if (!PSV->isAliased(MFI))
584 return false;
585 Objects.push_back(PSV);
586 return true;
587 }
588
589 if (const Value *V = MMO.getValue()) {
591 ::getUnderlyingObjects(V, Objs);
592
593 for (const Value *UValue : Objs) {
594 if (!isIdentifiedObject(V))
595 return false;
596
597 Objects.push_back(UValue);
598 }
599 return true;
600 }
601
602 return false;
603}
604
605// Replace Branch with the compact branch instruction.
606Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB,
607 Iter Branch,
608 const DebugLoc &DL) {
609 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
610 const MipsInstrInfo *TII = STI.getInstrInfo();
611
612 unsigned NewOpcode = TII->getEquivalentCompactForm(Branch);
613 Branch = TII->genInstrWithNewOpc(NewOpcode, Branch);
614
615 auto *ToErase = cast<MachineInstr>(&*std::next(Branch));
616 // Update call info for the Branch.
617 if (ToErase->shouldUpdateAdditionalCallInfo())
618 ToErase->getMF()->moveAdditionalCallInfo(ToErase,
619 cast<MachineInstr>(&*Branch));
620 ToErase->eraseFromParent();
621 return Branch;
622}
623
624// For given opcode returns opcode of corresponding instruction with short
625// delay slot.
626// For the pseudo TAILCALL*_MM instructions return the short delay slot
627// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
628// that is too short to make use of for tail calls.
629static int getEquivalentCallShort(int Opcode) {
630 switch (Opcode) {
631 case Mips::BGEZAL:
632 return Mips::BGEZALS_MM;
633 case Mips::BLTZAL:
634 return Mips::BLTZALS_MM;
635 case Mips::JAL:
636 case Mips::JAL_MM:
637 return Mips::JALS_MM;
638 case Mips::JALR:
639 return Mips::JALRS_MM;
640 case Mips::JALR16_MM:
641 return Mips::JALRS16_MM;
642 case Mips::TAILCALL_MM:
643 llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!");
644 case Mips::TAILCALLREG:
645 return Mips::JR16_MM;
646 default:
647 llvm_unreachable("Unexpected call instruction for microMIPS.");
648 }
649}
650
651/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
652/// We assume there is only one delay slot per delayed instruction.
653bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
654 bool Changed = false;
655 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
656 bool InMicroMipsMode = STI.inMicroMipsMode();
657 const MipsInstrInfo *TII = STI.getInstrInfo();
658
659 for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
660 if (!hasUnoccupiedSlot(&*I))
661 continue;
662
663 // R5900 short loop erratum fix: skip delay slot filling for short backward
664 // loops to avoid triggering a hardware bug where short loops may exit
665 // early. The fix can be controlled with -mfix-r5900 / -mno-fix-r5900.
666 bool SkipForFixR5900 = false;
667 if (STI.fixR5900() && isR5900ShortLoopBranch(&*I, MBB)) {
668 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": skipping delay slot fill for R5900 "
669 "short loop branch.\n");
670 ++R5900ShortLoopNops;
671 SkipForFixR5900 = true;
672 }
673
674 // Delay slot filling is disabled at -O0, in microMIPS32R6, or for R5900
675 // short loop branches.
677 (TM->getOptLevel() != CodeGenOptLevel::None) &&
678 !(InMicroMipsMode && STI.hasMips32r6()) && !SkipForFixR5900) {
679
680 bool Filled = false;
681
682 if (MipsCompactBranchPolicy.getValue() != CB_Always ||
683 !TII->getEquivalentCompactForm(I)) {
684 if (searchBackward(MBB, *I)) {
685 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
686 " in backwards search.\n");
687 Filled = true;
688 } else if (I->isTerminator()) {
689 if (searchSuccBBs(MBB, I)) {
690 Filled = true;
691 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
692 " in successor BB search.\n");
693 }
694 } else if (searchForward(MBB, I)) {
695 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot"
696 " in forwards search.\n");
697 Filled = true;
698 }
699 }
700
701 if (Filled) {
702 // Get instruction with delay slot.
703 MachineBasicBlock::instr_iterator DSI = I.getInstrIterator();
704
705 if (InMicroMipsMode && TII->getInstSizeInBytes(*std::next(DSI)) == 2 &&
706 DSI->isCall()) {
707 // If instruction in delay slot is 16b change opcode to
708 // corresponding instruction with short delay slot.
709
710 // TODO: Implement an instruction mapping table of 16bit opcodes to
711 // 32bit opcodes so that an instruction can be expanded. This would
712 // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop.
713 // TODO: Permit b16 when branching backwards to the same function
714 // if it is in range.
715 DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode())));
716 }
717 ++FilledSlots;
718 Changed = true;
719 continue;
720 }
721 }
722
723 // For microMIPS if instruction is BEQ or BNE with one ZERO register, then
724 // instead of adding NOP replace this instruction with the corresponding
725 // compact branch instruction, i.e. BEQZC or BNEZC. Additionally
726 // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can
727 // be replaced with JRC16_MM.
728
729 // For MIPSR6 attempt to produce the corresponding compact (no delay slot)
730 // form of the CTI. For indirect jumps this will not require inserting a
731 // NOP and for branches will hopefully avoid requiring a NOP.
732 if ((InMicroMipsMode ||
734 TII->getEquivalentCompactForm(I)) {
735 I = replaceWithCompactBranch(MBB, I, I->getDebugLoc());
736 Changed = true;
737 continue;
738 }
739
740 // Bundle the NOP to the instruction with the delay slot.
741 LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": could not fill delay slot for ";
742 I->dump());
743 TII->insertNop(MBB, std::next(I), I->getDebugLoc());
744 MIBundleBuilder(MBB, I, std::next(I, 2));
745 ++FilledSlots;
746 Changed = true;
747 }
748
749 return Changed;
750}
751
752template <typename IterTy>
753bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
754 IterTy End, RegDefsUses &RegDU,
755 InspectMemInstr &IM, Iter Slot,
756 IterTy &Filler) const {
757 for (IterTy I = Begin; I != End;) {
758 IterTy CurrI = I;
759 ++I;
760 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": checking instruction: "; CurrI->dump());
761 // Skip debug value.
762 // Instruction TargetOpcode::JUMP_TABLE_DEBUG_INFO is only used to note
763 // jump table debug info.
764 if (CurrI->isDebugInstr() || CurrI->isJumpTableDebugInfo()) {
765 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring debug instruction: ";
766 CurrI->dump());
767 continue;
768 }
769
770 if (CurrI->isBundle()) {
771 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": ignoring BUNDLE instruction: ";
772 CurrI->dump());
773 // However, we still need to update the register def-use information.
774 RegDU.update(*CurrI, 0, CurrI->getNumOperands());
775 continue;
776 }
777
778 if (terminateSearch(*CurrI)) {
779 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": should terminate search: ";
780 CurrI->dump());
781 break;
782 }
783
784 assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) &&
785 "Cannot put calls, returns or branches in delay slot.");
786
787 if (CurrI->isKill()) {
788 CurrI->eraseFromParent();
789 continue;
790 }
791
792 if (delayHasHazard(*CurrI, RegDU, IM))
793 continue;
794
795 const MipsSubtarget &STI = MBB.getParent()->getSubtarget<MipsSubtarget>();
796 bool InMicroMipsMode = STI.inMicroMipsMode();
797 const MipsInstrInfo *TII = STI.getInstrInfo();
798 unsigned Opcode = (*Slot).getOpcode();
799
800 // In mips1-4, should not put mflo into the delay slot for the return.
801 if ((IsMFLOMFHI(CurrI->getOpcode())) &&
802 (!STI.hasMips32() && !STI.hasMips5()))
803 continue;
804
805 // This is complicated by the tail call optimization. For non-PIC code
806 // there is only a 32bit sized unconditional branch which can be assumed
807 // to be able to reach the target. b16 only has a range of +/- 1 KB.
808 // It's entirely possible that the target function is reachable with b16
809 // but we don't have enough information to make that decision.
810 if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
811 (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
812 Opcode == Mips::PseudoIndirectBranch_MM ||
813 Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
814 continue;
815 // Instructions LWP/SWP and MOVEP should not be in a delay slot as that
816 // results in unpredictable behaviour
817 if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM ||
818 Opcode == Mips::MOVEP_MM))
819 continue;
820
821 Filler = CurrI;
822 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": found instruction for delay slot: ";
823 CurrI->dump());
824
825 return true;
826 }
827
828 return false;
829}
830
831bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB,
832 MachineInstr &Slot) const {
834 return false;
835
836 auto *Fn = MBB.getParent();
837 RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
838 MemDefsUses MemDU(&Fn->getFrameInfo());
839 ReverseIter Filler;
840
841 RegDU.init(Slot);
842
844 if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot,
845 Filler)) {
846 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
847 "slot using backwards search.\n");
848 return false;
849 }
850
851 MBB.splice(std::next(SlotI), &MBB, Filler.getReverse());
852 MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2));
853 ++UsefulSlots;
854 return true;
855}
856
857bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB,
858 Iter Slot) const {
859 // Can handle only calls.
860 if (DisableForwardSearch || !Slot->isCall())
861 return false;
862
863 RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
864 NoMemInstr NM;
865 Iter Filler;
866
867 RegDU.setCallerSaved(*Slot);
868
869 if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) {
870 LLVM_DEBUG(dbgs() << DEBUG_TYPE ": could not find instruction for delay "
871 "slot using forwards search.\n");
872 return false;
873 }
874
875 MBB.splice(std::next(Slot), &MBB, Filler);
876 MIBundleBuilder(MBB, Slot, std::next(Slot, 2));
877 ++UsefulSlots;
878 return true;
879}
880
881bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB,
882 Iter Slot) const {
884 return false;
885
886 MachineBasicBlock *SuccBB = selectSuccBB(MBB);
887
888 if (!SuccBB)
889 return false;
890
891 RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
892 bool HasMultipleSuccs = false;
893 BB2BrMap BrMap;
894 std::unique_ptr<InspectMemInstr> IM;
895 Iter Filler;
896 auto *Fn = MBB.getParent();
897
898 // Iterate over SuccBB's predecessor list.
899 for (MachineBasicBlock *Pred : SuccBB->predecessors())
900 if (!examinePred(*Pred, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
901 return false;
902
903 // Do not allow moving instructions which have unallocatable register operands
904 // across basic block boundaries.
905 RegDU.setUnallocatableRegs(*Fn);
906
907 // Only allow moving loads from stack or constants if any of the SuccBB's
908 // predecessors have multiple successors.
909 if (HasMultipleSuccs) {
910 IM.reset(new LoadFromStackOrConst());
911 } else {
912 const MachineFrameInfo &MFI = Fn->getFrameInfo();
913 IM.reset(new MemDefsUses(&MFI));
914 }
915
916 if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
917 Filler))
918 return false;
919
920 insertDelayFiller(Filler, BrMap);
921 addLiveInRegs(Filler, *SuccBB);
922 Filler->eraseFromParent();
923
924 return true;
925}
926
927MachineBasicBlock *
928MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const {
929 if (B.succ_empty())
930 return nullptr;
931
932 // Select the successor with the larget edge weight.
933 auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
934 MachineBasicBlock *S =
935 *llvm::max_element(B.successors(), [&](const MachineBasicBlock *Dst0,
936 const MachineBasicBlock *Dst1) {
937 return Prob.getEdgeProbability(&B, Dst0) <
938 Prob.getEdgeProbability(&B, Dst1);
939 });
940 return S->isEHPad() ? nullptr : S;
941}
942
943std::pair<MipsInstrInfo::BranchType, MachineInstr *>
944MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB,
945 const MachineBasicBlock &Dst) const {
946 const MipsInstrInfo *TII =
947 MBB.getParent()->getSubtarget<MipsSubtarget>().getInstrInfo();
948 MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
949 SmallVector<MachineInstr*, 2> BranchInstrs;
951
953 TII->analyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
954
956 return std::make_pair(R, nullptr);
957
959 if (!hasUnoccupiedSlot(BranchInstrs[0]))
960 return std::make_pair(MipsInstrInfo::BT_None, nullptr);
961
962 assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
963
964 return std::make_pair(R, BranchInstrs[0]);
965 }
966
967 assert((TrueBB == &Dst) || (FalseBB == &Dst));
968
969 // Examine the conditional branch. See if its slot is occupied.
970 if (hasUnoccupiedSlot(BranchInstrs[0]))
971 return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]);
972
973 // If that fails, try the unconditional branch.
974 if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
975 return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
976
977 return std::make_pair(MipsInstrInfo::BT_None, nullptr);
978}
979
980bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred,
981 const MachineBasicBlock &Succ,
982 RegDefsUses &RegDU,
983 bool &HasMultipleSuccs,
984 BB2BrMap &BrMap) const {
985 std::pair<MipsInstrInfo::BranchType, MachineInstr *> P =
986 getBranch(Pred, Succ);
987
988 // Return if either getBranch wasn't able to analyze the branches or there
989 // were no branches with unoccupied slots.
990 if (P.first == MipsInstrInfo::BT_None)
991 return false;
992
993 if ((P.first != MipsInstrInfo::BT_Uncond) &&
994 (P.first != MipsInstrInfo::BT_NoBranch)) {
995 HasMultipleSuccs = true;
996 RegDU.addLiveOut(Pred, Succ);
997 }
998
999 BrMap[&Pred] = P.second;
1000 return true;
1001}
1002
1003bool MipsDelaySlotFiller::delayHasHazard(const MachineInstr &Candidate,
1004 RegDefsUses &RegDU,
1005 InspectMemInstr &IM) const {
1006 assert(!Candidate.isKill() &&
1007 "KILL instructions should have been eliminated at this point.");
1008
1009 bool HasHazard = Candidate.isImplicitDef();
1010
1011 HasHazard |= IM.hasHazard(Candidate);
1012 HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands());
1013
1014 return HasHazard;
1015}
1016
1017bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const {
1018 return (Candidate.isTerminator() || Candidate.isCall() ||
1019 Candidate.isPosition() || Candidate.isInlineAsm() ||
1020 Candidate.hasUnmodeledSideEffects());
1021}
1022
1023/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
1024/// slots in Mips MachineFunctions
1026 return new MipsDelaySlotFiller();
1027}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
This file defines the DenseMap class.
static bool hasHazard(StateT InitialState, function_ref< HazardFnResult(StateT &, const MachineInstr &)> IsHazard, function_ref< void(StateT &, const MachineInstr &)> UpdateState, const MachineBasicBlock *InitialMBB, MachineBasicBlock::const_reverse_instr_iterator InitialI)
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static bool hasUnoccupiedSlot(const MachineInstr *MI)
static cl::opt< bool > DisableDelaySlotFiller("disable-mips-delay-filler", cl::init(false), cl::desc("Fill all delay slots with NOPs."), cl::Hidden)
static cl::opt< bool > DisableBackwardSearch("disable-mips-df-backward-search", cl::init(false), cl::desc("Disallow MIPS delay filler to search backward."), cl::Hidden)
static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB)
This function adds registers Filler defines to MBB's live-in register list.
static bool isR5900ShortLoopBranch(const MachineInstr *MI, const MachineBasicBlock &MBB)
Check if a branch is a short backward loop that triggers the R5900 erratum.
static cl::opt< bool > DisableSuccBBSearch("disable-mips-df-succbb-search", cl::init(true), cl::desc("Disallow MIPS delay filler to search successor basic blocks."), cl::Hidden)
const BB2BrMap & BrMap
static cl::opt< bool > DisableForwardSearch("disable-mips-df-forward-search", cl::init(true), cl::desc("Disallow MIPS delay filler to search forward."), cl::Hidden)
cl::opt< CompactBranchPolicy > MipsCompactBranchPolicy
static int getEquivalentCallShort(int Opcode)
#define IsMFLOMFHI(instr)
Definition Mips.h:20
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the PointerUnion class, which is a discriminated union of pointer types.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
AnalysisUsage & addRequired()
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
BitVector & set()
Definition BitVector.h:370
BitVector & flip()
Definition BitVector.h:450
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Helper class for constructing bundles of MachineInstrs.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
bool isEHPad() const
Returns true if the block is a landing pad.
Instructions::iterator instr_iterator
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
bool isPosition() const
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
bool isImplicitDef() const
bool isCall(QueryType Type=AnyInBundle) const
unsigned getNumOperands() const
Retuns the total number of operands.
bool isInlineAsm() const
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isKill() const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void dump() const
Register getReg() const
getReg - Returns the register number.
bool hasMips32r6() const
bool inMicroMipsMode() const
const MipsInstrInfo * getInstrInfo() const override
bool fixR5900() const
bool hasMips5() const
bool hasMips32() const
void push_back(const T &Elt)
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
BitVector getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC=nullptr) const
Returns a bitset indexed by register number indicating if a register is allocatable or not.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ CB_Never
The policy 'never' may in some circumstances or for some ISAs not be absolutely adhered to.
@ CB_Always
'always' may in some circumstances may not be absolutely adhered to, there may not be a corresponding...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2078
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
FunctionPass * createMipsDelaySlotFillerPass()
createMipsDelaySlotFillerPass - Returns a pass that fills in delay slots in Mips MachineFunctions
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.