LLVM 17.0.0git
MachineSink.cpp
Go to the documentation of this file.
1//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass moves instructions into successor blocks when possible, so that
10// they aren't executed on paths where their results aren't needed.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for an LLVM-IR-level sinking pass. It is only designed to sink simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/CFG.h"
46#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/LLVMContext.h"
51#include "llvm/Pass.h"
54#include "llvm/Support/Debug.h"
56#include <algorithm>
57#include <cassert>
58#include <cstdint>
59#include <map>
60#include <utility>
61#include <vector>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "machine-sink"
66
// Command-line tuning knobs for the machine-sinking pass, plus the pass
// statistics counters. All options are cl::Hidden (developer-only flags).
67static cl::opt<bool>
68SplitEdges("machine-sink-split",
69 cl::desc("Split critical edges during machine sinking"),
70 cl::init(true), cl::Hidden);
71
72static cl::opt<bool>
73UseBlockFreqInfo("machine-sink-bfi",
74 cl::desc("Use block frequency info to find successors to sink"),
75 cl::init(true), cl::Hidden);
76
// NOTE(review): the cl::opt declaration head for this option (doxygen
// lines 77/79) is missing from this listing; the name is presumably
// SplitEdgeProbabilityThreshold — verify against upstream.
78 "machine-sink-split-probability-threshold",
80 "Percentage threshold for splitting single-instruction critical edge. "
81 "If the branch threshold is higher than this threshold, we allow "
82 "speculative execution of up to 1 instruction to avoid branching to "
83 "splitted critical edge"),
84 cl::init(40), cl::Hidden);
85
// NOTE(review): declaration head (doxygen line 86) missing from this listing.
87 "machine-sink-load-instrs-threshold",
88 cl::desc("Do not try to find alias store for a load if there is a in-path "
89 "block whose instruction number is higher than this threshold."),
90 cl::init(2000), cl::Hidden);
91
// NOTE(review): declaration head (doxygen line 92) missing from this listing.
93 "machine-sink-load-blocks-threshold",
94 cl::desc("Do not try to find alias store for a load if the block number in "
95 "the straight line is higher than this threshold."),
96 cl::init(20), cl::Hidden);
97
98static cl::opt<bool>
99 SinkInstsIntoCycle("sink-insts-to-avoid-spills",
100 cl::desc("Sink instructions into cycles to avoid "
101 "register spills"),
102 cl::init(false), cl::Hidden);
103
// NOTE(review): declaration head (doxygen line 104) missing; name is
// presumably SinkIntoCycleLimit (used in runOnMachineFunction below).
105 "machine-sink-cycle-limit",
106 cl::desc("The maximum number of instructions considered for cycle sinking."),
107 cl::init(50), cl::Hidden);
108
109STATISTIC(NumSunk, "Number of machine instructions sunk");
110STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
111STATISTIC(NumSplit, "Number of critical edges split");
112STATISTIC(NumCoalesces, "Number of copies coalesced");
113STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
114
115namespace {
116
// Legacy-pass-manager pass that sinks machine instructions into successor
// blocks so they are not executed on paths that do not need their results
// (see the file header). State below is per-function scratch, reset between
// iterations / blocks as appropriate.
 117 class MachineSinking : public MachineFunctionPass {
 118 const TargetInstrInfo *TII = nullptr;
 119 const TargetRegisterInfo *TRI = nullptr;
 120 MachineRegisterInfo *MRI = nullptr; // Machine register information
 121 MachineDominatorTree *DT = nullptr; // Machine dominator tree
 122 MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
 123 MachineCycleInfo *CI = nullptr;
 124 MachineBlockFrequencyInfo *MBFI = nullptr;
 125 const MachineBranchProbabilityInfo *MBPI = nullptr;
 126 AliasAnalysis *AA = nullptr;
 127 RegisterClassInfo RegClassInfo;
 128
 129 // Remember which edges have been considered for breaking.
// NOTE(review): the type of CEBCandidates (doxygen line 130) is missing
// from this listing.
 131 CEBCandidates;
 132 // Remember which edges we are about to split.
 133 // This is different from CEBCandidates since those edges
 134 // will be split.
// NOTE(review): the declaration of ToSplit (doxygen line 135) is missing
// from this listing; it is used in runOnMachineFunction and
// PostponeSplitCriticalEdge below.
 136
 137 DenseSet<Register> RegsToClearKillFlags;
 138
 139 using AllSuccsCache =
 140 std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
 141
 142 /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
 143 /// post-dominated by another DBG_VALUE of the same variable location.
 144 /// This is necessary to detect sequences such as:
 145 /// %0 = someinst
 146 /// DBG_VALUE %0, !123, !DIExpression()
 147 /// %1 = anotherinst
 148 /// DBG_VALUE %1, !123, !DIExpression()
 149 /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that
 150 /// would re-order assignments.
 151 using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
 152
 153 /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
 154 /// debug instructions to sink.
// NOTE(review): the declaration of SeenDbgUsers (doxygen line 155) is
// missing from this listing; it is used in ProcessDbgInst/ProcessBlock.
 156
 157 /// Record of debug variables that have had their locations set in the
 158 /// current block.
 159 DenseSet<DebugVariable> SeenDbgVars;
 160
// Memoization caches for store-aliasing queries between pairs of blocks.
 161 std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
 162 HasStoreCache;
 163 std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
 164 std::vector<MachineInstr *>>
 165 StoreInstrCache;
 166
 167 /// Cached BB's register pressure.
 168 std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
 169
 170 public:
 171 static char ID; // Pass identification
 172
// NOTE(review): constructor body line (doxygen line 174) is missing from
// this listing.
 173 MachineSinking() : MachineFunctionPass(ID) {
 175 }
 176
 177 bool runOnMachineFunction(MachineFunction &MF) override;
 178
// NOTE(review): the body of getAnalysisUsage (doxygen lines 180-189) is
// missing from this listing.
 179 void getAnalysisUsage(AnalysisUsage &AU) const override {
 190 }
 191
 192 void releaseMemory() override {
 193 CEBCandidates.clear();
 194 }
 195
 196 private:
// NOTE(review): one declaration (doxygen line 197, presumably ProcessBlock)
// is missing from this listing; several multi-line declarations below are
// also missing their continuation lines (200-201, 204, 219-220, 234, 238,
// 242, 247, 249).
 198 void ProcessDbgInst(MachineInstr &MI);
 199 bool isWorthBreakingCriticalEdge(MachineInstr &MI,
 202
 203 bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
 205
 206 /// Postpone the splitting of the given critical
 207 /// edge (\p From, \p To).
 208 ///
 209 /// We do not split the edges on the fly. Indeed, this invalidates
 210 /// the dominance information and thus triggers a lot of updates
 211 /// of that information underneath.
 212 /// Instead, we postpone all the splits after each iteration of
 213 /// the main loop. That way, the information is at least valid
 214 /// for the lifetime of an iteration.
 215 ///
 216 /// \return True if the edge is marked as toSplit, false otherwise.
 217 /// False can be returned if, for instance, this is not profitable.
 218 bool PostponeSplitCriticalEdge(MachineInstr &MI,
 221 bool BreakPHIEdge);
 222 bool SinkInstruction(MachineInstr &MI, bool &SawStore,
 223 AllSuccsCache &AllSuccessors);
 224
 225 /// If we sink a COPY inst, some debug users of it's destination may no
 226 /// longer be dominated by the COPY, and will eventually be dropped.
 227 /// This is easily rectified by forwarding the non-dominated debug uses
 228 /// to the copy source.
 229 void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
 230 MachineBasicBlock *TargetBlock);
 231 bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
 232 MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
 233 bool &LocalUse) const;
 235 bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
 236
 237 void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
 239 bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
 240
 241 bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
 243 MachineBasicBlock *SuccToSinkTo,
 244 AllSuccsCache &AllSuccessors);
 245
 246 bool PerformTrivialForwardCoalescing(MachineInstr &MI,
 248
 250 GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
 251 AllSuccsCache &AllSuccessors) const;
 252
 253 std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
 254 };
 255
256} // end anonymous namespace
257
// Pass registration boilerplate for the legacy pass manager.
258char MachineSinking::ID = 0;
259
260char &llvm::MachineSinkingID = MachineSinking::ID;
261
// NOTE(review): the INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY /
// INITIALIZE_PASS_END macro heads (doxygen lines 262, 264-268) are missing
// from this listing; only the trailing argument lines remain.
 263 "Machine code sinking", false, false)
 269 "Machine code sinking", false, false)
270
// Fold a trivial vreg-to-vreg COPY: when the copy's source vreg has exactly
// one non-debug use, both vregs share a register class, and the source's
// def is not itself copy-like, replace all uses of the destination with the
// source and erase the COPY. Returns true if the copy was removed.
// NOTE(review): the second parameter line of this signature (doxygen line
// 272) is missing from this listing.
271bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
 273 if (!MI.isCopy())
 274 return false;
 275
 276 Register SrcReg = MI.getOperand(1).getReg();
 277 Register DstReg = MI.getOperand(0).getReg();
 278 if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
 279 !MRI->hasOneNonDBGUse(SrcReg))
 280 return false;
 281
// Only coalesce when both vregs belong to the same register class.
 282 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
 283 const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
 284 if (SRC != DRC)
 285 return false;
 286
 287 MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
 288 if (DefMI->isCopyLike())
 289 return false;
 290 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
 291 LLVM_DEBUG(dbgs() << "*** to: " << MI);
 292 MRI->replaceRegWith(DstReg, SrcReg);
 293 MI.eraseFromParent();
 294
 295 // Conservatively, clear any kill flags, since it's possible that they are no
 296 // longer correct.
 297 MRI->clearKillFlags(SrcReg);
 298
 299 ++NumCoalesces;
 300 return true;
301}
302
303/// AllUsesDominatedByBlock - Return true if all uses of the specified register
304/// occur in blocks dominated by the specified block. If any use is in the
305/// definition block, then return false since it is never legal to move def
306/// after uses.
///
/// Out-params: \p BreakPHIEdge is set to true when every use is a PHI in the
/// candidate block sourced from \p DefMBB (the caller must split the critical
/// edge first); \p LocalUse is set to true when a non-PHI use is found in
/// \p DefMBB itself.
// NOTE(review): the parameter line declaring the candidate block (doxygen
// line 308, presumably "MachineBasicBlock *MBB,") is missing from this
// listing.
307bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
 309 MachineBasicBlock *DefMBB,
 310 bool &BreakPHIEdge,
 311 bool &LocalUse) const {
 312 assert(Reg.isVirtual() && "Only makes sense for vregs");
 313
 314 // Ignore debug uses because debug info doesn't affect the code.
 315 if (MRI->use_nodbg_empty(Reg))
 316 return true;
 317
 318 // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
 319 // into and they are all PHI nodes. In this case, machine-sink must break
 320 // the critical edge first. e.g.
 321 //
 322 // %bb.1:
 323 // Predecessors according to CFG: %bb.0
 324 // ...
 325 // %def = DEC64_32r %x, implicit-def dead %eflags
 326 // ...
 327 // JE_4 <%bb.37>, implicit %eflags
 328 // Successors according to CFG: %bb.37 %bb.2
 329 //
 330 // %bb.2:
 331 // %p = PHI %y, %bb.0, %def, %bb.1
 332 if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
 333 MachineInstr *UseInst = MO.getParent();
 334 unsigned OpNo = MO.getOperandNo();
 335 MachineBasicBlock *UseBlock = UseInst->getParent();
// PHI operands come in (value, MBB) pairs, so OpNo + 1 is the incoming block.
 336 return UseBlock == MBB && UseInst->isPHI() &&
 337 UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
 338 })) {
 339 BreakPHIEdge = true;
 340 return true;
 341 }
 342
 343 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
 344 // Determine the block of the use.
 345 MachineInstr *UseInst = MO.getParent();
 346 unsigned OpNo = &MO - &UseInst->getOperand(0);
 347 MachineBasicBlock *UseBlock = UseInst->getParent();
 348 if (UseInst->isPHI()) {
 349 // PHI nodes use the operand in the predecessor block, not the block with
 350 // the PHI.
 351 UseBlock = UseInst->getOperand(OpNo+1).getMBB();
 352 } else if (UseBlock == DefMBB) {
 353 LocalUse = true;
 354 return false;
 355 }
 356
 357 // Check that it dominates.
 358 if (!DT->dominates(MBB, UseBlock))
 359 return false;
 360 }
 361
 362 return true;
363}
364
365/// Return true if this machine instruction loads from global offset table or
366/// constant pool.
///
/// Conservative: an instruction with no memory operands is treated as if it
/// could read anything, so this returns true for it.
// NOTE(review): the signature line (doxygen line 367, presumably
// "static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {") is
// missing from this listing.
 368 assert(MI.mayLoad() && "Expected MI that loads!");
 369
 370 // If we lost memory operands, conservatively assume that the instruction
 371 // reads from everything..
 372 if (MI.memoperands_empty())
 373 return true;
 374
 375 for (MachineMemOperand *MemOp : MI.memoperands())
 376 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
 377 if (PSV->isGOT() || PSV->isConstantPool())
 378 return true;
 379
 380 return false;
381}
382
// Scan block \p BB (the cycle preheader, per the caller in
// runOnMachineFunction) and collect instructions that look sinkable into
// \p Cycle: target-approved, cycle-invariant, safe to move across stores,
// convergence-free, and whose first operand is the sole def of a vreg.
// NOTE(review): the remaining parameter lines of this signature (doxygen
// lines 384-385, including the Candidates output vector) are missing from
// this listing.
383void MachineSinking::FindCycleSinkCandidates(
 386 for (auto &MI : *BB) {
 387 LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
 388 if (!TII->shouldSink(MI)) {
 389 LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
 390 "target\n");
 391 continue;
 392 }
 393 if (!isCycleInvariant(Cycle, MI)) {
 394 LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
 395 continue;
 396 }
 397 bool DontMoveAcrossStore = true;
 398 if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
 399 LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
 400 continue;
 401 }
// NOTE(review): the condition rejects loads that are NOT from the GOT or
// constant pool (only GOT/constant-pool loads are allowed through), but
// the debug message reads as if GOT/constant-pool loads were the ones
// rejected. The message text appears misleading — worth confirming and
// fixing upstream.
 402 if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
 403 LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
 404 continue;
 405 }
 406 if (MI.isConvergent())
 407 continue;
 408
 409 const MachineOperand &MO = MI.getOperand(0);
 410 if (!MO.isReg() || !MO.getReg() || !MO.isDef())
 411 continue;
 412 if (!MRI->hasOneDef(MO.getReg()))
 413 continue;
 414
 415 LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
 416 Candidates.push_back(&MI);
 417 }
418}
419
// Pass entry point. Repeatedly sinks instructions block-by-block until a
// fixed point, splitting postponed critical edges between iterations, then
// optionally performs cycle sinking to reduce register pressure.
420bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
 421 if (skipFunction(MF.getFunction()))
 422 return false;
 423
 424 LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
 425
// NOTE(review): the lines initializing TII and TRI (doxygen lines 426-427)
// are missing from this listing.
 428 MRI = &MF.getRegInfo();
 429 DT = &getAnalysis<MachineDominatorTree>();
 430 PDT = &getAnalysis<MachinePostDominatorTree>();
 431 CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
 432 MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
 433 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 434 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 435 RegClassInfo.runOnMachineFunction(MF);
 436
 437 bool EverMadeChange = false;
 438
// Iterate to a fixed point: sinking one instruction may expose more.
 439 while (true) {
 440 bool MadeChange = false;
 441
 442 // Process all basic blocks.
 443 CEBCandidates.clear();
 444 ToSplit.clear();
 445 for (auto &MBB: MF)
 446 MadeChange |= ProcessBlock(MBB);
 447
 448 // If we have anything we marked as toSplit, split it now.
// Edge splitting is deferred to here so dominator info stays valid for the
// whole iteration (see PostponeSplitCriticalEdge).
 449 for (const auto &Pair : ToSplit) {
 450 auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
 451 if (NewSucc != nullptr) {
 452 LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
 453 << printMBBReference(*Pair.first) << " -- "
 454 << printMBBReference(*NewSucc) << " -- "
 455 << printMBBReference(*Pair.second) << '\n');
 456 if (MBFI)
 457 MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
 458
 459 MadeChange = true;
 460 ++NumSplit;
 461 } else
 462 LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
 463 }
 464 // If this iteration over the code changed anything, keep iterating.
 465 if (!MadeChange) break;
 466 EverMadeChange = true;
 467 }
 468
 469 if (SinkInstsIntoCycle) {
// NOTE(review): the declaration of Cycles (doxygen line 470), of Preheader
// (line 473), and of Candidates (line 478) are missing from this listing.
 471 CI->toplevel_end());
 472 for (auto *Cycle : Cycles) {
 474 if (!Preheader) {
 475 LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
 476 continue;
 477 }
 479 FindCycleSinkCandidates(Cycle, Preheader, Candidates);
 480
 481 // Walk the candidates in reverse order so that we start with the use
 482 // of a def-use chain, if there is any.
 483 // TODO: Sort the candidates using a cost-model.
 484 unsigned i = 0;
 485 for (MachineInstr *I : llvm::reverse(Candidates)) {
 486 if (i++ == SinkIntoCycleLimit) {
 487 LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
 488 "be analysed.");
 489 break;
 490 }
 491
 492 if (!SinkIntoCycle(Cycle, *I))
 493 break;
 494 EverMadeChange = true;
 495 ++NumCycleSunk;
 496 }
 497 }
 498 }
 499
 500 HasStoreCache.clear();
 501 StoreInstrCache.clear();
 502
 503 // Now clear any kill flags for recorded registers.
 504 for (auto I : RegsToClearKillFlags)
 505 MRI->clearKillFlags(I);
 506 RegsToClearKillFlags.clear();
 507
 508 return EverMadeChange;
509}
510
// Sink instructions out of \p MBB. Walks the block bottom-up (so a sunk
// instruction's operands can be considered afterwards), trying trivial copy
// coalescing first and then SinkInstruction. Returns true if anything changed.
511bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
 512 // Can't sink anything out of a block that has less than two successors.
 513 if (MBB.succ_size() <= 1 || MBB.empty()) return false;
 514
 515 // Don't bother sinking code out of unreachable blocks. In addition to being
 516 // unprofitable, it can also lead to infinite looping, because in an
 517 // unreachable cycle there may be nowhere to stop.
 518 if (!DT->isReachableFromEntry(&MBB)) return false;
 519
 520 bool MadeChange = false;
 521
 522 // Cache all successors, sorted by frequency info and cycle depth.
 523 AllSuccsCache AllSuccessors;
 524
 525 // Walk the basic block bottom-up. Remember if we saw a store.
// NOTE(review): the iterator declaration (doxygen line 526, presumably
// "MachineBasicBlock::iterator I = MBB.end();") is missing from this
// listing.
 527 --I;
 528 bool ProcessedBegin, SawStore = false;
 529 do {
 530 MachineInstr &MI = *I; // The instruction to sink.
 531
 532 // Predecrement I (if it's not begin) so that it isn't invalidated by
 533 // sinking.
 534 ProcessedBegin = I == MBB.begin();
 535 if (!ProcessedBegin)
 536 --I;
 537
 538 if (MI.isDebugOrPseudoInstr()) {
 539 if (MI.isDebugValue())
 540 ProcessDbgInst(MI);
 541 continue;
 542 }
 543
 544 bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
 545 if (Joined) {
 546 MadeChange = true;
 547 continue;
 548 }
 549
 550 if (SinkInstruction(MI, SawStore, AllSuccessors)) {
 551 ++NumSunk;
 552 MadeChange = true;
 553 }
 554
 555 // If we just processed the first instruction in the block, we're done.
 556 } while (!ProcessedBegin);
 557
// Per-block debug-tracking state must not leak into the next block.
 558 SeenDbgUsers.clear();
 559 SeenDbgVars.clear();
 560 // recalculate the bb register pressure after sinking one BB.
 561 CachedRegisterPressure.clear();
 562
 563 return MadeChange;
564}
565
566void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
567 // When we see DBG_VALUEs for registers, record any vreg it reads, so that
568 // we know what to sink if the vreg def sinks.
569 assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
570
571 DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
572 MI.getDebugLoc()->getInlinedAt());
573 bool SeenBefore = SeenDbgVars.contains(Var);
574
575 for (MachineOperand &MO : MI.debug_operands()) {
576 if (MO.isReg() && MO.getReg().isVirtual())
577 SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
578 }
579
580 // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
581 SeenDbgVars.insert(Var);
582}
583
// Heuristic: decide whether splitting the critical edge (From, To) to sink
// \p MI is worthwhile. Note the CEBCandidates insert below is a side effect:
// asking about the same edge twice in one iteration answers "yes".
// NOTE(review): the first parameter line (doxygen line 585, presumably
// "MachineBasicBlock *From,") is missing from this listing.
584bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
 586 MachineBasicBlock *To) {
 587 // FIXME: Need much better heuristics.
 588
 589 // If the pass has already considered breaking this edge (during this pass
 590 // through the function), then let's go ahead and break it. This means
 591 // sinking multiple "cheap" instructions into the same block.
 592 if (!CEBCandidates.insert(std::make_pair(From, To)).second)
 593 return true;
 594
// Expensive instructions are always worth a split.
 595 if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
 596 return true;
 597
// NOTE(review): the right-hand side of this comparison (doxygen line 599,
// presumably the SplitEdgeProbabilityThreshold branch-probability value) is
// missing from this listing.
 598 if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
 600 return true;
 601
 602 // MI is cheap, we probably don't want to break the critical edge for it.
 603 // However, if this would allow some definitions of its source operands
 604 // to be sunk then it's probably worth it.
 605 for (const MachineOperand &MO : MI.all_uses()) {
 606 Register Reg = MO.getReg();
 607 if (Reg == 0)
 608 continue;
 609
 610 // We don't move live definitions of physical registers,
 611 // so sinking their uses won't enable any opportunities.
 612 if (Reg.isPhysical())
 613 continue;
 614
 615 // If this instruction is the only user of a virtual register,
 616 // check if breaking the edge will enable sinking
 617 // both this instruction and the defining instruction.
 618 if (MRI->hasOneNonDBGUse(Reg)) {
 619 // If the definition resides in same MBB,
 620 // claim it's likely we can sink these together.
 621 // If definition resides elsewhere, we aren't
 622 // blocking it from being sunk so don't break the edge.
 623 MachineInstr *DefMI = MRI->getVRegDef(Reg);
 624 if (DefMI->getParent() == MI.getParent())
 625 return true;
 626 }
 627 }
 628
 629 return false;
630}
631
632bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
633 MachineBasicBlock *FromBB,
634 MachineBasicBlock *ToBB,
635 bool BreakPHIEdge) {
636 if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
637 return false;
638
639 // Avoid breaking back edge. From == To means backedge for single BB cycle.
640 if (!SplitEdges || FromBB == ToBB)
641 return false;
642
643 MachineCycle *FromCycle = CI->getCycle(FromBB);
644 MachineCycle *ToCycle = CI->getCycle(ToBB);
645
646 // Check for backedges of more "complex" cycles.
647 if (FromCycle == ToCycle && FromCycle &&
648 (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
649 return false;
650
651 // It's not always legal to break critical edges and sink the computation
652 // to the edge.
653 //
654 // %bb.1:
655 // v1024
656 // Beq %bb.3
657 // <fallthrough>
658 // %bb.2:
659 // ... no uses of v1024
660 // <fallthrough>
661 // %bb.3:
662 // ...
663 // = v1024
664 //
665 // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
666 //
667 // %bb.1:
668 // ...
669 // Bne %bb.2
670 // %bb.4:
671 // v1024 =
672 // B %bb.3
673 // %bb.2:
674 // ... no uses of v1024
675 // <fallthrough>
676 // %bb.3:
677 // ...
678 // = v1024
679 //
680 // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
681 // flow. We need to ensure the new basic block where the computation is
682 // sunk to dominates all the uses.
683 // It's only legal to break critical edge and sink the computation to the
684 // new block if all the predecessors of "To", except for "From", are
685 // not dominated by "From". Given SSA property, this means these
686 // predecessors are dominated by "To".
687 //
688 // There is no need to do this check if all the uses are PHI nodes. PHI
689 // sources are only defined on the specific predecessor edges.
690 if (!BreakPHIEdge) {
691 for (MachineBasicBlock *Pred : ToBB->predecessors())
692 if (Pred != FromBB && !DT->dominates(ToBB, Pred))
693 return false;
694 }
695
696 ToSplit.insert(std::make_pair(FromBB, ToBB));
697
698 return true;
699}
700
// Compute (or fetch from CachedRegisterPressure) the max register pressure
// per pressure set for \p MBB, by walking the block bottom-up with a
// RegPressureTracker. Returns a reference into the cache.
701std::vector<unsigned> &
702MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
 703 // Currently to save compiling time, MBB's register pressure will not change
 704 // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
 705 // register pressure is changed after sinking any instructions into it.
 706 // FIXME: need a accurate and cheap register pressure estiminate model here.
 707 auto RP = CachedRegisterPressure.find(&MBB);
 708 if (RP != CachedRegisterPressure.end())
 709 return RP->second;
 710
 711 RegionPressure Pressure;
 712 RegPressureTracker RPTracker(Pressure);
 713
 714 // Initialize the register pressure tracker.
 715 RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
 716 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
 717
// NOTE(review): the for-loop header line (doxygen line 718, presumably
// "for (MachineBasicBlock::instr_iterator MII = MBB.instr_end(),") is
// missing from this listing; the loop walks the block in reverse.
 719 MIE = MBB.instr_begin();
 720 MII != MIE; --MII) {
 721 MachineInstr &MI = *std::prev(MII);
 722 if (MI.isDebugInstr() || MI.isPseudoProbe())
 723 continue;
 724 RegisterOperands RegOpers;
 725 RegOpers.collect(MI, *TRI, *MRI, false, false);
 726 RPTracker.recedeSkipDebugValues();
 727 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
 728 RPTracker.recede(RegOpers);
 729 }
 730
 731 RPTracker.closeRegion();
 732 auto It = CachedRegisterPressure.insert(
 733 std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
 734 return It.first->second;
735}
736
737/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
///
/// Trivially profitable when SuccToSinkTo does not post-dominate the current
/// block, or is at a shallower cycle depth. Otherwise, profitability hinges
/// on onward sinkability and on the register-pressure impact in cycles.
// NOTE(review): the parameter line for the current block (doxygen line 739,
// presumably "MachineBasicBlock *MBB,") is missing from this listing.
738bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
 740 MachineBasicBlock *SuccToSinkTo,
 741 AllSuccsCache &AllSuccessors) {
 742 assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
 743
 744 if (MBB == SuccToSinkTo)
 745 return false;
 746
 747 // It is profitable if SuccToSinkTo does not post dominate current block.
 748 if (!PDT->dominates(SuccToSinkTo, MBB))
 749 return true;
 750
 751 // It is profitable to sink an instruction from a deeper cycle to a shallower
 752 // cycle, even if the latter post-dominates the former (PR21115).
 753 if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
 754 return true;
 755
 756 // Check if only use in post dominated block is PHI instruction.
 757 bool NonPHIUse = false;
 758 for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
 759 MachineBasicBlock *UseBlock = UseInst.getParent();
 760 if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
 761 NonPHIUse = true;
 762 }
 763 if (!NonPHIUse)
 764 return true;
 765
 766 // If SuccToSinkTo post dominates then also it may be profitable if MI
 767 // can further profitably sinked into another block in next round.
 768 bool BreakPHIEdge = false;
 769 // FIXME - If finding successor is compile time expensive then cache results.
 770 if (MachineBasicBlock *MBB2 =
 771 FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
 772 return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
 773
 774 MachineCycle *MCycle = CI->getCycle(MBB);
 775
 776 // If the instruction is not inside a cycle, it is not profitable to sink MI to
 777 // a post dominate block SuccToSinkTo.
 778 if (!MCycle)
 779 return false;
 780
// Returns true when adding this register class's weight to SuccToSinkTo's
// tracked pressure would reach the limit of any pressure set it belongs to.
 781 auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
 782 unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
 783 const int *PS = TRI->getRegClassPressureSets(RC);
 784 // Get register pressure for block SuccToSinkTo.
 785 std::vector<unsigned> BBRegisterPressure =
 786 getBBRegisterPressure(*SuccToSinkTo);
 787 for (; *PS != -1; PS++)
 788 // check if any register pressure set exceeds limit in block SuccToSinkTo
 789 // after sinking.
 790 if (Weight + BBRegisterPressure[*PS] >=
 791 TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
 792 return true;
 793 return false;
 794 };
 795
 796 // If this instruction is inside a Cycle and sinking this instruction can make
 797 // more registers live range shorten, it is still prifitable.
 798 for (const MachineOperand &MO : MI.operands()) {
 799 // Ignore non-register operands.
 800 if (!MO.isReg())
 801 continue;
 802 Register Reg = MO.getReg();
 803 if (Reg == 0)
 804 continue;
 805
 806 if (Reg.isPhysical()) {
 807 // Don't handle non-constant and non-ignorable physical register uses.
 808 if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
 809 return false;
 810 continue;
 811 }
 812
 813 // Users for the defs are all dominated by SuccToSinkTo.
 814 if (MO.isDef()) {
 815 // This def register's live range is shortened after sinking.
 816 bool LocalUse = false;
 817 if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
 818 LocalUse))
 819 return false;
 820 } else {
 821 MachineInstr *DefMI = MRI->getVRegDef(Reg);
 822 if (!DefMI)
 823 continue;
// NOTE(review): the declaration of the local `Cycle` used below (doxygen
// line 824, presumably "MachineCycle *Cycle =
// CI->getCycle(DefMI->getParent());") is missing from this listing.
 825 // DefMI is defined outside of cycle. There should be no live range
 826 // impact for this operand. Defination outside of cycle means:
 827 // 1: defination is outside of cycle.
 828 // 2: defination is in this cycle, but it is a PHI in the cycle header.
 829 if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
 830 Cycle->getHeader() == DefMI->getParent()))
 831 continue;
 832 // The DefMI is defined inside the cycle.
 833 // If sinking this operand makes some register pressure set exceed limit,
 834 // it is not profitable.
 835 if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
 836 LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
 837 return false;
 838 }
 839 }
 840 }
 841
 842 // If MI is in cycle and all its operands are alive across the whole cycle or
 843 // if no operand sinking make register pressure set exceed limit, it is
 844 // profitable to sink MI.
 845 return true;
846}
847
848/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
849/// computing it if it was not already cached.
///
/// Besides CFG successors, dominator-tree children immediately dominated by
/// MBB are included as potential sink points. Sorted by ascending block
/// frequency when available, otherwise by ascending cycle depth.
// NOTE(review): the return-type line (doxygen line 850, presumably
// "SmallVector<MachineBasicBlock *, 4> &") and the declaration/population
// of AllSuccs from MBB's CFG successors (doxygen line 858) are missing from
// this listing.
851MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
 852 AllSuccsCache &AllSuccessors) const {
 853 // Do we have the sorted successors in cache ?
 854 auto Succs = AllSuccessors.find(MBB);
 855 if (Succs != AllSuccessors.end())
 856 return Succs->second;
 857
 859
 860 // Handle cases where sinking can happen but where the sink point isn't a
 861 // successor. For example:
 862 //
 863 // x = computation
 864 // if () {} else {}
 865 // use x
 866 //
 867 for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
 868 // DomTree children of MBB that have MBB as immediate dominator are added.
 869 if (DTChild->getIDom()->getBlock() == MI.getParent() &&
 870 // Skip MBBs already added to the AllSuccs vector above.
 871 !MBB->isSuccessor(DTChild->getBlock()))
 872 AllSuccs.push_back(DTChild->getBlock());
 873 }
 874
 875 // Sort Successors according to their cycle depth or block frequency info.
// NOTE(review): the sort-call head (doxygen line 876, presumably
// "llvm::stable_sort(") is missing from this listing.
 877 AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
 878 uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
 879 uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
 880 bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
 881 return HasBlockFreq ? LHSFreq < RHSFreq
 882 : CI->getCycleDepth(L) < CI->getCycleDepth(R);
 883 });
 884
 885 auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
 886
 887 return it.first->second;
888}
889
890/// FindSuccToSinkTo - Find a successor to sink this instruction to.
///
/// Returns nullptr when no single successor block legally and profitably
/// dominates all uses of every vreg MI defines. \p BreakPHIEdge is set (via
/// AllUsesDominatedByBlock) when sinking requires splitting a critical edge.
// NOTE(review): the return-type line (doxygen line 891, presumably
// "MachineBasicBlock *") is missing from this listing.
892MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
 893 bool &BreakPHIEdge,
 894 AllSuccsCache &AllSuccessors) {
 895 assert (MBB && "Invalid MachineBasicBlock!");
 896
 897 // loop over all the operands of the specified instruction. If there is
 898 // anything we can't handle, bail out.
 899
 900 // SuccToSinkTo - This is the successor to sink this instruction to, once we
 901 // decide.
 902 MachineBasicBlock *SuccToSinkTo = nullptr;
 903 for (const MachineOperand &MO : MI.operands()) {
 904 if (!MO.isReg()) continue; // Ignore non-register operands.
 905
 906 Register Reg = MO.getReg();
 907 if (Reg == 0) continue;
 908
 909 if (Reg.isPhysical()) {
 910 if (MO.isUse()) {
 911 // If the physreg has no defs anywhere, it's just an ambient register
 912 // and we can freely move its uses. Alternatively, if it's allocatable,
 913 // it could get allocated to something with a def during allocation.
 914 if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
 915 return nullptr;
 916 } else if (!MO.isDead()) {
 917 // A def that isn't dead. We can't move it.
 918 return nullptr;
 919 }
 920 } else {
 921 // Virtual register uses are always safe to sink.
 922 if (MO.isUse()) continue;
 923
 924 // If it's not safe to move defs of the register class, then abort.
 925 if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
 926 return nullptr;
 927
 928 // Virtual register defs can only be sunk if all their uses are in blocks
 929 // dominated by one of the successors.
 930 if (SuccToSinkTo) {
 931 // If a previous operand picked a block to sink to, then this operand
 932 // must be sinkable to the same block.
 933 bool LocalUse = false;
 934 if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
 935 BreakPHIEdge, LocalUse))
 936 return nullptr;
 937
 938 continue;
 939 }
 940
 941 // Otherwise, we should look at all the successors and decide which one
 942 // we should sink to. If we have reliable block frequency information
 943 // (frequency != 0) available, give successors with smaller frequencies
 944 // higher priority, otherwise prioritize smaller cycle depths.
 945 for (MachineBasicBlock *SuccBlock :
 946 GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
 947 bool LocalUse = false;
 948 if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
 949 BreakPHIEdge, LocalUse)) {
 950 SuccToSinkTo = SuccBlock;
 951 break;
 952 }
 953 if (LocalUse)
 954 // Def is used locally, it's never safe to move this def.
 955 return nullptr;
 956 }
 957
 958 // If we couldn't find a block to sink to, ignore this instruction.
 959 if (!SuccToSinkTo)
 960 return nullptr;
 961 if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
 962 return nullptr;
 963 }
 964 }
 965
 966 // It is not possible to sink an instruction into its own block. This can
 967 // happen with cycles.
 968 if (MBB == SuccToSinkTo)
 969 return nullptr;
 970
 971 // It's not safe to sink instructions to EH landing pad. Control flow into
 972 // landing pad is implicitly defined.
 973 if (SuccToSinkTo && SuccToSinkTo->isEHPad())
 974 return nullptr;
 975
 976 // It ought to be okay to sink instructions into an INLINEASM_BR target, but
 977 // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
 978 // the source block (which this code does not yet do). So for now, forbid
 979 // doing so.
 980 if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
 981 return nullptr;
 982
 983 return SuccToSinkTo;
984}
985
986/// Return true if MI is likely to be usable as a memory operation by the
987/// implicit null check optimization.
988///
989/// This is a "best effort" heuristic, and should not be relied upon for
990/// correctness. This returning true does not guarantee that the implicit null
991/// check optimization is legal over MI, and this returning false does not
992/// guarantee MI cannot possibly be used to do a null check.
// NOTE(review): the signature head (doxygen line 993, a static helper
// taking "MachineInstr &MI,") is missing from this listing.
 994 const TargetInstrInfo *TII,
 995 const TargetRegisterInfo *TRI) {
 996 using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
 997
 998 auto *MBB = MI.getParent();
 999 if (MBB->pred_size() != 1)
 1000 return false;
 1001
 1002 auto *PredMBB = *MBB->pred_begin();
 1003 auto *PredBB = PredMBB->getBasicBlock();
 1004
 1005 // Frontends that don't use implicit null checks have no reason to emit
 1006 // branches with make.implicit metadata, and this function should always
 1007 // return false for them.
 1008 if (!PredBB ||
 1009 !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
 1010 return false;
 1011
 1012 const MachineOperand *BaseOp;
 1013 int64_t Offset;
 1014 bool OffsetIsScalable;
 1015 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
 1016 return false;
 1017
 1018 if (!BaseOp->isReg())
 1019 return false;
 1020
// Only non-predicable loads are candidates for implicit null checks here.
 1021 if (!(MI.mayLoad() && !MI.isPredicable()))
 1022 return false;
 1023
 1024 MachineBranchPredicate MBP;
 1025 if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
 1026 return false;
 1027
// The predecessor must end in a compare of the load's base register
// against zero (== or !=) for the null-check pattern to apply.
 1028 return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
 1029 (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
 1030 MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
 1031 MBP.LHS.getReg() == BaseOp->getReg();
1032}
1033
1034/// If the sunk instruction is a copy, try to forward the copy instead of
1035/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
1036/// there's any subregister weirdness involved. Returns true if copy
1037/// propagation occurred.
1038static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
1039 Register Reg) {
1040 const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
1041 const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
1042
1043 // Copy DBG_VALUE operand and set the original to undef. We then check to
1044 // see whether this is something that can be copy-forwarded. If it isn't,
1045 // continue around the loop.
1046
1047 const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
1048 auto CopyOperands = TII.isCopyInstr(SinkInst);
1049 if (!CopyOperands)
1050 return false;
1051 SrcMO = CopyOperands->Source;
1052 DstMO = CopyOperands->Destination;
1053
1054 // Check validity of forwarding this copy.
1055 bool PostRA = MRI.getNumVirtRegs() == 0;
1056
1057 // Trying to forward between physical and virtual registers is too hard.
1058 if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
1059 return false;
1060
1061 // Only try virtual register copy-forwarding before regalloc, and physical
1062 // register copy-forwarding after regalloc.
1063 bool arePhysRegs = !Reg.isVirtual();
1064 if (arePhysRegs != PostRA)
1065 return false;
1066
1067 // Pre-regalloc, only forward if all subregisters agree (or there are no
1068 // subregs at all). More analysis might recover some forwardable copies.
1069 if (!PostRA)
1070 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
1071 if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
1072 DbgMO.getSubReg() != DstMO->getSubReg())
1073 return false;
1074
1075 // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
1076 // of this copy. Only forward the copy if the DBG_VALUE operand exactly
1077 // matches the copy destination.
1078 if (PostRA && Reg != DstMO->getReg())
1079 return false;
1080
1081 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
1082 DbgMO.setReg(SrcMO->getReg());
1083 DbgMO.setSubReg(SrcMO->getSubReg());
1084 }
1085 return true;
1086}
1087
// A DBG_VALUE paired with the registers it refers to that are being sunk.
using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
                                                 InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    // Clone the DBG_VALUE into the sink block; the original stays behind and
    // is invalidated below if copy-propagation fails.
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (unsigned Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    // If any sunk operand couldn't be forwarded, the original DBG_VALUE no
    // longer describes a valid location; terminate it with undef.
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}
1129
/// hasStoreBetween - check if there is a store between straight-line blocks
/// From and To that may alias MI (a load being considered for sinking).
/// Results are memoized in HasStoreCache / StoreInstrCache.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
  // Make sure From and To are in straight line which means From dominates To
  // and To post dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

  auto BlockPair = std::make_pair(From, To);

  // Has this block pair been queried before with a definite cached result?
  if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
    return HasStoreCache[BlockPair];

  // If the stores between this pair were cached earlier, only the alias check
  // against this particular MI needs to be redone.
  if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
    return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
      return I->mayAlias(AA, MI, false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all reachable blocks from From.
  for (MachineBasicBlock *BB : depth_first(From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To.
    // Store in block From should be already considered when just enter function
    // SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handle this BB in previous iteration.
    if (HandledBlocks.count(BB))
      continue;

    HandledBlocks.insert(BB);
    // To post dominates BB, it must be a path from block From.
    if (PDT->dominates(To, BB)) {
      if (!HandledDomBlocks.count(BB))
        HandledDomBlocks.insert(BB);

      // If this BB is too big or the block number in straight line between From
      // and To is too big, stop searching to save compiling time.
      if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
        // Conservatively cache "has store" for every sub-pair on the path so
        // later queries bail out quickly too.
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(DomBB, BB))
            HasStoreCache[std::make_pair(DomBB, To)] = true;
          else if(DomBB != BB && DT->dominates(BB, DomBB))
            HasStoreCache[std::make_pair(From, DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Treat as alias conservatively for a call or an ordered memory
        // operation.
        if (I.isCall() || I.hasOrderedMemoryRef()) {
          for (auto *DomBB : HandledDomBlocks) {
            if (DomBB != BB && DT->dominates(DomBB, BB))
              HasStoreCache[std::make_pair(DomBB, To)] = true;
            else if(DomBB != BB && DT->dominates(BB, DomBB))
              HasStoreCache[std::make_pair(From, DomBB)] = true;
          }
          HasStoreCache[BlockPair] = true;
          return true;
        }

        if (I.mayStore()) {
          SawStore = true;
          // We still have chance to sink MI if all stores between are not
          // aliased to MI.
          // Cache all store instructions, so that we don't need to go through
          // all From reachable blocks for next load instruction.
          if (I.mayAlias(AA, MI, false))
            HasAliasedStore = true;
          StoreInstrCache[BlockPair].push_back(&I);
        }
      }
    }
  }
  // If there is no store at all, cache the result.
  if (!SawStore)
    HasStoreCache[BlockPair] = false;
  return HasAliasedStore;
}
1220
/// Sink instructions into cycles if profitable. This especially tries to
/// prevent register spills caused by register pressure if there is little to no
/// overhead moving instructions into cycles.
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
  assert(Preheader && "Cycle sink needs a preheader block");
  MachineBasicBlock *SinkBlock = nullptr;
  bool CanSink = true;
  const MachineOperand &MO = I.getOperand(0);

  // Walk every use of the defined register; all uses must be inside the cycle
  // and be copies, and SinkBlock is narrowed to a common dominator of them.
  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
    if (!Cycle->contains(MI.getParent())) {
      LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
      CanSink = false;
      break;
    }

    // FIXME: Come up with a proper cost model that estimates whether sinking
    // the instruction (and thus possibly executing it on every cycle
    // iteration) is more expensive than a register.
    // For now assumes that copies are cheap and thus almost always worth it.
    if (!MI.isCopy()) {
      LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
      CanSink = false;
      break;
    }
    if (!SinkBlock) {
      SinkBlock = MI.getParent();
      LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
                        << printMBBReference(*SinkBlock) << "\n");
      continue;
    }
    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
    if (!SinkBlock) {
      LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
      CanSink = false;
      break;
    }
    LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
               printMBBReference(*SinkBlock) << "\n");
  }

  if (!CanSink) {
    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
    return false;
  }
  if (!SinkBlock) {
    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
    return false;
  }
  if (SinkBlock == Preheader) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
    return false;
  }
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
                    I);

  // Conservatively clear any kill flags on uses of sunk instruction
  for (MachineOperand &MO : I.operands()) {
    if (MO.isReg() && MO.readsReg())
      RegsToClearKillFlags.insert(MO.getReg());
  }

  // The instruction is moved from its basic block, so do not retain the
  // debug information.
  assert(!I.isDebugInstr() && "Should not sink debug inst");
  I.setDebugLoc(DebugLoc());
  return true;
}
1300
/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  if (BB->begin() == End)
    return false; // no prologue
  for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    // Compare every register operand of the sink candidate MI against the
    // prologue instruction's defs and uses.
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        // Ignorable and constant physical registers can't be clobbered in a
        // meaningful way by the prologue.
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }
  return false;
}
1339
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor, and perform the sink
/// if so. Returns true if MI was actually moved.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and not
  // required for correctness.
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there are no outputs, it must have side-effects.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool Store =
        MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(AA, Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
          PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunken into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert into. Skip phi nodes.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this inst defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    if (!SeenDbgUsers.count(MO.getReg()))
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = SeenDbgUsers[MO.getReg()];
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't copy-propagate
        // it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.

  return true;
}
1500
/// Redirect DBG_VALUE users of a sunk COPY that are left behind (no longer
/// dominated by the sink target) at the copy's source operand instead.
void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(MO.getReg());
    for (auto &User : MRI.use_instructions(MO.getReg())) {
      // Only undominated DBG_VALUE users need salvaging.
      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
        continue;

      // If is in same block, will either sink or be use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(&User);
    }
  }

  // Point the users of this copy that are no longer dominated, at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(1).getReg());
        DbgOp.setSubReg(MI.getOperand(1).getSubReg());
      }
    }
  }
}
1541
1542//===----------------------------------------------------------------------===//
1543// This pass is not intended to be a replacement or a complete alternative
1544// for the pre-ra machine sink pass. It is only designed to sink COPY
1545// instructions which should be handled after RA.
1546//
1547// This pass sinks COPY instructions into a successor block, if the COPY is not
1548// used in the current block and the COPY is live-in to a single successor
1549// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
1550// copy on paths where their results aren't needed. This also exposes
// additional opportunities for dead copy elimination and shrink wrapping.
1552//
1553// These copies were either not handled by or are inserted after the MachineSink
1554// pass. As an example of the former case, the MachineSink pass cannot sink
1555// COPY instructions with allocatable source registers; for AArch64 these type
1556// of copy instructions are frequently used to move function parameters (PhyReg)
1557// into virtual registers in the entry block.
1558//
1559// For the machine IR below, this pass will sink %w19 in the entry into its
1560// successor (%bb.1) because %w19 is only live-in in %bb.1.
1561// %bb.0:
1562// %wzr = SUBSWri %w1, 1
1563// %w19 = COPY %w0
1564// Bcc 11, %bb.2
1565// %bb.1:
1566// Live Ins: %w19
1567// BL @fun
1568// %w0 = ADDWrr %w0, %w19
1569// RET %w0
1570// %bb.2:
1571// %w0 = COPY %wzr
1572// RET %w0
1573// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
1574// able to see %bb.0 as a candidate.
1575//===----------------------------------------------------------------------===//
1576namespace {
1577
/// Post-RA pass that sinks COPY instructions into the single successor in
/// which their result is live-in (see the block comment above).
class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

      MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of registers
  /// referred to by the instruction that overlap the key register unit.

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
1611} // namespace
1612
// Pass identification and legacy pass-manager registration for the post-RA
// copy sinking pass ("postra-machine-sink").
char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)
1618
/// Return true if Reg (or any register unit aliasing it) is live-in to MBB.
static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  // "Not available" means some unit of Reg is in the live-in set.
  return !LiveInRegUnits.available(Reg);
}
1625
/// Find the unique sinkable successor of CurBB in which Reg is live-in.
/// Returns nullptr if Reg is live-in to zero or more than one sinkable
/// successor, or if any non-sinkable successor also needs Reg.
static MachineBasicBlock *
    const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
    unsigned Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable successors,
      // so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in in other successors.
  for (auto *SI : CurBB.successors()) {
    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
      return nullptr;
  }
  return BB;
}
1652
/// Overload: find the single successor that is valid for all of the registers
/// a COPY defines; every DefReg must agree on the same block.
static MachineBasicBlock *
    const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
    ArrayRef<unsigned> DefedRegsInCopy,
    const TargetRegisterInfo *TRI) {
  MachineBasicBlock *SingleBB = nullptr;
  for (auto DefReg : DefedRegsInCopy) {
    MachineBasicBlock *BB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    // Bail if a def has no candidate block or the defs disagree.
    if (!BB || (SingleBB && SingleBB != BB))
      return nullptr;
    SingleBB = BB;
  }
  return SingleBB;
}
1668
    SmallVectorImpl<unsigned> &UsedOpsInCopy,
    LiveRegUnits &UsedRegUnits,
    const TargetRegisterInfo *TRI) {
  // For each source operand of the copy being sunk: if its register is still
  // used later in CurBB, transfer the kill from that later use onto the copy
  // operand so flags stay consistent after the copy moves away.
  for (auto U : UsedOpsInCopy) {
    MachineOperand &MO = MI->getOperand(U);
    Register SrcReg = MO.getReg();
    if (!UsedRegUnits.available(SrcReg)) {
      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
        if (UI.killsRegister(SrcReg, TRI)) {
          UI.clearRegisterKills(SrcReg, TRI);
          MO.setIsKill(true);
          break;
        }
      }
    }
  }
}
1688
    SmallVectorImpl<unsigned> &UsedOpsInCopy,
    SmallVectorImpl<unsigned> &DefedRegsInCopy) {
  MachineFunction &MF = *SuccBB->getParent();
  // The copy's defs are now defined inside SuccBB, so they are no longer
  // live-in; remove them (and all their subregisters).
  for (unsigned DefReg : DefedRegsInCopy)
    for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
      SuccBB->removeLiveIn(S);
  // The copy's sources must now flow into SuccBB; add them as live-ins with
  // the union of the lane masks of their register units.
  for (auto U : UsedOpsInCopy) {
    Register SrcReg = MI->getOperand(U).getReg();
    LaneBitmask Mask;
    for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
      Mask |= (*S).second;
    }
    SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
  }
  SuccBB->sortUniqueLiveIns();
}
1707
    SmallVectorImpl<unsigned> &UsedOpsInCopy,
    SmallVectorImpl<unsigned> &DefedRegsInCopy,
    LiveRegUnits &ModifiedRegUnits,
    LiveRegUnits &UsedRegUnits) {
  // Returns true if sinking MI past the instructions summarized in
  // ModifiedRegUnits/UsedRegUnits would violate a register dependency.
  // On a false return, UsedOpsInCopy/DefedRegsInCopy are populated with MI's
  // use operand indices and defined registers.
  bool HasRegDependency = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef()) {
      // A def can't sink past any later read or write of the same units.
      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      DefedRegsInCopy.push_back(Reg);

      // FIXME: instead of isUse(), readsReg() would be a better fix here,
      // For example, we can ignore modifications in reg with undef. However,
      // it's not perfectly clear if skipping the internal read is safe in all
      // other targets.
    } else if (MO.isUse()) {
      // A use can't sink past a later write of the same units.
      if (!ModifiedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      UsedOpsInCopy.push_back(i);
    }
  }
  return HasRegDependency;
}
1742
/// Walk CurBB and sink eligible COPY instructions (plus their dependent
/// DBG_VALUEs) into the unique single-predecessor successor where their
/// results are live-in. Returns true if anything was moved.
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

    // Track the operand index for use in Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defed in Copy.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
               ++RI)
            MIUnits[*RI].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(&MI,
                                                   std::move(RegOps.second));
      }
      continue;
    }

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across function call.
    if (MI.isCall())
      return false;

    // Non-copies (and non-renamable copies) are barriers: fold their reg
    // effects into the tracking sets and move on.
    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpect SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units that DBG_VALUEs read, if this instruction
    // writes any of those units then the corresponding DBG_VALUEs must sink.
    for (auto &MO : MI.all_defs()) {
      for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          for (unsigned Reg : MIRegs.second)
            Regs.push_back(Reg);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
      LLVM_DEBUG(
          dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}
1871
/// Pass entry point: initialize the register-unit trackers and try to sink
/// copies out of every block of the function.
bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
basic Basic Alias true
BlockVerifier::State From
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
bool End
Definition: ELF_riscv.cpp:464
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< unsigned > SinkLoadInstsPerBlockThreshold("machine-sink-load-instrs-threshold", cl::desc("Do not try to find alias store for a load if there is a in-path " "block whose instruction number is higher than this threshold."), cl::init(2000), cl::Hidden)
static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, SmallVectorImpl< unsigned > &UsedOpsInCopy, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
unsigned const TargetRegisterInfo * TRI
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, ArrayRef< MIRegs > DbgValuesToSink)
Sink an instruction and its associated debug instructions.
static cl::opt< bool > SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden)
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Return true if MI is likely to be usable as a memory operation by the implicit null check optimizatio...
static cl::opt< bool > SinkInstsIntoCycle("sink-insts-to-avoid-spills", cl::desc("Sink instructions into cycles to avoid " "register spills"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > SinkLoadBlocksThreshold("machine-sink-load-blocks-threshold", cl::desc("Do not try to find alias store for a load if the block number in " "the straight line is higher than this threshold."), cl::init(20), cl::Hidden)
unsigned Reg
Machine code sinking
static bool hasRegisterDependency(MachineInstr *MI, SmallVectorImpl< unsigned > &UsedOpsInCopy, SmallVectorImpl< unsigned > &DefedRegsInCopy, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits)
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, SmallVectorImpl< unsigned > &UsedOpsInCopy, SmallVectorImpl< unsigned > &DefedRegsInCopy)
static bool blockPrologueInterferes(MachineBasicBlock *BB, MachineBasicBlock::iterator End, MachineInstr &MI, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, const MachineRegisterInfo *MRI)
Return true if a target defined block prologue instruction interferes with a sink candidate.
static cl::opt< unsigned > SplitEdgeProbabilityThreshold("machine-sink-split-probability-threshold", cl::desc("Percentage threshold for splitting single-instruction critical edge. " "If the branch threshold is higher than this threshold, we allow " "speculative execution of up to 1 instruction to avoid branching to " "splitted critical edge"), cl::init(40), cl::Hidden)
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, Register Reg)
If the sunk instruction is a copy, try to forward the copy instead of leaving an 'undef' DBG_VALUE in...
static MachineBasicBlock * getSingleLiveInSuccBB(MachineBasicBlock &CurBB, const SmallPtrSetImpl< MachineBasicBlock * > &SinkableBBs, unsigned Reg, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > SinkIntoCycleLimit("machine-sink-cycle-limit", cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden)
#define DEBUG_TYPE
Definition: MachineSink.cpp:65
static cl::opt< bool > UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden)
std::pair< MachineInstr *, SmallVector< unsigned, 2 > > MIRegs
This file implements a map that provides insertion order iteration.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the PointerIntPair class.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
Definition: Sink.cpp:175
static bool SinkInstruction(Instruction *Inst, SmallPtrSetImpl< Instruction * > &Stores, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
SinkInstruction - Determine whether it is safe to sink the specified machine instruction out of its c...
Definition: Sink.cpp:103
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
A debug info location.
Definition: DebugLoc.h:33
Identifies a unique instance of a variable.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Base class for the actual dominator tree node.
iterator_range< iterator > children()
const_toplevel_iterator toplevel_end() const
unsigned getCycleDepth(const BlockT *Block) const
get the depth for the cycle which containing a given block.
const_toplevel_iterator toplevel_begin() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
bool isReducible() const
Whether the cycle is a natural loop.
BlockT * getCyclePreheader() const
Return the preheader block for this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool shouldSink(const MachineInstr &MI) const override
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
Definition: LiveRegUnits.h:47
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
unsigned succ_size() const
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
bool sizeWithoutDebugLargerThan(unsigned Limit) const
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
void onEdgeSplit(const MachineBasicBlock &NewPredecessor, const MachineBasicBlock &NewSuccessor, const MachineBranchProbabilityInfo &MBPI)
incrementally calculate block frequencies when we split edges, to avoid full CFG traversal.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Legacy analysis pass which computes a MachineCycleInfo.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
bool isReachableFromEntry(const MachineBasicBlock *A)
isReachableFromEntry - Return true if A is dominated by the entry block of the function containing it...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Representation of each machine instruction.
Definition: MachineInstr.h:68
static iterator_range< filter_iterator< Operand *, std::function< bool(Operand &Op)> > > getDebugOperandsForReg(Instruction *MI, Register Reg)
Returns a range of all of the operands that correspond to a debug use of Reg.
Definition: MachineInstr.h:573
bool hasDebugOperandForReg(Register Reg) const
Returns whether this debug value has at least one debug operand with the register Reg.
Definition: MachineInstr.h:562
void setDebugValueUndef()
Sets all register debug operands in this debug value instruction to be undef.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:320
bool isCopyLike() const
Return true if the instruction behaves like a copy.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:533
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
VectorType takeVector()
Clear the MapVector and return the underlying vector.
Definition: MapVector.h:56
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
PointerIntPair - This class implements a pair of a pointer and small integer.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
A vector that has set insertion semantics.
Definition: SetVector.h:51
void clear()
Completely clear the SetVector.
Definition: SetVector.h:224
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
void stable_sort(R &&Range)
Definition: STLExtras.h:2063
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I)
char & MachineSinkingID
MachineSinking - This pass performs sinking on machine instructions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1826
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
char & PostRAMachineSinkingID
This pass perform post-ra machine sink for COPY instructions.
void initializeMachineSinkingPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
TODO: Might pack better if we changed this to a Struct of Arrays, since MachineOperand is width 32,...
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
Represents a predicate at the MachineFunction level.