LLVM 17.0.0git
MachineSink.cpp
Go to the documentation of this file.
1//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass moves instructions into successor blocks when possible, so that
10// they aren't executed on paths where their results aren't needed.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for an LLVM-IR-level sinking pass. It is only designed to sink simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/DenseSet.h"
20#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/SetVector.h"
24#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/CFG.h"
46#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/LLVMContext.h"
51#include "llvm/Pass.h"
54#include "llvm/Support/Debug.h"
56#include <algorithm>
57#include <cassert>
58#include <cstdint>
59#include <map>
60#include <utility>
61#include <vector>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "machine-sink"
66
67static cl::opt<bool>
68SplitEdges("machine-sink-split",
69 cl::desc("Split critical edges during machine sinking"),
70 cl::init(true), cl::Hidden);
71
72static cl::opt<bool>
73UseBlockFreqInfo("machine-sink-bfi",
74 cl::desc("Use block frequency info to find successors to sink"),
75 cl::init(true), cl::Hidden);
76
78 "machine-sink-split-probability-threshold",
80 "Percentage threshold for splitting single-instruction critical edge. "
81 "If the branch threshold is higher than this threshold, we allow "
82 "speculative execution of up to 1 instruction to avoid branching to "
83 "splitted critical edge"),
84 cl::init(40), cl::Hidden);
85
87 "machine-sink-load-instrs-threshold",
88 cl::desc("Do not try to find alias store for a load if there is a in-path "
89 "block whose instruction number is higher than this threshold."),
90 cl::init(2000), cl::Hidden);
91
93 "machine-sink-load-blocks-threshold",
94 cl::desc("Do not try to find alias store for a load if the block number in "
95 "the straight line is higher than this threshold."),
96 cl::init(20), cl::Hidden);
97
98static cl::opt<bool>
99 SinkInstsIntoCycle("sink-insts-to-avoid-spills",
100 cl::desc("Sink instructions into cycles to avoid "
101 "register spills"),
102 cl::init(false), cl::Hidden);
103
105 "machine-sink-cycle-limit",
106 cl::desc("The maximum number of instructions considered for cycle sinking."),
107 cl::init(50), cl::Hidden);
108
109STATISTIC(NumSunk, "Number of machine instructions sunk");
110STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
111STATISTIC(NumSplit, "Number of critical edges split");
112STATISTIC(NumCoalesces, "Number of copies coalesced");
113STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
114
115namespace {
116
117 class MachineSinking : public MachineFunctionPass {
118 const TargetInstrInfo *TII;
119 const TargetRegisterInfo *TRI;
120 MachineRegisterInfo *MRI; // Machine register information
121 MachineDominatorTree *DT; // Machine dominator tree
122 MachinePostDominatorTree *PDT; // Machine post dominator tree
126 AliasAnalysis *AA;
127 RegisterClassInfo RegClassInfo;
128
129 // Remember which edges have been considered for breaking.
131 CEBCandidates;
132 // Remember which edges we are about to split.
133 // This is different from CEBCandidates since those edges
134 // will be split.
136
137 DenseSet<Register> RegsToClearKillFlags;
138
139 using AllSuccsCache =
140 std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
141
142 /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
143 /// post-dominated by another DBG_VALUE of the same variable location.
144 /// This is necessary to detect sequences such as:
145 /// %0 = someinst
146 /// DBG_VALUE %0, !123, !DIExpression()
147 /// %1 = anotherinst
148 /// DBG_VALUE %1, !123, !DIExpression()
149 /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that
150 /// would re-order assignments.
151 using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
152
153 /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
154 /// debug instructions to sink.
156
157 /// Record of debug variables that have had their locations set in the
158 /// current block.
159 DenseSet<DebugVariable> SeenDbgVars;
160
161 std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
162 HasStoreCache;
163 std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
164 std::vector<MachineInstr *>>
165 StoreInstrCache;
166
167 /// Cached BB's register pressure.
168 std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
169
170 public:
171 static char ID; // Pass identification
172
173 MachineSinking() : MachineFunctionPass(ID) {
175 }
176
177 bool runOnMachineFunction(MachineFunction &MF) override;
178
179 void getAnalysisUsage(AnalysisUsage &AU) const override {
190 }
191
192 void releaseMemory() override {
193 CEBCandidates.clear();
194 }
195
196 private:
198 void ProcessDbgInst(MachineInstr &MI);
199 bool isWorthBreakingCriticalEdge(MachineInstr &MI,
202
203 bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
205
206 /// Postpone the splitting of the given critical
207 /// edge (\p From, \p To).
208 ///
209 /// We do not split the edges on the fly. Indeed, this invalidates
210 /// the dominance information and thus triggers a lot of updates
211 /// of that information underneath.
212 /// Instead, we postpone all the splits after each iteration of
213 /// the main loop. That way, the information is at least valid
214 /// for the lifetime of an iteration.
215 ///
216 /// \return True if the edge is marked as toSplit, false otherwise.
217 /// False can be returned if, for instance, this is not profitable.
218 bool PostponeSplitCriticalEdge(MachineInstr &MI,
221 bool BreakPHIEdge);
222 bool SinkInstruction(MachineInstr &MI, bool &SawStore,
223 AllSuccsCache &AllSuccessors);
224
225 /// If we sink a COPY inst, some debug users of it's destination may no
226 /// longer be dominated by the COPY, and will eventually be dropped.
227 /// This is easily rectified by forwarding the non-dominated debug uses
228 /// to the copy source.
229 void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
230 MachineBasicBlock *TargetBlock);
231 bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
232 MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
233 bool &LocalUse) const;
235 bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
236
237 void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
239 bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
240
241 bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
243 MachineBasicBlock *SuccToSinkTo,
244 AllSuccsCache &AllSuccessors);
245
246 bool PerformTrivialForwardCoalescing(MachineInstr &MI,
248
250 GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
251 AllSuccsCache &AllSuccessors) const;
252
253 std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
254 };
255
256} // end anonymous namespace
257
258char MachineSinking::ID = 0;
259
260char &llvm::MachineSinkingID = MachineSinking::ID;
261
263 "Machine code sinking", false, false)
269 "Machine code sinking", false, false)
270
271bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
273 if (!MI.isCopy())
274 return false;
275
276 Register SrcReg = MI.getOperand(1).getReg();
277 Register DstReg = MI.getOperand(0).getReg();
278 if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
279 !MRI->hasOneNonDBGUse(SrcReg))
280 return false;
281
282 const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
283 const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
284 if (SRC != DRC)
285 return false;
286
287 MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
288 if (DefMI->isCopyLike())
289 return false;
290 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
291 LLVM_DEBUG(dbgs() << "*** to: " << MI);
292 MRI->replaceRegWith(DstReg, SrcReg);
293 MI.eraseFromParent();
294
295 // Conservatively, clear any kill flags, since it's possible that they are no
296 // longer correct.
297 MRI->clearKillFlags(SrcReg);
298
299 ++NumCoalesces;
300 return true;
301}
302
303/// AllUsesDominatedByBlock - Return true if all uses of the specified register
304/// occur in blocks dominated by the specified block. If any use is in the
305/// definition block, then return false since it is never legal to move def
306/// after uses.
307bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
309 MachineBasicBlock *DefMBB,
310 bool &BreakPHIEdge,
311 bool &LocalUse) const {
312 assert(Reg.isVirtual() && "Only makes sense for vregs");
313
314 // Ignore debug uses because debug info doesn't affect the code.
315 if (MRI->use_nodbg_empty(Reg))
316 return true;
317
318 // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
319 // into and they are all PHI nodes. In this case, machine-sink must break
320 // the critical edge first. e.g.
321 //
322 // %bb.1:
323 // Predecessors according to CFG: %bb.0
324 // ...
325 // %def = DEC64_32r %x, implicit-def dead %eflags
326 // ...
327 // JE_4 <%bb.37>, implicit %eflags
328 // Successors according to CFG: %bb.37 %bb.2
329 //
330 // %bb.2:
331 // %p = PHI %y, %bb.0, %def, %bb.1
332 if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
333 MachineInstr *UseInst = MO.getParent();
334 unsigned OpNo = UseInst->getOperandNo(&MO);
335 MachineBasicBlock *UseBlock = UseInst->getParent();
336 return UseBlock == MBB && UseInst->isPHI() &&
337 UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
338 })) {
339 BreakPHIEdge = true;
340 return true;
341 }
342
343 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
344 // Determine the block of the use.
345 MachineInstr *UseInst = MO.getParent();
346 unsigned OpNo = &MO - &UseInst->getOperand(0);
347 MachineBasicBlock *UseBlock = UseInst->getParent();
348 if (UseInst->isPHI()) {
349 // PHI nodes use the operand in the predecessor block, not the block with
350 // the PHI.
351 UseBlock = UseInst->getOperand(OpNo+1).getMBB();
352 } else if (UseBlock == DefMBB) {
353 LocalUse = true;
354 return false;
355 }
356
357 // Check that it dominates.
358 if (!DT->dominates(MBB, UseBlock))
359 return false;
360 }
361
362 return true;
363}
364
365/// Return true if this machine instruction loads from global offset table or
366/// constant pool.
368 assert(MI.mayLoad() && "Expected MI that loads!");
369
370 // If we lost memory operands, conservatively assume that the instruction
371 // reads from everything..
372 if (MI.memoperands_empty())
373 return true;
374
375 for (MachineMemOperand *MemOp : MI.memoperands())
376 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
377 if (PSV->isGOT() || PSV->isConstantPool())
378 return true;
379
380 return false;
381}
382
383void MachineSinking::FindCycleSinkCandidates(
386 for (auto &MI : *BB) {
387 LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
388 if (!TII->shouldSink(MI)) {
389 LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
390 "target\n");
391 continue;
392 }
393 if (!isCycleInvariant(Cycle, MI)) {
394 LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
395 continue;
396 }
397 bool DontMoveAcrossStore = true;
398 if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
399 LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
400 continue;
401 }
402 if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
403 LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
404 continue;
405 }
406 if (MI.isConvergent())
407 continue;
408
409 const MachineOperand &MO = MI.getOperand(0);
410 if (!MO.isReg() || !MO.getReg() || !MO.isDef())
411 continue;
412 if (!MRI->hasOneDef(MO.getReg()))
413 continue;
414
415 LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
416 Candidates.push_back(&MI);
417 }
418}
419
420bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
421 if (skipFunction(MF.getFunction()))
422 return false;
423
424 LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");
425
428 MRI = &MF.getRegInfo();
429 DT = &getAnalysis<MachineDominatorTree>();
430 PDT = &getAnalysis<MachinePostDominatorTree>();
431 CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
432 MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
433 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
434 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
435 RegClassInfo.runOnMachineFunction(MF);
436
437 bool EverMadeChange = false;
438
439 while (true) {
440 bool MadeChange = false;
441
442 // Process all basic blocks.
443 CEBCandidates.clear();
444 ToSplit.clear();
445 for (auto &MBB: MF)
446 MadeChange |= ProcessBlock(MBB);
447
448 // If we have anything we marked as toSplit, split it now.
449 for (const auto &Pair : ToSplit) {
450 auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
451 if (NewSucc != nullptr) {
452 LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
453 << printMBBReference(*Pair.first) << " -- "
454 << printMBBReference(*NewSucc) << " -- "
455 << printMBBReference(*Pair.second) << '\n');
456 if (MBFI)
457 MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
458
459 MadeChange = true;
460 ++NumSplit;
461 } else
462 LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
463 }
464 // If this iteration over the code changed anything, keep iterating.
465 if (!MadeChange) break;
466 EverMadeChange = true;
467 }
468
469 if (SinkInstsIntoCycle) {
471 CI->toplevel_end());
472 for (auto *Cycle : Cycles) {
474 if (!Preheader) {
475 LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
476 continue;
477 }
479 FindCycleSinkCandidates(Cycle, Preheader, Candidates);
480
481 // Walk the candidates in reverse order so that we start with the use
482 // of a def-use chain, if there is any.
483 // TODO: Sort the candidates using a cost-model.
484 unsigned i = 0;
485 for (MachineInstr *I : llvm::reverse(Candidates)) {
486 if (i++ == SinkIntoCycleLimit) {
487 LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
488 "be analysed.");
489 break;
490 }
491
492 if (!SinkIntoCycle(Cycle, *I))
493 break;
494 EverMadeChange = true;
495 ++NumCycleSunk;
496 }
497 }
498 }
499
500 HasStoreCache.clear();
501 StoreInstrCache.clear();
502
503 // Now clear any kill flags for recorded registers.
504 for (auto I : RegsToClearKillFlags)
505 MRI->clearKillFlags(I);
506 RegsToClearKillFlags.clear();
507
508 return EverMadeChange;
509}
510
511bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
512 // Can't sink anything out of a block that has less than two successors.
513 if (MBB.succ_size() <= 1 || MBB.empty()) return false;
514
515 // Don't bother sinking code out of unreachable blocks. In addition to being
516 // unprofitable, it can also lead to infinite looping, because in an
517 // unreachable cycle there may be nowhere to stop.
518 if (!DT->isReachableFromEntry(&MBB)) return false;
519
520 bool MadeChange = false;
521
522 // Cache all successors, sorted by frequency info and cycle depth.
523 AllSuccsCache AllSuccessors;
524
525 // Walk the basic block bottom-up. Remember if we saw a store.
527 --I;
528 bool ProcessedBegin, SawStore = false;
529 do {
530 MachineInstr &MI = *I; // The instruction to sink.
531
532 // Predecrement I (if it's not begin) so that it isn't invalidated by
533 // sinking.
534 ProcessedBegin = I == MBB.begin();
535 if (!ProcessedBegin)
536 --I;
537
538 if (MI.isDebugOrPseudoInstr()) {
539 if (MI.isDebugValue())
540 ProcessDbgInst(MI);
541 continue;
542 }
543
544 bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
545 if (Joined) {
546 MadeChange = true;
547 continue;
548 }
549
550 if (SinkInstruction(MI, SawStore, AllSuccessors)) {
551 ++NumSunk;
552 MadeChange = true;
553 }
554
555 // If we just processed the first instruction in the block, we're done.
556 } while (!ProcessedBegin);
557
558 SeenDbgUsers.clear();
559 SeenDbgVars.clear();
560 // recalculate the bb register pressure after sinking one BB.
561 CachedRegisterPressure.clear();
562
563 return MadeChange;
564}
565
566void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
567 // When we see DBG_VALUEs for registers, record any vreg it reads, so that
568 // we know what to sink if the vreg def sinks.
569 assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
570
571 DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
572 MI.getDebugLoc()->getInlinedAt());
573 bool SeenBefore = SeenDbgVars.contains(Var);
574
575 for (MachineOperand &MO : MI.debug_operands()) {
576 if (MO.isReg() && MO.getReg().isVirtual())
577 SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
578 }
579
580 // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
581 SeenDbgVars.insert(Var);
582}
583
584bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
586 MachineBasicBlock *To) {
587 // FIXME: Need much better heuristics.
588
589 // If the pass has already considered breaking this edge (during this pass
590 // through the function), then let's go ahead and break it. This means
591 // sinking multiple "cheap" instructions into the same block.
592 if (!CEBCandidates.insert(std::make_pair(From, To)).second)
593 return true;
594
595 if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
596 return true;
597
598 if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
600 return true;
601
602 // MI is cheap, we probably don't want to break the critical edge for it.
603 // However, if this would allow some definitions of its source operands
604 // to be sunk then it's probably worth it.
605 for (const MachineOperand &MO : MI.operands()) {
606 if (!MO.isReg() || !MO.isUse())
607 continue;
608 Register Reg = MO.getReg();
609 if (Reg == 0)
610 continue;
611
612 // We don't move live definitions of physical registers,
613 // so sinking their uses won't enable any opportunities.
614 if (Reg.isPhysical())
615 continue;
616
617 // If this instruction is the only user of a virtual register,
618 // check if breaking the edge will enable sinking
619 // both this instruction and the defining instruction.
620 if (MRI->hasOneNonDBGUse(Reg)) {
621 // If the definition resides in same MBB,
622 // claim it's likely we can sink these together.
623 // If definition resides elsewhere, we aren't
624 // blocking it from being sunk so don't break the edge.
625 MachineInstr *DefMI = MRI->getVRegDef(Reg);
626 if (DefMI->getParent() == MI.getParent())
627 return true;
628 }
629 }
630
631 return false;
632}
633
634bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
635 MachineBasicBlock *FromBB,
636 MachineBasicBlock *ToBB,
637 bool BreakPHIEdge) {
638 if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
639 return false;
640
641 // Avoid breaking back edge. From == To means backedge for single BB cycle.
642 if (!SplitEdges || FromBB == ToBB)
643 return false;
644
645 MachineCycle *FromCycle = CI->getCycle(FromBB);
646 MachineCycle *ToCycle = CI->getCycle(ToBB);
647
648 // Check for backedges of more "complex" cycles.
649 if (FromCycle == ToCycle && FromCycle &&
650 (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
651 return false;
652
653 // It's not always legal to break critical edges and sink the computation
654 // to the edge.
655 //
656 // %bb.1:
657 // v1024
658 // Beq %bb.3
659 // <fallthrough>
660 // %bb.2:
661 // ... no uses of v1024
662 // <fallthrough>
663 // %bb.3:
664 // ...
665 // = v1024
666 //
667 // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
668 //
669 // %bb.1:
670 // ...
671 // Bne %bb.2
672 // %bb.4:
673 // v1024 =
674 // B %bb.3
675 // %bb.2:
676 // ... no uses of v1024
677 // <fallthrough>
678 // %bb.3:
679 // ...
680 // = v1024
681 //
682 // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
683 // flow. We need to ensure the new basic block where the computation is
684 // sunk to dominates all the uses.
685 // It's only legal to break critical edge and sink the computation to the
686 // new block if all the predecessors of "To", except for "From", are
687 // not dominated by "From". Given SSA property, this means these
688 // predecessors are dominated by "To".
689 //
690 // There is no need to do this check if all the uses are PHI nodes. PHI
691 // sources are only defined on the specific predecessor edges.
692 if (!BreakPHIEdge) {
693 for (MachineBasicBlock *Pred : ToBB->predecessors())
694 if (Pred != FromBB && !DT->dominates(ToBB, Pred))
695 return false;
696 }
697
698 ToSplit.insert(std::make_pair(FromBB, ToBB));
699
700 return true;
701}
702
703std::vector<unsigned> &
704MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
705 // Currently to save compiling time, MBB's register pressure will not change
706 // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
707 // register pressure is changed after sinking any instructions into it.
708 // FIXME: need a accurate and cheap register pressure estiminate model here.
709 auto RP = CachedRegisterPressure.find(&MBB);
710 if (RP != CachedRegisterPressure.end())
711 return RP->second;
712
713 RegionPressure Pressure;
714 RegPressureTracker RPTracker(Pressure);
715
716 // Initialize the register pressure tracker.
717 RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
718 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
719
721 MIE = MBB.instr_begin();
722 MII != MIE; --MII) {
723 MachineInstr &MI = *std::prev(MII);
724 if (MI.isDebugInstr() || MI.isPseudoProbe())
725 continue;
726 RegisterOperands RegOpers;
727 RegOpers.collect(MI, *TRI, *MRI, false, false);
728 RPTracker.recedeSkipDebugValues();
729 assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
730 RPTracker.recede(RegOpers);
731 }
732
733 RPTracker.closeRegion();
734 auto It = CachedRegisterPressure.insert(
735 std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
736 return It.first->second;
737}
738
739/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
740bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
742 MachineBasicBlock *SuccToSinkTo,
743 AllSuccsCache &AllSuccessors) {
744 assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
745
746 if (MBB == SuccToSinkTo)
747 return false;
748
749 // It is profitable if SuccToSinkTo does not post dominate current block.
750 if (!PDT->dominates(SuccToSinkTo, MBB))
751 return true;
752
753 // It is profitable to sink an instruction from a deeper cycle to a shallower
754 // cycle, even if the latter post-dominates the former (PR21115).
755 if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
756 return true;
757
758 // Check if only use in post dominated block is PHI instruction.
759 bool NonPHIUse = false;
760 for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
761 MachineBasicBlock *UseBlock = UseInst.getParent();
762 if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
763 NonPHIUse = true;
764 }
765 if (!NonPHIUse)
766 return true;
767
768 // If SuccToSinkTo post dominates then also it may be profitable if MI
769 // can further profitably sinked into another block in next round.
770 bool BreakPHIEdge = false;
771 // FIXME - If finding successor is compile time expensive then cache results.
772 if (MachineBasicBlock *MBB2 =
773 FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
774 return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
775
776 MachineCycle *MCycle = CI->getCycle(MBB);
777
778 // If the instruction is not inside a cycle, it is not profitable to sink MI to
779 // a post dominate block SuccToSinkTo.
780 if (!MCycle)
781 return false;
782
783 auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
784 unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
785 const int *PS = TRI->getRegClassPressureSets(RC);
786 // Get register pressure for block SuccToSinkTo.
787 std::vector<unsigned> BBRegisterPressure =
788 getBBRegisterPressure(*SuccToSinkTo);
789 for (; *PS != -1; PS++)
790 // check if any register pressure set exceeds limit in block SuccToSinkTo
791 // after sinking.
792 if (Weight + BBRegisterPressure[*PS] >=
793 TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
794 return true;
795 return false;
796 };
797
798 // If this instruction is inside a Cycle and sinking this instruction can make
799 // more registers live range shorten, it is still prifitable.
800 for (const MachineOperand &MO : MI.operands()) {
801 // Ignore non-register operands.
802 if (!MO.isReg())
803 continue;
804 Register Reg = MO.getReg();
805 if (Reg == 0)
806 continue;
807
808 if (Reg.isPhysical()) {
809 if (MO.isUse() &&
810 (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
811 continue;
812
813 // Don't handle non-constant and non-ignorable physical register.
814 return false;
815 }
816
817 // Users for the defs are all dominated by SuccToSinkTo.
818 if (MO.isDef()) {
819 // This def register's live range is shortened after sinking.
820 bool LocalUse = false;
821 if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
822 LocalUse))
823 return false;
824 } else {
825 MachineInstr *DefMI = MRI->getVRegDef(Reg);
826 if (!DefMI)
827 continue;
829 // DefMI is defined outside of cycle. There should be no live range
830 // impact for this operand. Defination outside of cycle means:
831 // 1: defination is outside of cycle.
832 // 2: defination is in this cycle, but it is a PHI in the cycle header.
833 if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
834 Cycle->getHeader() == DefMI->getParent()))
835 continue;
836 // The DefMI is defined inside the cycle.
837 // If sinking this operand makes some register pressure set exceed limit,
838 // it is not profitable.
839 if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
840 LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
841 return false;
842 }
843 }
844 }
845
846 // If MI is in cycle and all its operands are alive across the whole cycle or
847 // if no operand sinking make register pressure set exceed limit, it is
848 // profitable to sink MI.
849 return true;
850}
851
852/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
853/// computing it if it was not already cached.
855MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
856 AllSuccsCache &AllSuccessors) const {
857 // Do we have the sorted successors in cache ?
858 auto Succs = AllSuccessors.find(MBB);
859 if (Succs != AllSuccessors.end())
860 return Succs->second;
861
863
864 // Handle cases where sinking can happen but where the sink point isn't a
865 // successor. For example:
866 //
867 // x = computation
868 // if () {} else {}
869 // use x
870 //
871 for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
872 // DomTree children of MBB that have MBB as immediate dominator are added.
873 if (DTChild->getIDom()->getBlock() == MI.getParent() &&
874 // Skip MBBs already added to the AllSuccs vector above.
875 !MBB->isSuccessor(DTChild->getBlock()))
876 AllSuccs.push_back(DTChild->getBlock());
877 }
878
879 // Sort Successors according to their cycle depth or block frequency info.
881 AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
882 uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
883 uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
884 bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
885 return HasBlockFreq ? LHSFreq < RHSFreq
886 : CI->getCycleDepth(L) < CI->getCycleDepth(R);
887 });
888
889 auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
890
891 return it.first->second;
892}
893
894/// FindSuccToSinkTo - Find a successor to sink this instruction to.
896MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
897 bool &BreakPHIEdge,
898 AllSuccsCache &AllSuccessors) {
899 assert (MBB && "Invalid MachineBasicBlock!");
900
901 // loop over all the operands of the specified instruction. If there is
902 // anything we can't handle, bail out.
903
904 // SuccToSinkTo - This is the successor to sink this instruction to, once we
905 // decide.
906 MachineBasicBlock *SuccToSinkTo = nullptr;
907 for (const MachineOperand &MO : MI.operands()) {
908 if (!MO.isReg()) continue; // Ignore non-register operands.
909
910 Register Reg = MO.getReg();
911 if (Reg == 0) continue;
912
913 if (Reg.isPhysical()) {
914 if (MO.isUse()) {
915 // If the physreg has no defs anywhere, it's just an ambient register
916 // and we can freely move its uses. Alternatively, if it's allocatable,
917 // it could get allocated to something with a def during allocation.
918 if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
919 return nullptr;
920 } else if (!MO.isDead()) {
921 // A def that isn't dead. We can't move it.
922 return nullptr;
923 }
924 } else {
925 // Virtual register uses are always safe to sink.
926 if (MO.isUse()) continue;
927
928 // If it's not safe to move defs of the register class, then abort.
929 if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
930 return nullptr;
931
932 // Virtual register defs can only be sunk if all their uses are in blocks
933 // dominated by one of the successors.
934 if (SuccToSinkTo) {
935 // If a previous operand picked a block to sink to, then this operand
936 // must be sinkable to the same block.
937 bool LocalUse = false;
938 if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
939 BreakPHIEdge, LocalUse))
940 return nullptr;
941
942 continue;
943 }
944
945 // Otherwise, we should look at all the successors and decide which one
946 // we should sink to. If we have reliable block frequency information
947 // (frequency != 0) available, give successors with smaller frequencies
948 // higher priority, otherwise prioritize smaller cycle depths.
949 for (MachineBasicBlock *SuccBlock :
950 GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
951 bool LocalUse = false;
952 if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
953 BreakPHIEdge, LocalUse)) {
954 SuccToSinkTo = SuccBlock;
955 break;
956 }
957 if (LocalUse)
958 // Def is used locally, it's never safe to move this def.
959 return nullptr;
960 }
961
962 // If we couldn't find a block to sink to, ignore this instruction.
963 if (!SuccToSinkTo)
964 return nullptr;
965 if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
966 return nullptr;
967 }
968 }
969
970 // It is not possible to sink an instruction into its own block. This can
971 // happen with cycles.
972 if (MBB == SuccToSinkTo)
973 return nullptr;
974
975 // It's not safe to sink instructions to EH landing pad. Control flow into
976 // landing pad is implicitly defined.
977 if (SuccToSinkTo && SuccToSinkTo->isEHPad())
978 return nullptr;
979
980 // It ought to be okay to sink instructions into an INLINEASM_BR target, but
981 // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
982 // the source block (which this code does not yet do). So for now, forbid
983 // doing so.
984 if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
985 return nullptr;
986
987 return SuccToSinkTo;
988}
989
990/// Return true if MI is likely to be usable as a memory operation by the
991/// implicit null check optimization.
992///
993/// This is a "best effort" heuristic, and should not be relied upon for
994/// correctness. This returning true does not guarantee that the implicit null
995/// check optimization is legal over MI, and this returning false does not
996/// guarantee MI cannot possibly be used to do a null check.
998 const TargetInstrInfo *TII,
999 const TargetRegisterInfo *TRI) {
1000 using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;
1001
1002 auto *MBB = MI.getParent();
1003 if (MBB->pred_size() != 1)
1004 return false;
1005
1006 auto *PredMBB = *MBB->pred_begin();
1007 auto *PredBB = PredMBB->getBasicBlock();
1008
1009 // Frontends that don't use implicit null checks have no reason to emit
1010 // branches with make.implicit metadata, and this function should always
1011 // return false for them.
1012 if (!PredBB ||
1013 !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
1014 return false;
1015
1016 const MachineOperand *BaseOp;
1017 int64_t Offset;
1018 bool OffsetIsScalable;
1019 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
1020 return false;
1021
1022 if (!BaseOp->isReg())
1023 return false;
1024
1025 if (!(MI.mayLoad() && !MI.isPredicable()))
1026 return false;
1027
1028 MachineBranchPredicate MBP;
1029 if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
1030 return false;
1031
1032 return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
1033 (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
1034 MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
1035 MBP.LHS.getReg() == BaseOp->getReg();
1036}
1037
1038/// If the sunk instruction is a copy, try to forward the copy instead of
1039/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
1040/// there's any subregister weirdness involved. Returns true if copy
1041/// propagation occurred.
1042static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
1043 Register Reg) {
1044 const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
1045 const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
1046
1047 // Copy DBG_VALUE operand and set the original to undef. We then check to
1048 // see whether this is something that can be copy-forwarded. If it isn't,
1049 // continue around the loop.
1050
1051 const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
1052 auto CopyOperands = TII.isCopyInstr(SinkInst);
1053 if (!CopyOperands)
1054 return false;
1055 SrcMO = CopyOperands->Source;
1056 DstMO = CopyOperands->Destination;
1057
1058 // Check validity of forwarding this copy.
1059 bool PostRA = MRI.getNumVirtRegs() == 0;
1060
1061 // Trying to forward between physical and virtual registers is too hard.
1062 if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
1063 return false;
1064
1065 // Only try virtual register copy-forwarding before regalloc, and physical
1066 // register copy-forwarding after regalloc.
1067 bool arePhysRegs = !Reg.isVirtual();
1068 if (arePhysRegs != PostRA)
1069 return false;
1070
1071 // Pre-regalloc, only forward if all subregisters agree (or there are no
1072 // subregs at all). More analysis might recover some forwardable copies.
1073 if (!PostRA)
1074 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
1075 if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
1076 DbgMO.getSubReg() != DstMO->getSubReg())
1077 return false;
1078
1079 // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
1080 // of this copy. Only forward the copy if the DBG_VALUE operand exactly
1081 // matches the copy destination.
1082 if (PostRA && Reg != DstMO->getReg())
1083 return false;
1084
1085 for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
1086 DbgMO.setReg(SrcMO->getReg());
1087 DbgMO.setSubReg(SrcMO->getSubReg());
1088 }
1089 return true;
1090}
1091
1092using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
1093/// Sink an instruction and its associated debug instructions.
1094static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
1096 ArrayRef<MIRegs> DbgValuesToSink) {
1097 // If we cannot find a location to use (merge with), then we erase the debug
1098 // location to prevent debug-info driven tools from potentially reporting
1099 // wrong location information.
1100 if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
1101 MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
1102 InsertPos->getDebugLoc()));
1103 else
1104 MI.setDebugLoc(DebugLoc());
1105
1106 // Move the instruction.
1107 MachineBasicBlock *ParentBlock = MI.getParent();
1108 SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
1110
1111 // Sink a copy of debug users to the insert position. Mark the original
1112 // DBG_VALUE location as 'undef', indicating that any earlier variable
1113 // location should be terminated as we've optimised away the value at this
1114 // point.
1115 for (const auto &DbgValueToSink : DbgValuesToSink) {
1116 MachineInstr *DbgMI = DbgValueToSink.first;
1117 MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
1118 SuccToSinkTo.insert(InsertPos, NewDbgMI);
1119
1120 bool PropagatedAllSunkOps = true;
1121 for (unsigned Reg : DbgValueToSink.second) {
1122 if (DbgMI->hasDebugOperandForReg(Reg)) {
1123 if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
1124 PropagatedAllSunkOps = false;
1125 break;
1126 }
1127 }
1128 }
1129 if (!PropagatedAllSunkOps)
1130 DbgMI->setDebugValueUndef();
1131 }
1132}
1133
1134/// hasStoreBetween - check if there is store betweeen straight line blocks From
1135/// and To.
1136bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
1138 // Make sure From and To are in straight line which means From dominates To
1139 // and To post dominates From.
1140 if (!DT->dominates(From, To) || !PDT->dominates(To, From))
1141 return true;
1142
1143 auto BlockPair = std::make_pair(From, To);
1144
1145 // Does these two blocks pair be queried before and have a definite cached
1146 // result?
1147 if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
1148 return HasStoreCache[BlockPair];
1149
1150 if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
1151 return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
1152 return I->mayAlias(AA, MI, false);
1153 });
1154
1155 bool SawStore = false;
1156 bool HasAliasedStore = false;
1157 DenseSet<MachineBasicBlock *> HandledBlocks;
1158 DenseSet<MachineBasicBlock *> HandledDomBlocks;
1159 // Go through all reachable blocks from From.
1160 for (MachineBasicBlock *BB : depth_first(From)) {
1161 // We insert the instruction at the start of block To, so no need to worry
1162 // about stores inside To.
1163 // Store in block From should be already considered when just enter function
1164 // SinkInstruction.
1165 if (BB == To || BB == From)
1166 continue;
1167
1168 // We already handle this BB in previous iteration.
1169 if (HandledBlocks.count(BB))
1170 continue;
1171
1172 HandledBlocks.insert(BB);
1173 // To post dominates BB, it must be a path from block From.
1174 if (PDT->dominates(To, BB)) {
1175 if (!HandledDomBlocks.count(BB))
1176 HandledDomBlocks.insert(BB);
1177
1178 // If this BB is too big or the block number in straight line between From
1179 // and To is too big, stop searching to save compiling time.
1180 if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
1181 HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
1182 for (auto *DomBB : HandledDomBlocks) {
1183 if (DomBB != BB && DT->dominates(DomBB, BB))
1184 HasStoreCache[std::make_pair(DomBB, To)] = true;
1185 else if(DomBB != BB && DT->dominates(BB, DomBB))
1186 HasStoreCache[std::make_pair(From, DomBB)] = true;
1187 }
1188 HasStoreCache[BlockPair] = true;
1189 return true;
1190 }
1191
1192 for (MachineInstr &I : *BB) {
1193 // Treat as alias conservatively for a call or an ordered memory
1194 // operation.
1195 if (I.isCall() || I.hasOrderedMemoryRef()) {
1196 for (auto *DomBB : HandledDomBlocks) {
1197 if (DomBB != BB && DT->dominates(DomBB, BB))
1198 HasStoreCache[std::make_pair(DomBB, To)] = true;
1199 else if(DomBB != BB && DT->dominates(BB, DomBB))
1200 HasStoreCache[std::make_pair(From, DomBB)] = true;
1201 }
1202 HasStoreCache[BlockPair] = true;
1203 return true;
1204 }
1205
1206 if (I.mayStore()) {
1207 SawStore = true;
1208 // We still have chance to sink MI if all stores between are not
1209 // aliased to MI.
1210 // Cache all store instructions, so that we don't need to go through
1211 // all From reachable blocks for next load instruction.
1212 if (I.mayAlias(AA, MI, false))
1213 HasAliasedStore = true;
1214 StoreInstrCache[BlockPair].push_back(&I);
1215 }
1216 }
1217 }
1218 }
1219 // If there is no store at all, cache the result.
1220 if (!SawStore)
1221 HasStoreCache[BlockPair] = false;
1222 return HasAliasedStore;
1223}
1224
1225/// Sink instructions into cycles if profitable. This especially tries to
1226/// prevent register spills caused by register pressure if there is little to no
1227/// overhead moving instructions into cycles.
1228bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
1229 LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
1231 assert(Preheader && "Cycle sink needs a preheader block");
1232 MachineBasicBlock *SinkBlock = nullptr;
1233 bool CanSink = true;
1234 const MachineOperand &MO = I.getOperand(0);
1235
1236 for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
1237 LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
1238 if (!Cycle->contains(MI.getParent())) {
1239 LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
1240 CanSink = false;
1241 break;
1242 }
1243
1244 // FIXME: Come up with a proper cost model that estimates whether sinking
1245 // the instruction (and thus possibly executing it on every cycle
1246 // iteration) is more expensive than a register.
1247 // For now assumes that copies are cheap and thus almost always worth it.
1248 if (!MI.isCopy()) {
1249 LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
1250 CanSink = false;
1251 break;
1252 }
1253 if (!SinkBlock) {
1254 SinkBlock = MI.getParent();
1255 LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
1256 << printMBBReference(*SinkBlock) << "\n");
1257 continue;
1258 }
1259 SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
1260 if (!SinkBlock) {
1261 LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
1262 CanSink = false;
1263 break;
1264 }
1265 LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
1266 printMBBReference(*SinkBlock) << "\n");
1267 }
1268
1269 if (!CanSink) {
1270 LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
1271 return false;
1272 }
1273 if (!SinkBlock) {
1274 LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
1275 return false;
1276 }
1277 if (SinkBlock == Preheader) {
1278 LLVM_DEBUG(
1279 dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
1280 return false;
1281 }
1283 LLVM_DEBUG(
1284 dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
1285 return false;
1286 }
1287
1288 LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
1289 SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
1290 I);
1291
1292 // Conservatively clear any kill flags on uses of sunk instruction
1293 for (MachineOperand &MO : I.operands()) {
1294 if (MO.isReg() && MO.readsReg())
1295 RegsToClearKillFlags.insert(MO.getReg());
1296 }
1297
1298 // The instruction is moved from its basic block, so do not retain the
1299 // debug information.
1300 assert(!I.isDebugInstr() && "Should not sink debug inst");
1301 I.setDebugLoc(DebugLoc());
1302 return true;
1303}
1304
1305/// Return true if a target defined block prologue instruction interferes
1306/// with a sink candidate.
1310 const TargetRegisterInfo *TRI,
1311 const TargetInstrInfo *TII,
1312 const MachineRegisterInfo *MRI) {
1313 if (BB->begin() == End)
1314 return false; // no prologue
1315 for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
1316 // Only check target defined prologue instructions
1317 if (!TII->isBasicBlockPrologue(*PI))
1318 continue;
1319 for (auto &MO : MI.operands()) {
1320 if (!MO.isReg())
1321 continue;
1322 Register Reg = MO.getReg();
1323 if (!Reg)
1324 continue;
1325 if (MO.isUse()) {
1326 if (Reg.isPhysical() &&
1327 (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
1328 continue;
1329 if (PI->modifiesRegister(Reg, TRI))
1330 return true;
1331 } else {
1332 if (PI->readsRegister(Reg, TRI))
1333 return true;
1334 // Check for interference with non-dead defs
1335 auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
1336 if (DefOp && !DefOp->isDead())
1337 return true;
1338 }
1339 }
1340 }
1341 return false;
1342}
1343
1344/// SinkInstruction - Determine whether it is safe to sink the specified machine
1345/// instruction out of its current block into a successor.
1346bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
1347 AllSuccsCache &AllSuccessors) {
1348 // Don't sink instructions that the target prefers not to sink.
1349 if (!TII->shouldSink(MI))
1350 return false;
1351
1352 // Check if it's safe to move the instruction.
1353 if (!MI.isSafeToMove(AA, SawStore))
1354 return false;
1355
1356 // Convergent operations may not be made control-dependent on additional
1357 // values.
1358 if (MI.isConvergent())
1359 return false;
1360
1361 // Don't break implicit null checks. This is a performance heuristic, and not
1362 // required for correctness.
1364 return false;
1365
1366 // FIXME: This should include support for sinking instructions within the
1367 // block they are currently in to shorten the live ranges. We often get
1368 // instructions sunk into the top of a large block, but it would be better to
1369 // also sink them down before their first use in the block. This xform has to
1370 // be careful not to *increase* register pressure though, e.g. sinking
1371 // "x = y + z" down if it kills y and z would increase the live ranges of y
1372 // and z and only shrink the live range of x.
1373
1374 bool BreakPHIEdge = false;
1375 MachineBasicBlock *ParentBlock = MI.getParent();
1376 MachineBasicBlock *SuccToSinkTo =
1377 FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
1378
1379 // If there are no outputs, it must have side-effects.
1380 if (!SuccToSinkTo)
1381 return false;
1382
1383 // If the instruction to move defines a dead physical register which is live
1384 // when leaving the basic block, don't move it because it could turn into a
1385 // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
1386 for (const MachineOperand &MO : MI.operands()) {
1387 if (!MO.isReg() || MO.isUse())
1388 continue;
1389 Register Reg = MO.getReg();
1390 if (Reg == 0 || !Reg.isPhysical())
1391 continue;
1392 if (SuccToSinkTo->isLiveIn(Reg))
1393 return false;
1394 }
1395
1396 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
1397
1398 // If the block has multiple predecessors, this is a critical edge.
1399 // Decide if we can sink along it or need to break the edge.
1400 if (SuccToSinkTo->pred_size() > 1) {
1401 // We cannot sink a load across a critical edge - there may be stores in
1402 // other code paths.
1403 bool TryBreak = false;
1404 bool Store =
1405 MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
1406 if (!MI.isSafeToMove(AA, Store)) {
1407 LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
1408 TryBreak = true;
1409 }
1410
1411 // We don't want to sink across a critical edge if we don't dominate the
1412 // successor. We could be introducing calculations to new code paths.
1413 if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
1414 LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
1415 TryBreak = true;
1416 }
1417
1418 // Don't sink instructions into a cycle.
1419 if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
1420 (!CI->getCycle(SuccToSinkTo)->isReducible() ||
1421 CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
1422 LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
1423 TryBreak = true;
1424 }
1425
1426 // Otherwise we are OK with sinking along a critical edge.
1427 if (!TryBreak)
1428 LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
1429 else {
1430 // Mark this edge as to be split.
1431 // If the edge can actually be split, the next iteration of the main loop
1432 // will sink MI in the newly created block.
1433 bool Status =
1434 PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
1435 if (!Status)
1436 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1437 "break critical edge\n");
1438 // The instruction will not be sunk this time.
1439 return false;
1440 }
1441 }
1442
1443 if (BreakPHIEdge) {
1444 // BreakPHIEdge is true if all the uses are in the successor MBB being
1445 // sunken into and they are all PHI nodes. In this case, machine-sink must
1446 // break the critical edge first.
1447 bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
1448 SuccToSinkTo, BreakPHIEdge);
1449 if (!Status)
1450 LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
1451 "break critical edge\n");
1452 // The instruction will not be sunk this time.
1453 return false;
1454 }
1455
1456 // Determine where to insert into. Skip phi nodes.
1457 MachineBasicBlock::iterator InsertPos =
1458 SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
1459 if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
1460 LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
1461 return false;
1462 }
1463
1464 // Collect debug users of any vreg that this inst defines.
1465 SmallVector<MIRegs, 4> DbgUsersToSink;
1466 for (auto &MO : MI.operands()) {
1467 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
1468 continue;
1469 if (!SeenDbgUsers.count(MO.getReg()))
1470 continue;
1471
1472 // Sink any users that don't pass any other DBG_VALUEs for this variable.
1473 auto &Users = SeenDbgUsers[MO.getReg()];
1474 for (auto &User : Users) {
1475 MachineInstr *DbgMI = User.getPointer();
1476 if (User.getInt()) {
1477 // This DBG_VALUE would re-order assignments. If we can't copy-propagate
1478 // it, it can't be recovered. Set it undef.
1479 if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
1480 DbgMI->setDebugValueUndef();
1481 } else {
1482 DbgUsersToSink.push_back(
1483 {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
1484 }
1485 }
1486 }
1487
1488 // After sinking, some debug users may not be dominated any more. If possible,
1489 // copy-propagate their operands. As it's expensive, don't do this if there's
1490 // no debuginfo in the program.
1491 if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
1492 SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
1493
1494 performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
1495
1496 // Conservatively, clear any kill flags, since it's possible that they are no
1497 // longer correct.
1498 // Note that we have to clear the kill flags for any register this instruction
1499 // uses as we may sink over another instruction which currently kills the
1500 // used registers.
1501 for (MachineOperand &MO : MI.operands()) {
1502 if (MO.isReg() && MO.isUse())
1503 RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
1504 }
1505
1506 return true;
1507}
1508
1509void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
1510 MachineInstr &MI, MachineBasicBlock *TargetBlock) {
1511 assert(MI.isCopy());
1512 assert(MI.getOperand(1).isReg());
1513
1514 // Enumerate all users of vreg operands that are def'd. Skip those that will
1515 // be sunk. For the rest, if they are not dominated by the block we will sink
1516 // MI into, propagate the copy source to them.
1518 SmallVector<Register, 4> DbgUseRegs;
1519 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1520 for (auto &MO : MI.operands()) {
1521 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
1522 continue;
1523 DbgUseRegs.push_back(MO.getReg());
1524 for (auto &User : MRI.use_instructions(MO.getReg())) {
1525 if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
1526 continue;
1527
1528 // If is in same block, will either sink or be use-before-def.
1529 if (User.getParent() == MI.getParent())
1530 continue;
1531
1532 assert(User.hasDebugOperandForReg(MO.getReg()) &&
1533 "DBG_VALUE user of vreg, but has no operand for it?");
1534 DbgDefUsers.push_back(&User);
1535 }
1536 }
1537
1538 // Point the users of this copy that are no longer dominated, at the source
1539 // of the copy.
1540 for (auto *User : DbgDefUsers) {
1541 for (auto &Reg : DbgUseRegs) {
1542 for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
1543 DbgOp.setReg(MI.getOperand(1).getReg());
1544 DbgOp.setSubReg(MI.getOperand(1).getSubReg());
1545 }
1546 }
1547 }
1548}
1549
1550//===----------------------------------------------------------------------===//
1551// This pass is not intended to be a replacement or a complete alternative
1552// for the pre-ra machine sink pass. It is only designed to sink COPY
1553// instructions which should be handled after RA.
1554//
1555// This pass sinks COPY instructions into a successor block, if the COPY is not
1556// used in the current block and the COPY is live-in to a single successor
1557// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
1558// copy on paths where their results aren't needed. This also exposes
1559// additional opportunites for dead copy elimination and shrink wrapping.
1560//
1561// These copies were either not handled by or are inserted after the MachineSink
1562// pass. As an example of the former case, the MachineSink pass cannot sink
1563// COPY instructions with allocatable source registers; for AArch64 these type
1564// of copy instructions are frequently used to move function parameters (PhyReg)
1565// into virtual registers in the entry block.
1566//
1567// For the machine IR below, this pass will sink %w19 in the entry into its
1568// successor (%bb.1) because %w19 is only live-in in %bb.1.
1569// %bb.0:
1570// %wzr = SUBSWri %w1, 1
1571// %w19 = COPY %w0
1572// Bcc 11, %bb.2
1573// %bb.1:
1574// Live Ins: %w19
1575// BL @fun
1576// %w0 = ADDWrr %w0, %w19
1577// RET %w0
1578// %bb.2:
1579// %w0 = COPY %wzr
1580// RET %w0
1581// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
1582// able to see %bb.0 as a candidate.
1583//===----------------------------------------------------------------------===//
1584namespace {
1585
1586class PostRAMachineSinking : public MachineFunctionPass {
1587public:
1588 bool runOnMachineFunction(MachineFunction &MF) override;
1589
1590 static char ID;
1591 PostRAMachineSinking() : MachineFunctionPass(ID) {}
1592 StringRef getPassName() const override { return "PostRA Machine Sink"; }
1593
1594 void getAnalysisUsage(AnalysisUsage &AU) const override {
1595 AU.setPreservesCFG();
1597 }
1598
1601 MachineFunctionProperties::Property::NoVRegs);
1602 }
1603
1604private:
1605 /// Track which register units have been modified and used.
1606 LiveRegUnits ModifiedRegUnits, UsedRegUnits;
1607
1608 /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
1609 /// entry in this map for each unit it touches. The DBG_VALUE's entry
1610 /// consists of a pointer to the instruction itself, and a vector of registers
1611 /// referred to by the instruction that overlap the key register unit.
1613
1614 /// Sink Copy instructions unused in the same block close to their uses in
1615 /// successors.
1616 bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
1617 const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
1618};
1619} // namespace
1620
1621char PostRAMachineSinking::ID = 0;
1622char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;
1623
1624INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
1625 "PostRA Machine Sink", false, false)
1626
1627static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
1629 LiveRegUnits LiveInRegUnits(*TRI);
1630 LiveInRegUnits.addLiveIns(MBB);
1631 return !LiveInRegUnits.available(Reg);
1632}
1633
1634static MachineBasicBlock *
1636 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1637 unsigned Reg, const TargetRegisterInfo *TRI) {
1638 // Try to find a single sinkable successor in which Reg is live-in.
1639 MachineBasicBlock *BB = nullptr;
1640 for (auto *SI : SinkableBBs) {
1641 if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
1642 // If BB is set here, Reg is live-in to at least two sinkable successors,
1643 // so quit.
1644 if (BB)
1645 return nullptr;
1646 BB = SI;
1647 }
1648 }
1649 // Reg is not live-in to any sinkable successors.
1650 if (!BB)
1651 return nullptr;
1652
1653 // Check if any register aliased with Reg is live-in in other successors.
1654 for (auto *SI : CurBB.successors()) {
1655 if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
1656 return nullptr;
1657 }
1658 return BB;
1659}
1660
1661static MachineBasicBlock *
1663 const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
1664 ArrayRef<unsigned> DefedRegsInCopy,
1665 const TargetRegisterInfo *TRI) {
1666 MachineBasicBlock *SingleBB = nullptr;
1667 for (auto DefReg : DefedRegsInCopy) {
1668 MachineBasicBlock *BB =
1669 getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
1670 if (!BB || (SingleBB && SingleBB != BB))
1671 return nullptr;
1672 SingleBB = BB;
1673 }
1674 return SingleBB;
1675}
1676
1678 SmallVectorImpl<unsigned> &UsedOpsInCopy,
1679 LiveRegUnits &UsedRegUnits,
1680 const TargetRegisterInfo *TRI) {
1681 for (auto U : UsedOpsInCopy) {
1682 MachineOperand &MO = MI->getOperand(U);
1683 Register SrcReg = MO.getReg();
1684 if (!UsedRegUnits.available(SrcReg)) {
1685 MachineBasicBlock::iterator NI = std::next(MI->getIterator());
1686 for (MachineInstr &UI : make_range(NI, CurBB.end())) {
1687 if (UI.killsRegister(SrcReg, TRI)) {
1688 UI.clearRegisterKills(SrcReg, TRI);
1689 MO.setIsKill(true);
1690 break;
1691 }
1692 }
1693 }
1694 }
1695}
1696
1698 SmallVectorImpl<unsigned> &UsedOpsInCopy,
1699 SmallVectorImpl<unsigned> &DefedRegsInCopy) {
1700 MachineFunction &MF = *SuccBB->getParent();
1702 for (unsigned DefReg : DefedRegsInCopy)
1703 for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
1704 SuccBB->removeLiveIn(*S);
1705 for (auto U : UsedOpsInCopy) {
1706 Register SrcReg = MI->getOperand(U).getReg();
1707 LaneBitmask Mask;
1708 for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
1709 Mask |= (*S).second;
1710 }
1711 SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
1712 }
1713 SuccBB->sortUniqueLiveIns();
1714}
1715
1717 SmallVectorImpl<unsigned> &UsedOpsInCopy,
1718 SmallVectorImpl<unsigned> &DefedRegsInCopy,
1719 LiveRegUnits &ModifiedRegUnits,
1720 LiveRegUnits &UsedRegUnits) {
1721 bool HasRegDependency = false;
1722 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1723 MachineOperand &MO = MI->getOperand(i);
1724 if (!MO.isReg())
1725 continue;
1726 Register Reg = MO.getReg();
1727 if (!Reg)
1728 continue;
1729 if (MO.isDef()) {
1730 if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
1731 HasRegDependency = true;
1732 break;
1733 }
1734 DefedRegsInCopy.push_back(Reg);
1735
1736 // FIXME: instead of isUse(), readsReg() would be a better fix here,
1737 // For example, we can ignore modifications in reg with undef. However,
1738 // it's not perfectly clear if skipping the internal read is safe in all
1739 // other targets.
1740 } else if (MO.isUse()) {
1741 if (!ModifiedRegUnits.available(Reg)) {
1742 HasRegDependency = true;
1743 break;
1744 }
1745 UsedOpsInCopy.push_back(i);
1746 }
1747 }
1748 return HasRegDependency;
1749}
1750
1751bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
1752 MachineFunction &MF,
1753 const TargetRegisterInfo *TRI,
1754 const TargetInstrInfo *TII) {
1756 // FIXME: For now, we sink only to a successor which has a single predecessor
1757 // so that we can directly sink COPY instructions to the successor without
1758 // adding any new block or branch instruction.
1759 for (MachineBasicBlock *SI : CurBB.successors())
1760 if (!SI->livein_empty() && SI->pred_size() == 1)
1761 SinkableBBs.insert(SI);
1762
1763 if (SinkableBBs.empty())
1764 return false;
1765
1766 bool Changed = false;
1767
1768 // Track which registers have been modified and used between the end of the
1769 // block and the current instruction.
1770 ModifiedRegUnits.clear();
1771 UsedRegUnits.clear();
1772 SeenDbgInstrs.clear();
1773
1775 // Track the operand index for use in Copy.
1776 SmallVector<unsigned, 2> UsedOpsInCopy;
1777 // Track the register number defed in Copy.
1778 SmallVector<unsigned, 2> DefedRegsInCopy;
1779
1780 // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
1781 // for DBG_VALUEs later, record them when they're encountered.
1782 if (MI.isDebugValue() && !MI.isDebugRef()) {
1784 bool IsValid = true;
1785 for (MachineOperand &MO : MI.debug_operands()) {
1786 if (MO.isReg() && MO.getReg().isPhysical()) {
1787 // Bail if we can already tell the sink would be rejected, rather
1788 // than needlessly accumulating lots of DBG_VALUEs.
1789 if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
1790 ModifiedRegUnits, UsedRegUnits)) {
1791 IsValid = false;
1792 break;
1793 }
1794
1795 // Record debug use of each reg unit.
1796 for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
1797 ++RI)
1798 MIUnits[*RI].push_back(MO.getReg());
1799 }
1800 }
1801 if (IsValid) {
1802 for (auto &RegOps : MIUnits)
1803 SeenDbgInstrs[RegOps.first].emplace_back(&MI,
1804 std::move(RegOps.second));
1805 }
1806 continue;
1807 }
1808
1809 if (MI.isDebugOrPseudoInstr())
1810 continue;
1811
1812 // Do not move any instruction across function call.
1813 if (MI.isCall())
1814 return false;
1815
1816 if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
1817 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1818 TRI);
1819 continue;
1820 }
1821
1822 // Don't sink the COPY if it would violate a register dependency.
1823 if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
1824 ModifiedRegUnits, UsedRegUnits)) {
1825 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1826 TRI);
1827 continue;
1828 }
1829 assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
1830 "Unexpect SrcReg or DefReg");
1831 MachineBasicBlock *SuccBB =
1832 getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
1833 // Don't sink if we cannot find a single sinkable successor in which Reg
1834 // is live-in.
1835 if (!SuccBB) {
1836 LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
1837 TRI);
1838 continue;
1839 }
1840 assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
1841 "Unexpected predecessor");
1842
1843 // Collect DBG_VALUEs that must sink with this copy. We've previously
1844 // recorded which reg units that DBG_VALUEs read, if this instruction
1845 // writes any of those units then the corresponding DBG_VALUEs must sink.
1847 for (auto &MO : MI.operands()) {
1848 if (!MO.isReg() || !MO.isDef())
1849 continue;
1850
1851 for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
1852 for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
1853 auto &Regs = DbgValsToSinkMap[MIRegs.first];
1854 for (unsigned Reg : MIRegs.second)
1855 Regs.push_back(Reg);
1856 }
1857 }
1858 }
1859 auto DbgValsToSink = DbgValsToSinkMap.takeVector();
1860
1861 LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
1862
1863 MachineBasicBlock::iterator InsertPos =
1864 SuccBB->SkipPHIsAndLabels(SuccBB->begin());
1865 if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
1866 LLVM_DEBUG(
1867 dbgs() << " *** Not sinking: prologue interference\n");
1868 continue;
1869 }
1870
1871 // Clear the kill flag if SrcReg is killed between MI and the end of the
1872 // block.
1873 clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
1874 performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
1875 updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
1876
1877 Changed = true;
1878 ++NumPostRACopySink;
1879 }
1880 return Changed;
1881}
1882
1883bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
1884 if (skipFunction(MF.getFunction()))
1885 return false;
1886
1887 bool Changed = false;
1890
1891 ModifiedRegUnits.init(*TRI);
1892 UsedRegUnits.init(*TRI);
1893 for (auto &BB : MF)
1894 Changed |= tryToSinkCopy(BB, MF, TRI, TII);
1895
1896 return Changed;
1897}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
basic Basic Alias true
BlockVerifier::State From
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define I(x, y, z)
Definition: MD5.cpp:58
static cl::opt< unsigned > SinkLoadInstsPerBlockThreshold("machine-sink-load-instrs-threshold", cl::desc("Do not try to find alias store for a load if there is a in-path " "block whose instruction number is higher than this threshold."), cl::init(2000), cl::Hidden)
static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, SmallVectorImpl< unsigned > &UsedOpsInCopy, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
unsigned const TargetRegisterInfo * TRI
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, ArrayRef< MIRegs > DbgValuesToSink)
Sink an instruction and its associated debug instructions.
static cl::opt< bool > SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden)
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Return true if MI is likely to be usable as a memory operation by the implicit null check optimizatio...
static cl::opt< bool > SinkInstsIntoCycle("sink-insts-to-avoid-spills", cl::desc("Sink instructions into cycles to avoid " "register spills"), cl::init(false), cl::Hidden)
static cl::opt< unsigned > SinkLoadBlocksThreshold("machine-sink-load-blocks-threshold", cl::desc("Do not try to find alias store for a load if the block number in " "the straight line is higher than this threshold."), cl::init(20), cl::Hidden)
unsigned Reg
Machine code sinking
static bool hasRegisterDependency(MachineInstr *MI, SmallVectorImpl< unsigned > &UsedOpsInCopy, SmallVectorImpl< unsigned > &DefedRegsInCopy, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits)
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, SmallVectorImpl< unsigned > &UsedOpsInCopy, SmallVectorImpl< unsigned > &DefedRegsInCopy)
static bool blockPrologueInterferes(MachineBasicBlock *BB, MachineBasicBlock::iterator End, MachineInstr &MI, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, const MachineRegisterInfo *MRI)
Return true if a target defined block prologue instruction interferes with a sink candidate.
static cl::opt< unsigned > SplitEdgeProbabilityThreshold("machine-sink-split-probability-threshold", cl::desc("Percentage threshold for splitting single-instruction critical edge. " "If the branch threshold is higher than this threshold, we allow " "speculative execution of up to 1 instruction to avoid branching to " "splitted critical edge"), cl::init(40), cl::Hidden)
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, Register Reg)
If the sunk instruction is a copy, try to forward the copy instead of leaving an 'undef' DBG_VALUE in...
static MachineBasicBlock * getSingleLiveInSuccBB(MachineBasicBlock &CurBB, const SmallPtrSetImpl< MachineBasicBlock * > &SinkableBBs, unsigned Reg, const TargetRegisterInfo *TRI)
static cl::opt< unsigned > SinkIntoCycleLimit("machine-sink-cycle-limit", cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden)
#define DEBUG_TYPE
Definition: MachineSink.cpp:65
static cl::opt< bool > UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden)
std::pair< MachineInstr *, SmallVector< unsigned, 2 > > MIRegs
This file implements a map that provides insertion order iteration.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the PointerIntPair class.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
Definition: Sink.cpp:175
static bool SinkInstruction(Instruction *Inst, SmallPtrSetImpl< Instruction * > &Stores, DominatorTree &DT, LoopInfo &LI, AAResults &AA)
SinkInstruction - Determine whether it is safe to sink the specified machine instruction out of its c...
Definition: Sink.cpp:103
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
static const DILocation * getMergedLocation(const DILocation *LocA, const DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
A debug info location.
Definition: DebugLoc.h:33
Identifies a unique instance of a variable.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Base class for the actual dominator tree node.
iterator_range< iterator > children()
const_toplevel_iterator toplevel_end() const
unsigned getCycleDepth(const BlockT *Block) const
get the depth for the cycle which containing a given block.
const_toplevel_iterator toplevel_begin() const
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
A possibly irreducible generalization of a Loop.
BlockT * getHeader() const
bool isReducible() const
Whether the cycle is a natural loop.
BlockT * getCyclePreheader() const
Return the preheader block for this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool shouldSink(const MachineInstr &MI) const override
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
Definition: LiveRegUnits.h:47
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
MCSubRegIterator enumerates all sub-registers of Reg.
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
unsigned succ_size() const
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Remove the specified register from the live in set.
bool sizeWithoutDebugLargerThan(unsigned Limit) const
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
void onEdgeSplit(const MachineBasicBlock &NewPredecessor, const MachineBasicBlock &NewSuccessor, const MachineBranchProbabilityInfo &MBPI)
incrementally calculate block frequencies when we split edges, to avoid full CFG traversal.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Legacy analysis pass which computes a MachineCycleInfo.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineBasicBlock * findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B)
findNearestCommonDominator - Find nearest common dominator basic block for basic block A and B.
bool isReachableFromEntry(const MachineBasicBlock *A)
isReachableFromEntry - Return true if A is dominated by the entry block of the function containing it...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Representation of each machine instruction.
Definition: MachineInstr.h:68
static iterator_range< filter_iterator< Operand *, std::function< bool(Operand &Op)> > > getDebugOperandsForReg(Instruction *MI, Register Reg)
Returns a range of all of the operands that correspond to a debug use of Reg.
Definition: MachineInstr.h:566
bool hasDebugOperandForReg(Register Reg) const
Returns whether this debug value has at least one debug operand with the register Reg.
Definition: MachineInstr.h:555
void setDebugValueUndef()
Sets all register debug operands in this debug value instruction to be undef.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
bool isCopyLike() const
Return true if the instruction behaves like a copy.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
VectorType takeVector()
Clear the MapVector and return the underlying vector.
Definition: MapVector.h:56
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
PointerIntPair - This class implements a pair of a pointer and small integer.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high...
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
A vector that has set insertion semantics.
Definition: SetVector.h:40
void clear()
Completely clear the SetVector.
Definition: SetVector.h:213
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
void clear()
Definition: SmallSet.h:216
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
void stable_sort(R &&Range)
Definition: STLExtras.h:1948
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I)
char & MachineSinkingID
MachineSinking - This pass performs sinking on machine instructions.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
char & PostRAMachineSinkingID
This pass perform post-ra machine sink for COPY instructions.
void initializeMachineSinkingPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
TODO: Might pack better if we changed this to a Struct of Arrays, since MachineOperand is width 32,...
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos.
Represents a predicate at the MachineFunction level.