LLVM 17.0.0git
RegisterCoalescer.cpp
Go to the documentation of this file.
1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the generic RegisterCoalescer interface which
10// is used as the common interface used by all clients and
11// implementations of register coalescing.
12//
13//===----------------------------------------------------------------------===//
14
15#include "RegisterCoalescer.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/CodeGen/Passes.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/LaneBitmask.h"
45#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <limits>
57#include <tuple>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "regalloc"
64
// Pass-wide statistics counters, reported under -stats (llvm/ADT/Statistic.h).
65STATISTIC(numJoins , "Number of interval joins performed");
66STATISTIC(numCrossRCs , "Number of cross class joins performed");
67STATISTIC(numCommutes , "Number of instruction commuting performed");
68STATISTIC(numExtends , "Number of copies extended");
69STATISTIC(NumReMats , "Number of instructions re-materialized");
70STATISTIC(NumInflated , "Number of register classes inflated");
71STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
73STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
74
// Command-line knobs controlling coalescing behavior.
75static cl::opt<bool> EnableJoining("join-liveintervals",
76 cl::desc("Coalesce copies (default=true)"),
77 cl::init(true), cl::Hidden);
78
79static cl::opt<bool> UseTerminalRule("terminal-rule",
80 cl::desc("Apply the terminal rule"),
81 cl::init(false), cl::Hidden);
82
83/// Temporary flag to test critical edge unsplitting.
84static cl::opt<bool>
85EnableJoinSplits("join-splitedges",
86 cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87
88/// Temporary flag to test global copy optimization.
// NOTE(review): scrape artifact — the `static cl::opt<bool>` declarator line
// (doxygen line 89) appears to be missing here; verify against upstream.
90EnableGlobalCopies("join-globalcopies",
91 cl::desc("Coalesce copies that span blocks (default=subtarget)"),
93
94static cl::opt<bool>
95VerifyCoalescing("verify-coalescing",
96 cl::desc("Verify machine instrs before and after register coalescing"),
98
// NOTE(review): the declarator line for this unsigned threshold option
// (doxygen line 99) appears to be missing — verify against upstream.
100 "late-remat-update-threshold", cl::Hidden,
101 cl::desc("During rematerialization for a copy, if the def instruction has "
102 "many other copy uses to be rematerialized, delay the multiple "
103 "separate live interval update work and do them all at once after "
104 "all those rematerialization are done. It will save a lot of "
105 "repeated work. "),
106 cl::init(100));
107
// NOTE(review): declarator line (doxygen 108) appears missing — verify upstream.
109 "large-interval-size-threshold", cl::Hidden,
110 cl::desc("If the valnos size of an interval is larger than the threshold, "
111 "it is regarded as a large interval. "),
112 cl::init(100));
113
// NOTE(review): declarator line (doxygen 114) appears missing — verify upstream.
115 "large-interval-freq-threshold", cl::Hidden,
116 cl::desc("For a large interval, if it is coalesed with other live "
117 "intervals many times more than the threshold, stop its "
118 "coalescing to control the compile time. "),
119 cl::init(256));
121namespace {
122
123 class JoinVals;
124
// NOTE(review): this scraped copy has dropped several declaration lines inside
// the class (the bare doxygen line numbers such as 149, 155, 179, 182, 200,
// 349, 366, 389 below mark the gaps). Restore them from upstream before use.
// The class implements the register-coalescing machine-function pass: it walks
// copy instructions and merges the live intervals of their operands.
125 class RegisterCoalescer : public MachineFunctionPass,
// NOTE(review): second base class line (doxygen 126, presumably a
// LiveRangeEdit delegate) appears missing — verify against upstream.
127 MachineFunction* MF = nullptr;
128 MachineRegisterInfo* MRI = nullptr;
129 const TargetRegisterInfo* TRI = nullptr;
130 const TargetInstrInfo* TII = nullptr;
131 LiveIntervals *LIS = nullptr;
132 const MachineLoopInfo* Loops = nullptr;
133 AliasAnalysis *AA = nullptr;
134 RegisterClassInfo RegClassInfo;
135
136 /// Position and VReg of a PHI instruction during coalescing.
137 struct PHIValPos {
138 SlotIndex SI; ///< Slot where this PHI occurs.
139 Register Reg; ///< VReg the PHI occurs in.
140 unsigned SubReg; ///< Qualifying subregister for Reg.
141 };
142
143 /// Map from debug instruction number to PHI position during coalescing.
// NOTE(review): the member declaration for this map (doxygen 144) is missing.
145 /// Index of, for each VReg, which debug instruction numbers and
146 /// corresponding PHIs are sensitive to coalescing. Each VReg may have
147 /// multiple PHI defs, at different positions.
149
150 /// Debug variable location tracking -- for each VReg, maintain an
151 /// ordered-by-slot-index set of DBG_VALUEs, to help quick
152 /// identification of whether coalescing may change location validity.
153 using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
155
156 /// A LaneMask to remember on which subregister live ranges we need to call
157 /// shrinkToUses() later.
158 LaneBitmask ShrinkMask;
159
160 /// True if the main range of the currently coalesced intervals should be
161 /// checked for smaller live intervals.
162 bool ShrinkMainRange = false;
163
164 /// True if the coalescer should aggressively coalesce global copies
165 /// in favor of keeping local copies.
166 bool JoinGlobalCopies = false;
167
168 /// True if the coalescer should aggressively coalesce fall-thru
169 /// blocks exclusively containing copies.
170 bool JoinSplitEdges = false;
171
172 /// Copy instructions yet to be coalesced.
// NOTE(review): the WorkList member declaration (doxygen 173) is missing.
174 SmallVector<MachineInstr*, 8> LocalWorkList;
175
176 /// Set of instruction pointers that have been erased, and
177 /// that may be present in WorkList.
179
180 /// Dead instructions that are about to be deleted.
182
183 /// Virtual registers to be considered for register class inflation.
184 SmallVector<Register, 8> InflateRegs;
185
186 /// The collection of live intervals which should have been updated
187 /// immediately after rematerialiation but delayed until
188 /// lateLiveIntervalUpdate is called.
189 DenseSet<Register> ToBeUpdated;
190
191 /// Record how many times the large live interval with many valnos
192 /// has been tried to join with other live interval.
193 DenseMap<Register, unsigned long> LargeLIVisitCounter;
194
195 /// Recursively eliminate dead defs in DeadDefs.
196 void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
197
198 /// LiveRangeEdit callback for eliminateDeadDefs().
200
201 /// Coalesce the LocalWorkList.
202 void coalesceLocals();
203
204 /// Join compatible live intervals
205 void joinAllIntervals();
206
207 /// Coalesce copies in the specified MBB, putting
208 /// copies that cannot yet be coalesced into WorkList.
209 void copyCoalesceInMBB(MachineBasicBlock *MBB);
210
211 /// Tries to coalesce all copies in CurrList. Returns true if any progress
212 /// was made.
213 bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
214
215 /// If one def has many copy like uses, and those copy uses are all
216 /// rematerialized, the live interval update needed for those
217 /// rematerializations will be delayed and done all at once instead
218 /// of being done multiple times. This is to save compile cost because
219 /// live interval update is costly.
220 void lateLiveIntervalUpdate();
221
222 /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
223 /// has no value defined in the predecessors. If the incoming value is the
224 /// same as defined by the copy itself, the value is considered undefined.
225 bool copyValueUndefInPredecessors(LiveRange &S,
226 const MachineBasicBlock *MBB,
227 LiveQueryResult SLRQ);
228
229 /// Set necessary undef flags on subregister uses after pruning out undef
230 /// lane segments from the subrange.
231 void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
232 LaneBitmask PrunedLanes);
233
234 /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
235 /// src/dst of the copy instruction CopyMI. This returns true if the copy
236 /// was successfully coalesced away. If it is not currently possible to
237 /// coalesce this interval, but it may be possible if other things get
238 /// coalesced, then it returns true by reference in 'Again'.
239 bool joinCopy(MachineInstr *CopyMI, bool &Again);
240
241 /// Attempt to join these two intervals. On failure, this
242 /// returns false. The output "SrcInt" will not have been modified, so we
243 /// can use this information below to update aliases.
244 bool joinIntervals(CoalescerPair &CP);
245
246 /// Attempt joining two virtual registers. Return true on success.
247 bool joinVirtRegs(CoalescerPair &CP);
248
249 /// If a live interval has many valnos and is coalesced with other
250 /// live intervals many times, we regard such live interval as having
251 /// high compile time cost.
252 bool isHighCostLiveInterval(LiveInterval &LI);
253
254 /// Attempt joining with a reserved physreg.
255 bool joinReservedPhysReg(CoalescerPair &CP);
256
257 /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
258 /// Subranges in @p LI which only partially interfere with the desired
259 /// LaneMask are split as necessary. @p LaneMask are the lanes that
260 /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
261 /// lanemasks already adjusted to the coalesced register.
262 void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
263 LaneBitmask LaneMask, CoalescerPair &CP,
264 unsigned DstIdx);
265
266 /// Join the liveranges of two subregisters. Joins @p RRange into
267 /// @p LRange, @p RRange may be invalid afterwards.
268 void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
269 LaneBitmask LaneMask, const CoalescerPair &CP);
270
271 /// We found a non-trivially-coalescable copy. If the source value number is
272 /// defined by a copy from the destination reg see if we can merge these two
273 /// destination reg valno# into a single value number, eliminating a copy.
274 /// This returns true if an interval was modified.
275 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
276
277 /// Return true if there are definitions of IntB
278 /// other than BValNo val# that can reach uses of AValno val# of IntA.
279 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
280 VNInfo *AValNo, VNInfo *BValNo);
281
282 /// We found a non-trivially-coalescable copy.
283 /// If the source value number is defined by a commutable instruction and
284 /// its other operand is coalesced to the copy dest register, see if we
285 /// can transform the copy into a noop by commuting the definition.
286 /// This returns a pair of two flags:
287 /// - the first element is true if an interval was modified,
288 /// - the second element is true if the destination interval needs
289 /// to be shrunk after deleting the copy.
290 std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
291 MachineInstr *CopyMI);
292
293 /// We found a copy which can be moved to its less frequent predecessor.
294 bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
295
296 /// If the source of a copy is defined by a
297 /// trivial computation, replace the copy by rematerialize the definition.
298 bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
299 bool &IsDefCopy);
300
301 /// Return true if a copy involving a physreg should be joined.
302 bool canJoinPhys(const CoalescerPair &CP);
303
304 /// Replace all defs and uses of SrcReg to DstReg and update the subregister
305 /// number if it is not zero. If DstReg is a physical register and the
306 /// existing subregister number of the def / use being updated is not zero,
307 /// make sure to set it to the correct physical subregister.
308 void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
309
310 /// If the given machine operand reads only undefined lanes add an undef
311 /// flag.
312 /// This can happen when undef uses were previously concealed by a copy
313 /// which we coalesced. Example:
314 /// %0:sub0<def,read-undef> = ...
315 /// %1 = COPY %0 <-- Coalescing COPY reveals undef
316 /// = use %1:sub1 <-- hidden undef use
317 void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
318 MachineOperand &MO, unsigned SubRegIdx);
319
320 /// Handle copies of undef values. If the undef value is an incoming
321 /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
322 /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
323 /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
324 MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
325
326 /// Check whether or not we should apply the terminal rule on the
327 /// destination (Dst) of \p Copy.
328 /// When the terminal rule applies, Copy is not profitable to
329 /// coalesce.
330 /// Dst is terminal if it has exactly one affinity (Dst, Src) and
331 /// at least one interference (Dst, Dst2). If Dst is terminal, the
332 /// terminal rule consists in checking that at least one of
333 /// interfering node, say Dst2, has an affinity of equal or greater
334 /// weight with Src.
335 /// In that case, Dst2 and Dst will not be able to be both coalesced
336 /// with Src. Since Dst2 exposes more coalescing opportunities than
337 /// Dst, we can drop \p Copy.
338 bool applyTerminalRule(const MachineInstr &Copy) const;
339
340 /// Wrapper method for \see LiveIntervals::shrinkToUses.
341 /// This method does the proper fixing of the live-ranges when the afore
342 /// mentioned method returns true.
343 void shrinkToUses(LiveInterval *LI,
344 SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
345 NumShrinkToUses++;
346 if (LIS->shrinkToUses(LI, Dead)) {
347 /// Check whether or not \p LI is composed by multiple connected
348 /// components and if that is the case, fix that.
// NOTE(review): the declaration of SplitLIs (doxygen 349) is missing here.
350 LIS->splitSeparateComponents(*LI, SplitLIs);
351 }
352 }
353
354 /// Wrapper Method to do all the necessary work when an Instruction is
355 /// deleted.
356 /// Optimizations should use this to make sure that deleted instructions
357 /// are always accounted for.
358 void deleteInstr(MachineInstr* MI) {
359 ErasedInstrs.insert(MI);
361 MI->eraseFromParent();
362 }
363
364 /// Walk over function and initialize the DbgVRegToValues map.
366
367 /// Test whether, after merging, any DBG_VALUEs would refer to a
368 /// different value number than before merging, and whether this can
369 /// be resolved. If not, mark the DBG_VALUE as being undef.
370 void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
371 JoinVals &LHSVals, LiveRange &RHS,
372 JoinVals &RHSVals);
373
374 void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
375 LiveRange &RegRange, JoinVals &Vals2);
376
377 public:
378 static char ID; ///< Class identification, replacement for typeinfo
379
380 RegisterCoalescer() : MachineFunctionPass(ID) {
382 }
383
384 void getAnalysisUsage(AnalysisUsage &AU) const override;
385
386 void releaseMemory() override;
387
388 /// This is the pass entry point.
// NOTE(review): the runOnMachineFunction declaration (doxygen 389) is missing.
390
391 /// Implement the dump method.
392 void print(raw_ostream &O, const Module* = nullptr) const override;
393 };
394
395} // end anonymous namespace
396
// Pass registration boilerplate: pass ID, the externally visible handle used
// by the pass pipeline, and the INITIALIZE_PASS macros.
397char RegisterCoalescer::ID = 0;
398
399char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
400
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY lines (doxygen 403-406) and
// the trailing arguments of INITIALIZE_PASS_END (doxygen 408) are missing
// from this scraped copy — restore from upstream before compiling.
401INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
402 "Simple Register Coalescing", false, false)
407INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
409
/// Decompose a copy-like instruction into (Src, Dst, SrcSub, DstSub).
/// Handles plain COPY and SUBREG_TO_REG (whose effective destination
/// sub-index is the composition of the def's subreg and the immediate
/// operand). Returns false for any other instruction.
// NOTE(review): the second parameter line (doxygen 411, presumably
// `const MachineInstr *MI, Register &Src,`) is missing from this scraped
// copy — the body below reads MI and writes Src.
410[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
412 Register &Dst, unsigned &SrcSub,
413 unsigned &DstSub) {
414 if (MI->isCopy()) {
// A COPY's operands are: 0 = def (Dst), 1 = use (Src).
415 Dst = MI->getOperand(0).getReg();
416 DstSub = MI->getOperand(0).getSubReg();
417 Src = MI->getOperand(1).getReg();
418 SrcSub = MI->getOperand(1).getSubReg();
419 } else if (MI->isSubregToReg()) {
420 Dst = MI->getOperand(0).getReg();
// Compose the def's subreg index with the subreg index immediate
// (operand 3) to get the effective destination lane.
421 DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
422 MI->getOperand(3).getImm());
423 Src = MI->getOperand(2).getReg();
424 SrcSub = MI->getOperand(2).getSubReg();
425 } else
426 return false;
427 return true;
428}
429
430/// Return true if this block should be vacated by the coalescer to eliminate
431/// branches. The important cases to handle in the coalescer are critical edges
432/// split during phi elimination which contain only copies. Simple blocks that
433/// contain non-branches should also be vacated, but this can be handled by an
434/// earlier pass similar to early if-conversion.
435static bool isSplitEdge(const MachineBasicBlock *MBB) {
436 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
437 return false;
438
439 for (const auto &MI : *MBB) {
440 if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
441 return false;
442 }
443 return true;
444}
445
// NOTE(review): the function header line (doxygen 446, presumably
// `bool CoalescerPair::setRegisters(const MachineInstr *MI) {`) is missing
// from this scraped copy; the body below resets the pair's state and then
// derives (SrcReg, DstReg, SrcIdx, DstIdx, NewRC, Flipped, CrossClass,
// Partial) from a copy-like instruction, returning false if the copy is not
// coalescable.
447 SrcReg = DstReg = Register();
448 SrcIdx = DstIdx = 0;
449 NewRC = nullptr;
450 Flipped = CrossClass = false;
451
452 Register Src, Dst;
453 unsigned SrcSub = 0, DstSub = 0;
454 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
455 return false;
// Partial: either side of the copy uses a subregister index.
456 Partial = SrcSub || DstSub;
457
458 // If one register is a physreg, it must be Dst.
459 if (Src.isPhysical()) {
// Two physregs cannot be coalesced by this pass.
460 if (Dst.isPhysical())
461 return false;
462 std::swap(Src, Dst);
463 std::swap(SrcSub, DstSub);
464 Flipped = true;
465 }
466
467 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
468
469 if (Dst.isPhysical()) {
470 // Eliminate DstSub on a physreg.
471 if (DstSub) {
472 Dst = TRI.getSubReg(Dst, DstSub);
473 if (!Dst) return false;
474 DstSub = 0;
475 }
476
477 // Eliminate SrcSub by picking a corresponding Dst superregister.
478 if (SrcSub) {
479 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
480 if (!Dst) return false;
481 } else if (!MRI.getRegClass(Src)->contains(Dst)) {
482 return false;
483 }
484 } else {
485 // Both registers are virtual.
486 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
487 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
488
489 // Both registers have subreg indices.
490 if (SrcSub && DstSub) {
491 // Copies between different sub-registers are never coalescable.
492 if (Src == Dst && SrcSub != DstSub)
493 return false;
494
495 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
496 SrcIdx, DstIdx);
497 if (!NewRC)
498 return false;
499 } else if (DstSub) {
500 // SrcReg will be merged with a sub-register of DstReg.
501 SrcIdx = DstSub;
502 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
503 } else if (SrcSub) {
504 // DstReg will be merged with a sub-register of SrcReg.
505 DstIdx = SrcSub;
506 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
507 } else {
508 // This is a straight copy without sub-registers.
509 NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
510 }
511
512 // The combined constraint may be impossible to satisfy.
513 if (!NewRC)
514 return false;
515
516 // Prefer SrcReg to be a sub-register of DstReg.
517 // FIXME: Coalescer should support subregs symmetrically.
518 if (DstIdx && !SrcIdx) {
519 std::swap(Src, Dst);
520 std::swap(SrcIdx, DstIdx);
521 Flipped = !Flipped;
522 }
523
// CrossClass: joining will need a register class different from at least
// one of the original classes, so RegClassInfo must be consulted later.
524 CrossClass = NewRC != DstRC || NewRC != SrcRC;
525 }
526 // Check our invariants
527 assert(Src.isVirtual() && "Src must be virtual");
528 assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
529 SrcReg = Src;
530 DstReg = Dst;
531 return true;
532}
533
// NOTE(review): the function header line (doxygen 534, presumably
// `bool CoalescerPair::flip() {`) is missing from this scraped copy.
// The body swaps the roles of source and destination; it refuses to flip
// when the destination is a physical register (a physreg must stay Dst).
535 if (DstReg.isPhysical())
536 return false;
537 std::swap(SrcReg, DstReg);
538 std::swap(SrcIdx, DstIdx);
539 Flipped = !Flipped;
540 return true;
541}
542
// NOTE(review): the function header line (doxygen 543, presumably
// `bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {`) is
// missing from this scraped copy. The body checks whether the given
// copy-like instruction is compatible with the already-computed
// (SrcReg, DstReg, SrcIdx, DstIdx) pair.
544 if (!MI)
545 return false;
546 Register Src, Dst;
547 unsigned SrcSub = 0, DstSub = 0;
548 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
549 return false;
550
551 // Find the virtual register that is SrcReg.
552 if (Dst == SrcReg) {
553 std::swap(Src, Dst);
554 std::swap(SrcSub, DstSub);
555 } else if (Src != SrcReg) {
556 return false;
557 }
558
559 // Now check that Dst matches DstReg.
560 if (DstReg.isPhysical()) {
561 if (!Dst.isPhysical())
562 return false;
563 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
564 // DstSub could be set for a physreg from INSERT_SUBREG.
565 if (DstSub)
566 Dst = TRI.getSubReg(Dst, DstSub);
567 // Full copy of Src.
568 if (!SrcSub)
569 return DstReg == Dst;
570 // This is a partial register copy. Check that the parts match.
571 return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
572 } else {
573 // DstReg is virtual.
574 if (DstReg != Dst)
575 return false;
576 // Registers match, do the subregisters line up?
577 return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
578 TRI.composeSubRegIndices(DstIdx, DstSub);
579 }
580}
581
/// Declare the analyses this pass requires and preserves. Coalescing never
/// changes the CFG, only register operands and live intervals.
582void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
583 AU.setPreservesCFG();
// NOTE(review): the addRequired/addPreserved calls (doxygen lines 584-591)
// are missing from this scraped copy — restore from upstream.
592}
593
/// Recursively eliminate the instructions in DeadDefs. If the caller already
/// has a LiveRangeEdit in flight, reuse it; otherwise build a throwaway one
/// (with this pass as the delegate, so LRE_WillEraseInstruction fires).
594void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
595 if (Edit) {
596 Edit->eliminateDeadDefs(DeadDefs);
597 return;
598 }
// NOTE(review): the declaration of NewRegs (doxygen line 599) is missing
// from this scraped copy — restore from upstream.
600 LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
601 nullptr, this).eliminateDeadDefs(DeadDefs);
602}
603
604void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
605 // MI may be in WorkList. Make sure we don't visit it.
606 ErasedInstrs.insert(MI);
607}
608
/// Given copy `B1 = A3` where A3 was itself defined by a copy from B, try to
/// extend B's earlier value (B0) across the gap and merge it with B1,
/// eliminating the copy. Returns true if an interval was modified.
609bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
610 MachineInstr *CopyMI) {
611 assert(!CP.isPartial() && "This doesn't work for partial copies.");
612 assert(!CP.isPhys() && "This doesn't work for physreg copies.");
613
614 LiveInterval &IntA =
615 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
616 LiveInterval &IntB =
617 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
618 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
619
620 // We have a non-trivially-coalescable copy with IntA being the source and
621 // IntB being the dest, thus this defines a value number in IntB. If the
622 // source value number (in IntA) is defined by a copy from B, see if we can
623 // merge these two pieces of B into a single value number, eliminating a copy.
624 // For example:
625 //
626 // A3 = B0
627 // ...
628 // B1 = A3 <- this copy
629 //
630 // In this case, B0 can be extended to where the B1 copy lives, allowing the
631 // B1 value number to be replaced with B0 (which simplifies the B
632 // liveinterval).
633
634 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
635 // the example above.
// NOTE(review): the declaration of BS (doxygen line 636, presumably
// `LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);`) is
// missing from this scraped copy — restore from upstream.
637 if (BS == IntB.end()) return false;
638 VNInfo *BValNo = BS->valno;
639
640 // Get the location that B is defined at. Two options: either this value has
641 // an unknown definition point or it is defined at CopyIdx. If unknown, we
642 // can't process it.
643 if (BValNo->def != CopyIdx) return false;
644
645 // AValNo is the value number in A that defines the copy, A3 in the example.
646 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
647 LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
648 // The live segment might not exist after fun with physreg coalescing.
649 if (AS == IntA.end()) return false;
650 VNInfo *AValNo = AS->valno;
651
652 // If AValNo is defined as a copy from IntB, we can potentially process this.
653 // Get the instruction that defines this value number.
654 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
655 // Don't allow any partial copies, even if isCoalescable() allows them.
656 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
657 return false;
658
659 // Get the Segment in IntB that this value number starts with.
// NOTE(review): the declaration of ValS (doxygen line 660, presumably
// `LiveInterval::iterator ValS =`) is missing from this scraped copy.
661 IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
662 if (ValS == IntB.end())
663 return false;
664
665 // Make sure that the end of the live segment is inside the same block as
666 // CopyMI.
667 MachineInstr *ValSEndInst =
668 LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
669 if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
670 return false;
671
672 // Okay, we now know that ValS ends in the same block that the CopyMI
673 // live-range starts. If there are no intervening live segments between them
674 // in IntB, we can merge them.
675 if (ValS+1 != BS) return false;
676
677 LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
678
679 SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
680 // We are about to delete CopyMI, so need to remove it as the 'instruction
681 // that defines this value #'. Update the valnum with the new defining
682 // instruction #.
683 BValNo->def = FillerStart;
684
685 // Okay, we can merge them. We need to insert a new liverange:
686 // [ValS.end, BS.begin) of either value number, then we merge the
687 // two value numbers.
688 IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
689
690 // Okay, merge "B1" into the same value number as "B0".
691 if (BValNo != ValS->valno)
692 IntB.MergeValueNumberInto(BValNo, ValS->valno);
693
694 // Do the same for the subregister segments.
695 for (LiveInterval::SubRange &S : IntB.subranges()) {
696 // Check for SubRange Segments of the form [1234r,1234d:0) which can be
697 // removed to prevent creating bogus SubRange Segments.
698 LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
699 if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
700 S.removeSegment(*SS, true);
701 continue;
702 }
703 // The subrange may have ended before FillerStart. If so, extend it.
704 if (!S.getVNInfoAt(FillerStart)) {
705 SlotIndex BBStart =
706 LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
707 S.extendInBlock(BBStart, FillerStart);
708 }
709 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
710 S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
711 VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
712 if (SubBValNo != SubValSNo)
713 S.MergeValueNumberInto(SubBValNo, SubValSNo);
714 }
715
716 LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
717
718 // If the source instruction was killing the source register before the
719 // merge, unset the isKill marker given the live range has been extended.
720 int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
721 if (UIdx != -1) {
722 ValSEndInst->getOperand(UIdx).setIsKill(false);
723 }
724
725 // Rewrite the copy.
726 CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
727 // If the copy instruction was killing the destination register or any
728 // subrange before the merge trim the live range.
729 bool RecomputeLiveRange = AS->end == CopyIdx;
730 if (!RecomputeLiveRange) {
731 for (LiveInterval::SubRange &S : IntA.subranges()) {
732 LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
733 if (SS != S.end() && SS->end == CopyIdx) {
734 RecomputeLiveRange = true;
735 break;
736 }
737 }
738 }
739 if (RecomputeLiveRange)
740 shrinkToUses(&IntA);
741
742 ++numExtends;
743 return true;
744}
745
/// Conservatively determine whether any IntB definition other than BValNo
/// can reach a use of AValNo's segments in IntA — i.e. whether the two
/// value numbers' live segments overlap anywhere. True means the commuting
/// transformation in removeCopyByCommutingDef would be unsafe.
746bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
747 LiveInterval &IntB,
748 VNInfo *AValNo,
749 VNInfo *BValNo) {
750 // If AValNo has PHI kills, conservatively assume that IntB defs can reach
751 // the PHI values.
752 if (LIS->hasPHIKill(IntA, AValNo))
753 return true;
754
755 for (LiveRange::Segment &ASeg : IntA.segments) {
756 if (ASeg.valno != AValNo) continue;
// NOTE(review): the declaration of the iterator BI (doxygen line 757) is
// missing from this scraped copy — restore from upstream.
758 if (BI != IntB.begin())
759 --BI;
// Scan every IntB segment that can overlap ASeg; any overlap with a
// value number other than BValNo means another def reaches the region.
760 for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
761 if (BI->valno == BValNo)
762 continue;
763 if (BI->start <= ASeg.start && BI->end > ASeg.start)
764 return true;
765 if (BI->start > ASeg.start && BI->start < ASeg.end)
766 return true;
767 }
768 }
769 return false;
770}
771
772/// Copy segments with value number @p SrcValNo from liverange @p Src to live
773/// range @Dst and use value number @p DstValNo there.
774static std::pair<bool,bool>
775addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
776 const VNInfo *SrcValNo) {
777 bool Changed = false;
778 bool MergedWithDead = false;
779 for (const LiveRange::Segment &S : Src.segments) {
780 if (S.valno != SrcValNo)
781 continue;
782 // This is adding a segment from Src that ends in a copy that is about
783 // to be removed. This segment is going to be merged with a pre-existing
784 // segment in Dst. This works, except in cases when the corresponding
785 // segment in Dst is dead. For example: adding [192r,208r:1) from Src
786 // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
787 // Recognized such cases, so that the segments can be shrunk.
788 LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
789 LiveRange::Segment &Merged = *Dst.addSegment(Added);
790 if (Merged.end.isDead())
791 MergedWithDead = true;
792 Changed = true;
793 }
794 return std::make_pair(Changed, MergedWithDead);
795}
796
797std::pair<bool,bool>
798RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
799 MachineInstr *CopyMI) {
800 assert(!CP.isPhys());
801
802 LiveInterval &IntA =
803 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
804 LiveInterval &IntB =
805 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
806
807 // We found a non-trivially-coalescable copy with IntA being the source and
808 // IntB being the dest, thus this defines a value number in IntB. If the
809 // source value number (in IntA) is defined by a commutable instruction and
810 // its other operand is coalesced to the copy dest register, see if we can
811 // transform the copy into a noop by commuting the definition. For example,
812 //
813 // A3 = op A2 killed B0
814 // ...
815 // B1 = A3 <- this copy
816 // ...
817 // = op A3 <- more uses
818 //
819 // ==>
820 //
821 // B2 = op B0 killed A2
822 // ...
823 // B1 = B2 <- now an identity copy
824 // ...
825 // = op B2 <- more uses
826
827 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
828 // the example above.
829 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
830 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
831 assert(BValNo != nullptr && BValNo->def == CopyIdx);
832
833 // AValNo is the value number in A that defines the copy, A3 in the example.
834 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
835 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
836 if (AValNo->isPHIDef())
837 return { false, false };
839 if (!DefMI)
840 return { false, false };
841 if (!DefMI->isCommutable())
842 return { false, false };
843 // If DefMI is a two-address instruction then commuting it will change the
844 // destination register.
845 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
846 assert(DefIdx != -1);
847 unsigned UseOpIdx;
848 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
849 return { false, false };
850
851 // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
852 // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
853 // passed to the method. That _other_ operand is chosen by
854 // the findCommutedOpIndices() method.
855 //
856 // That is obviously an area for improvement in case of instructions having
857 // more than 2 operands. For example, if some instruction has 3 commutable
858 // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
859 // op#2<->op#3) of commute transformation should be considered/tried here.
860 unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
861 if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
862 return { false, false };
863
864 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
865 Register NewReg = NewDstMO.getReg();
866 if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
867 return { false, false };
868
869 // Make sure there are no other definitions of IntB that would reach the
870 // uses which the new definition can reach.
871 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
872 return { false, false };
873
874 // If some of the uses of IntA.reg is already coalesced away, return false.
875 // It's not possible to determine whether it's safe to perform the coalescing.
876 for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
877 MachineInstr *UseMI = MO.getParent();
878 unsigned OpNo = &MO - &UseMI->getOperand(0);
879 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
881 if (US == IntA.end() || US->valno != AValNo)
882 continue;
883 // If this use is tied to a def, we can't rewrite the register.
884 if (UseMI->isRegTiedToDefOperand(OpNo))
885 return { false, false };
886 }
887
888 LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
889 << *DefMI);
890
891 // At this point we have decided that it is legal to do this
892 // transformation. Start by commuting the instruction.
894 MachineInstr *NewMI =
895 TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
896 if (!NewMI)
897 return { false, false };
898 if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
899 !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
900 return { false, false };
901 if (NewMI != DefMI) {
902 LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
904 MBB->insert(Pos, NewMI);
905 MBB->erase(DefMI);
906 }
907
908 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
909 // A = or A, B
910 // ...
911 // B = A
912 // ...
913 // C = killed A
914 // ...
915 // = B
916
917 // Update uses of IntA of the specific Val# with IntB.
918 for (MachineOperand &UseMO :
919 llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
920 if (UseMO.isUndef())
921 continue;
922 MachineInstr *UseMI = UseMO.getParent();
923 if (UseMI->isDebugInstr()) {
924 // FIXME These don't have an instruction index. Not clear we have enough
925 // info to decide whether to do this replacement or not. For now do it.
926 UseMO.setReg(NewReg);
927 continue;
928 }
929 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
931 assert(US != IntA.end() && "Use must be live");
932 if (US->valno != AValNo)
933 continue;
934 // Kill flags are no longer accurate. They are recomputed after RA.
935 UseMO.setIsKill(false);
936 if (NewReg.isPhysical())
937 UseMO.substPhysReg(NewReg, *TRI);
938 else
939 UseMO.setReg(NewReg);
940 if (UseMI == CopyMI)
941 continue;
942 if (!UseMI->isCopy())
943 continue;
944 if (UseMI->getOperand(0).getReg() != IntB.reg() ||
946 continue;
947
948 // This copy will become a noop. If it's defining a new val#, merge it into
949 // BValNo.
950 SlotIndex DefIdx = UseIdx.getRegSlot();
951 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
952 if (!DVNI)
953 continue;
954 LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
955 assert(DVNI->def == DefIdx);
956 BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
957 for (LiveInterval::SubRange &S : IntB.subranges()) {
958 VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
959 if (!SubDVNI)
960 continue;
961 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
962 assert(SubBValNo->def == CopyIdx);
963 S.MergeValueNumberInto(SubDVNI, SubBValNo);
964 }
965
966 deleteInstr(UseMI);
967 }
968
969 // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
970 // is updated.
971 bool ShrinkB = false;
973 if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
974 if (!IntA.hasSubRanges()) {
975 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
976 IntA.createSubRangeFrom(Allocator, Mask, IntA);
977 } else if (!IntB.hasSubRanges()) {
978 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
979 IntB.createSubRangeFrom(Allocator, Mask, IntB);
980 }
981 SlotIndex AIdx = CopyIdx.getRegSlot(true);
982 LaneBitmask MaskA;
983 const SlotIndexes &Indexes = *LIS->getSlotIndexes();
984 for (LiveInterval::SubRange &SA : IntA.subranges()) {
985 VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
986 // Even if we are dealing with a full copy, some lanes can
987 // still be undefined.
988 // E.g.,
989 // undef A.subLow = ...
990 // B = COPY A <== A.subHigh is undefined here and does
991 // not have a value number.
992 if (!ASubValNo)
993 continue;
994 MaskA |= SA.LaneMask;
995
996 IntB.refineSubRanges(
997 Allocator, SA.LaneMask,
998 [&Allocator, &SA, CopyIdx, ASubValNo,
999 &ShrinkB](LiveInterval::SubRange &SR) {
1000 VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
1001 : SR.getVNInfoAt(CopyIdx);
1002 assert(BSubValNo != nullptr);
1003 auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
1004 ShrinkB |= P.second;
1005 if (P.first)
1006 BSubValNo->def = ASubValNo->def;
1007 },
1008 Indexes, *TRI);
1009 }
1010 // Go over all subranges of IntB that have not been covered by IntA,
1011 // and delete the segments starting at CopyIdx. This can happen if
1012 // IntA has undef lanes that are defined in IntB.
1013 for (LiveInterval::SubRange &SB : IntB.subranges()) {
1014 if ((SB.LaneMask & MaskA).any())
1015 continue;
1016 if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
1017 if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
1018 SB.removeSegment(*S, true);
1019 }
1020 }
1021
1022 BValNo->def = AValNo->def;
1023 auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
1024 ShrinkB |= P.second;
1025 LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
1026
1027 LIS->removeVRegDefAt(IntA, AValNo->def);
1028
1029 LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
1030 ++numCommutes;
1031 return { true, ShrinkB };
1032}
1033
1034/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1035/// predecessor of BB2, and if B is not redefined on the way from A = B
1036/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1037/// execution goes through the path from BB0 to BB2. We may move B = A
1038/// to the predecessor without such reversed copy.
1039/// So we will transform the program from:
1040/// BB0:
1041/// A = B; BB1:
1042/// ... ...
1043/// / \ /
1044/// BB2:
1045/// ...
1046/// B = A;
1047///
1048/// to:
1049///
1050/// BB0: BB1:
1051/// A = B; ...
1052/// ... B = A;
1053/// / \ /
1054/// BB2:
1055/// ...
1056///
1057/// A special case is when BB0 and BB2 are the same BB which is the only
1058/// BB in a loop:
1059/// BB1:
1060/// ...
1061/// BB0/BB2: ----
1062/// B = A; |
1063/// ... |
1064/// A = B; |
1065/// |-------
1066/// |
1067/// We may hoist B = A from BB0/BB2 to BB1.
1068///
1069/// The major preconditions for correctness to remove such partial
1070/// redundancy include:
1071/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1072/// the PHI is defined by the reversed copy A = B in BB0.
1073/// 2. No B is referenced from the start of BB2 to B = A.
1074/// 3. No B is defined from A = B to the end of BB0.
1075/// 4. BB1 has only one successor.
1076///
1077/// 2 and 4 implicitly ensure B is not live at the end of BB1.
1078/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
/// colder place, which not only prevents endless loops, but also makes sure
/// the movement of the copy is beneficial.
1081bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1082 MachineInstr &CopyMI) {
1083 assert(!CP.isPhys());
1084 if (!CopyMI.isFullCopy())
1085 return false;
1086
1087 MachineBasicBlock &MBB = *CopyMI.getParent();
1088 // If this block is the target of an invoke/inlineasm_br, moving the copy into
1089 // the predecessor is tricker, and we don't handle it.
1091 return false;
1092
1093 if (MBB.pred_size() != 2)
1094 return false;
1095
1096 LiveInterval &IntA =
1097 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
1098 LiveInterval &IntB =
1099 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
1100
1101 // A is defined by PHI at the entry of MBB.
1102 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1103 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1104 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1105 if (!AValNo->isPHIDef())
1106 return false;
1107
1108 // No B is referenced before CopyMI in MBB.
1109 if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1110 return false;
1111
1112 // MBB has two predecessors: one contains A = B so no copy will be inserted
1113 // for it. The other one will have a copy moved from MBB.
1114 bool FoundReverseCopy = false;
1115 MachineBasicBlock *CopyLeftBB = nullptr;
1116 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1117 VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1119 if (!DefMI || !DefMI->isFullCopy()) {
1120 CopyLeftBB = Pred;
1121 continue;
1122 }
1123 // Check DefMI is a reverse copy and it is in BB Pred.
1124 if (DefMI->getOperand(0).getReg() != IntA.reg() ||
1125 DefMI->getOperand(1).getReg() != IntB.reg() ||
1126 DefMI->getParent() != Pred) {
1127 CopyLeftBB = Pred;
1128 continue;
1129 }
1130 // If there is any other def of B after DefMI and before the end of Pred,
1131 // we need to keep the copy of B = A at the end of Pred if we remove
1132 // B = A from MBB.
1133 bool ValB_Changed = false;
1134 for (auto *VNI : IntB.valnos) {
1135 if (VNI->isUnused())
1136 continue;
1137 if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1138 ValB_Changed = true;
1139 break;
1140 }
1141 }
1142 if (ValB_Changed) {
1143 CopyLeftBB = Pred;
1144 continue;
1145 }
1146 FoundReverseCopy = true;
1147 }
1148
1149 // If no reverse copy is found in predecessors, nothing to do.
1150 if (!FoundReverseCopy)
1151 return false;
1152
1153 // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1154 // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1155 // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1156 // update IntA/IntB.
1157 //
1158 // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1159 // MBB is hotter than CopyLeftBB.
1160 if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1161 return false;
1162
1163 // Now (almost sure it's) ok to move copy.
1164 if (CopyLeftBB) {
1165 // Position in CopyLeftBB where we should insert new copy.
1166 auto InsPos = CopyLeftBB->getFirstTerminator();
1167
1168 // Make sure that B isn't referenced in the terminators (if any) at the end
1169 // of the predecessor since we're about to insert a new definition of B
1170 // before them.
1171 if (InsPos != CopyLeftBB->end()) {
1172 SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1173 if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1174 return false;
1175 }
1176
1177 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1178 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1179
1180 // Insert new copy to CopyLeftBB.
1181 MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1182 TII->get(TargetOpcode::COPY), IntB.reg())
1183 .addReg(IntA.reg());
1184 SlotIndex NewCopyIdx =
1185 LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1186 IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1187 for (LiveInterval::SubRange &SR : IntB.subranges())
1188 SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1189
1190 // If the newly created Instruction has an address of an instruction that was
1191 // deleted before (object recycled by the allocator) it needs to be removed from
1192 // the deleted list.
1193 ErasedInstrs.erase(NewCopyMI);
1194 } else {
1195 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1196 << printMBBReference(MBB) << '\t' << CopyMI);
1197 }
1198
1199 // Remove CopyMI.
1200 // Note: This is fine to remove the copy before updating the live-ranges.
1201 // While updating the live-ranges, we only look at slot indices and
1202 // never go back to the instruction.
1203 // Mark instructions as deleted.
1204 deleteInstr(&CopyMI);
1205
1206 // Update the liveness.
1207 SmallVector<SlotIndex, 8> EndPoints;
1208 VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1209 LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1210 &EndPoints);
1211 BValNo->markUnused();
1212 // Extend IntB to the EndPoints of its original live interval.
1213 LIS->extendToIndices(IntB, EndPoints);
1214
1215 // Now, do the same for its subranges.
1216 for (LiveInterval::SubRange &SR : IntB.subranges()) {
1217 EndPoints.clear();
1218 VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1219 assert(BValNo && "All sublanes should be live");
1220 LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1221 BValNo->markUnused();
1222 // We can have a situation where the result of the original copy is live,
1223 // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1224 // the copy appear as an endpoint from pruneValue(), but we don't want it
1225 // to because the copy has been removed. We can go ahead and remove that
1226 // endpoint; there is no other situation here that there could be a use at
1227 // the same place as we know that the copy is a full copy.
1228 for (unsigned I = 0; I != EndPoints.size(); ) {
1229 if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1230 EndPoints[I] = EndPoints.back();
1231 EndPoints.pop_back();
1232 continue;
1233 }
1234 ++I;
1235 }
1237 IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
1238 *LIS->getSlotIndexes());
1239 LIS->extendToIndices(SR, EndPoints, Undefs);
1240 }
1241 // If any dead defs were extended, truncate them.
1242 shrinkToUses(&IntB);
1243
1244 // Finally, update the live-range of IntA.
1245 shrinkToUses(&IntA);
1246 return true;
1247}
1248
1249/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1250/// defining a subregister.
1251static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1252 assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1253
1254 for (const MachineOperand &Op : MI.all_defs()) {
1255 if (Op.getReg() != Reg)
1256 continue;
1257 // Return true if we define the full register or don't care about the value
1258 // inside other subregisters.
1259 if (Op.getSubReg() == 0 || Op.isUndef())
1260 return true;
1261 }
1262 return false;
1263}
1264
1265bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1266 MachineInstr *CopyMI,
1267 bool &IsDefCopy) {
1268 IsDefCopy = false;
1269 Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1270 unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1271 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1272 unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1273 if (SrcReg.isPhysical())
1274 return false;
1275
1276 LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1277 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1278 VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1279 if (!ValNo)
1280 return false;
1281 if (ValNo->isPHIDef() || ValNo->isUnused())
1282 return false;
1284 if (!DefMI)
1285 return false;
1286 if (DefMI->isCopyLike()) {
1287 IsDefCopy = true;
1288 return false;
1289 }
1290 if (!TII->isAsCheapAsAMove(*DefMI))
1291 return false;
1292
1294 LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1295 if (!Edit.checkRematerializable(ValNo, DefMI))
1296 return false;
1297
1298 if (!definesFullReg(*DefMI, SrcReg))
1299 return false;
1300 bool SawStore = false;
1301 if (!DefMI->isSafeToMove(AA, SawStore))
1302 return false;
1303 const MCInstrDesc &MCID = DefMI->getDesc();
1304 if (MCID.getNumDefs() != 1)
1305 return false;
1306 // Only support subregister destinations when the def is read-undef.
1307 MachineOperand &DstOperand = CopyMI->getOperand(0);
1308 Register CopyDstReg = DstOperand.getReg();
1309 if (DstOperand.getSubReg() && !DstOperand.isUndef())
1310 return false;
1311
1312 // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1313 // the register substantially (beyond both source and dest size). This is bad
1314 // for performance since it can cascade through a function, introducing many
1315 // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1316 // around after a few subreg copies).
1317 if (SrcIdx && DstIdx)
1318 return false;
1319
1320 const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1321 if (!DefMI->isImplicitDef()) {
1322 if (DstReg.isPhysical()) {
1323 Register NewDstReg = DstReg;
1324
1325 unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1326 DefMI->getOperand(0).getSubReg());
1327 if (NewDstIdx)
1328 NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1329
1330 // Finally, make sure that the physical subregister that will be
1331 // constructed later is permitted for the instruction.
1332 if (!DefRC->contains(NewDstReg))
1333 return false;
1334 } else {
1335 // Theoretically, some stack frame reference could exist. Just make sure
1336 // it hasn't actually happened.
1337 assert(DstReg.isVirtual() &&
1338 "Only expect to deal with virtual or physical registers");
1339 }
1340 }
1341
1342 LiveRangeEdit::Remat RM(ValNo);
1343 RM.OrigMI = DefMI;
1344 if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1345 return false;
1346
1347 DebugLoc DL = CopyMI->getDebugLoc();
1348 MachineBasicBlock *MBB = CopyMI->getParent();
1350 std::next(MachineBasicBlock::iterator(CopyMI));
1351 Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1352 MachineInstr &NewMI = *std::prev(MII);
1353 NewMI.setDebugLoc(DL);
1354
1355 // In a situation like the following:
1356 // %0:subreg = instr ; DefMI, subreg = DstIdx
1357 // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
1358 // instead of widening %1 to the register class of %0 simply do:
1359 // %1 = instr
1360 const TargetRegisterClass *NewRC = CP.getNewRC();
1361 if (DstIdx != 0) {
1362 MachineOperand &DefMO = NewMI.getOperand(0);
1363 if (DefMO.getSubReg() == DstIdx) {
1364 assert(SrcIdx == 0 && CP.isFlipped()
1365 && "Shouldn't have SrcIdx+DstIdx at this point");
1366 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1367 const TargetRegisterClass *CommonRC =
1368 TRI->getCommonSubClass(DefRC, DstRC);
1369 if (CommonRC != nullptr) {
1370 NewRC = CommonRC;
1371
1372 // Instruction might contain "undef %0:subreg" as use operand:
1373 // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1374 //
1375 // Need to check all operands.
1376 for (MachineOperand &MO : NewMI.operands()) {
1377 if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1378 MO.setSubReg(0);
1379 }
1380 }
1381
1382 DstIdx = 0;
1383 DefMO.setIsUndef(false); // Only subregs can have def+undef.
1384 }
1385 }
1386 }
1387
1388 // CopyMI may have implicit operands, save them so that we can transfer them
1389 // over to the newly materialized instruction after CopyMI is removed.
1391 ImplicitOps.reserve(CopyMI->getNumOperands() -
1392 CopyMI->getDesc().getNumOperands());
1393 for (unsigned I = CopyMI->getDesc().getNumOperands(),
1394 E = CopyMI->getNumOperands();
1395 I != E; ++I) {
1396 MachineOperand &MO = CopyMI->getOperand(I);
1397 if (MO.isReg()) {
1398 assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1399 // Discard VReg implicit defs.
1400 if (MO.getReg().isPhysical())
1401 ImplicitOps.push_back(MO);
1402 }
1403 }
1404
1405 CopyMI->eraseFromParent();
1406 ErasedInstrs.insert(CopyMI);
1407
1408 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1409 // We need to remember these so we can add intervals once we insert
1410 // NewMI into SlotIndexes.
1411 SmallVector<MCRegister, 4> NewMIImplDefs;
1412 for (unsigned i = NewMI.getDesc().getNumOperands(),
1413 e = NewMI.getNumOperands();
1414 i != e; ++i) {
1415 MachineOperand &MO = NewMI.getOperand(i);
1416 if (MO.isReg() && MO.isDef()) {
1417 assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
1418 NewMIImplDefs.push_back(MO.getReg().asMCReg());
1419 }
1420 }
1421
1422 if (DstReg.isVirtual()) {
1423 unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1424
1425 if (DefRC != nullptr) {
1426 if (NewIdx)
1427 NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1428 else
1429 NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1430 assert(NewRC && "subreg chosen for remat incompatible with instruction");
1431 }
1432 // Remap subranges to new lanemask and change register class.
1433 LiveInterval &DstInt = LIS->getInterval(DstReg);
1434 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1435 SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1436 }
1437 MRI->setRegClass(DstReg, NewRC);
1438
1439 // Update machine operands and add flags.
1440 updateRegDefsUses(DstReg, DstReg, DstIdx);
1441 NewMI.getOperand(0).setSubReg(NewIdx);
1442 // updateRegDefUses can add an "undef" flag to the definition, since
1443 // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1444 // sure that "undef" is not set.
1445 if (NewIdx == 0)
1446 NewMI.getOperand(0).setIsUndef(false);
1447 // Add dead subregister definitions if we are defining the whole register
1448 // but only part of it is live.
1449 // This could happen if the rematerialization instruction is rematerializing
1450 // more than actually is used in the register.
1451 // An example would be:
1452 // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1453 // ; Copying only part of the register here, but the rest is undef.
1454 // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1455 // ==>
1456 // ; Materialize all the constants but only using one
1457 // %2 = LOAD_CONSTANTS 5, 8
1458 //
1459 // at this point for the part that wasn't defined before we could have
1460 // subranges missing the definition.
1461 if (NewIdx == 0 && DstInt.hasSubRanges()) {
1462 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1463 SlotIndex DefIndex =
1464 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1465 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1467 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1468 if (!SR.liveAt(DefIndex))
1469 SR.createDeadDef(DefIndex, Alloc);
1470 MaxMask &= ~SR.LaneMask;
1471 }
1472 if (MaxMask.any()) {
1473 LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1474 SR->createDeadDef(DefIndex, Alloc);
1475 }
1476 }
1477
1478 // Make sure that the subrange for resultant undef is removed
1479 // For example:
1480 // %1:sub1<def,read-undef> = LOAD CONSTANT 1
1481 // %2 = COPY %1
1482 // ==>
1483 // %2:sub1<def, read-undef> = LOAD CONSTANT 1
1484 // ; Correct but need to remove the subrange for %2:sub0
1485 // ; as it is now undef
1486 if (NewIdx != 0 && DstInt.hasSubRanges()) {
1487 // The affected subregister segments can be removed.
1488 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1489 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1490 bool UpdatedSubRanges = false;
1491 SlotIndex DefIndex =
1492 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1494 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1495 if ((SR.LaneMask & DstMask).none()) {
1497 << "Removing undefined SubRange "
1498 << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1499 // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1500 if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1501 SR.removeValNo(RmValNo);
1502 UpdatedSubRanges = true;
1503 }
1504 } else {
1505 // We know that this lane is defined by this instruction,
1506 // but at this point it may be empty because it is not used by
1507 // anything. This happens when updateRegDefUses adds the missing
1508 // lanes. Assign that lane a dead def so that the interferences
1509 // are properly modeled.
1510 if (SR.empty())
1511 SR.createDeadDef(DefIndex, Alloc);
1512 }
1513 }
1514 if (UpdatedSubRanges)
1515 DstInt.removeEmptySubRanges();
1516 }
1517 } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1518 // The New instruction may be defining a sub-register of what's actually
1519 // been asked for. If so it must implicitly define the whole thing.
1520 assert(DstReg.isPhysical() &&
1521 "Only expect virtual or physical registers in remat");
1522 NewMI.getOperand(0).setIsDead(true);
1524 CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1525 // Record small dead def live-ranges for all the subregisters
1526 // of the destination register.
1527 // Otherwise, variables that live through may miss some
1528 // interferences, thus creating invalid allocation.
1529 // E.g., i386 code:
1530 // %1 = somedef ; %1 GR8
1531 // %2 = remat ; %2 GR32
1532 // CL = COPY %2.sub_8bit
1533 // = somedef %1 ; %1 GR8
1534 // =>
1535 // %1 = somedef ; %1 GR8
1536 // dead ECX = remat ; implicit-def CL
1537 // = somedef %1 ; %1 GR8
1538 // %1 will see the interferences with CL but not with CH since
1539 // no live-ranges would have been created for ECX.
1540 // Fix that!
1541 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1542 for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1543 Units.isValid(); ++Units)
1544 if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1545 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1546 }
1547
1548 if (NewMI.getOperand(0).getSubReg())
1549 NewMI.getOperand(0).setIsUndef();
1550
1551 // Transfer over implicit operands to the rematerialized instruction.
1552 for (MachineOperand &MO : ImplicitOps)
1553 NewMI.addOperand(MO);
1554
1555 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1556 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1557 MCRegister Reg = NewMIImplDefs[i];
1558 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1559 if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1560 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1561 }
1562
1563 LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1564 ++NumReMats;
1565
1566 // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1567 // to describe DstReg instead.
1568 if (MRI->use_nodbg_empty(SrcReg)) {
1569 for (MachineOperand &UseMO :
1570 llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1571 MachineInstr *UseMI = UseMO.getParent();
1572 if (UseMI->isDebugInstr()) {
1573 if (DstReg.isPhysical())
1574 UseMO.substPhysReg(DstReg, *TRI);
1575 else
1576 UseMO.setReg(DstReg);
1577 // Move the debug value directly after the def of the rematerialized
1578 // value in DstReg.
1579 MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1580 LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1581 }
1582 }
1583 }
1584
1585 if (ToBeUpdated.count(SrcReg))
1586 return true;
1587
1588 unsigned NumCopyUses = 0;
1589 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1590 if (UseMO.getParent()->isCopyLike())
1591 NumCopyUses++;
1592 }
1593 if (NumCopyUses < LateRematUpdateThreshold) {
1594 // The source interval can become smaller because we removed a use.
1595 shrinkToUses(&SrcInt, &DeadDefs);
1596 if (!DeadDefs.empty())
1597 eliminateDeadDefs(&Edit);
1598 } else {
1599 ToBeUpdated.insert(SrcReg);
1600 }
1601 return true;
1602}
1603
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
  // ProcessImplicitDefs may leave some copies of <undef> values, it only
  // removes local variables. When we have a copy like:
  //
  //   %1 = COPY undef %2
  //
  // We delete the copy and remove the corresponding value number from %1.
  // Any uses of that value number are marked as <undef>.
  //
  // Returns CopyMI if it was eliminated (or morphed into IMPLICIT_DEF),
  // nullptr if the copy is not actually an <undef> read.

  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
  // CoalescerPair may have a new register class with adjusted subreg indices
  // at this point.
  Register SrcReg, DstReg;
  unsigned SrcSubIdx = 0, DstSubIdx = 0;
  if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
    return nullptr;

  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
  // CopyMI is undef iff SrcReg is not live before the instruction.
  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
    // For a subregister read, only the lanes covered by SrcSubIdx matter; if
    // any of them is live at Idx the copy reads a defined value.
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
      if ((SR.LaneMask & SrcMask).none())
        continue;
      if (SR.liveAt(Idx))
        return nullptr;
    }
  } else if (SrcLI.liveAt(Idx))
    return nullptr;

  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
  // then replace it with an IMPLICIT_DEF.
  LiveInterval &DstLI = LIS->getInterval(DstReg);
  SlotIndex RegIndex = Idx.getRegSlot();
  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
  assert(Seg != nullptr && "No segment for defining instruction");
  // Value reaching the end of the defining segment; a PHI def there means the
  // copy's value flows into a successor.
  VNInfo *V = DstLI.getVNInfoAt(Seg->end);

  // The source interval may also have been on an undef use, in which case the
  // copy introduced a live value.
  if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
    CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
    // Drop all use operands; IMPLICIT_DEF takes none. Iterate backwards so
    // removal does not shift the indices still to be visited.
    for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
      MachineOperand &MO = CopyMI->getOperand(i-1);
      if (MO.isReg() && MO.isUse())
        CopyMI->removeOperand(i-1);
    }
    LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
                         "implicit def\n");
    return CopyMI;
  }

  // Remove any DstReg segments starting at the instruction.
  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");

  // Remove value or merge with previous one in case of a subregister def.
  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
    DstLI.MergeValueNumberInto(VNI, PrevVNI);

    // The affected subregister segments can be removed.
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
      if ((SR.LaneMask & DstMask).none())
        continue;

      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
      SR.removeValNo(SVNI);
    }
    DstLI.removeEmptySubRanges();
  } else
    LIS->removeVRegDefAt(DstLI, RegIndex);

  // Mark uses as undef. Any use of DstReg that is no longer covered by a live
  // (sub)range after the removal above must carry an <undef> flag.
  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
    if (MO.isDef() /*|| MO.isUndef()*/)
      continue;
    const MachineInstr &MI = *MO.getParent();
    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
    bool isLive;
    if (!UseMask.all() && DstLI.hasSubRanges()) {
      // Subregister use: live iff any overlapping subrange is live here.
      isLive = false;
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
        if ((SR.LaneMask & UseMask).none())
          continue;
        if (SR.liveAt(UseIdx)) {
          isLive = true;
          break;
        }
      }
    } else
      isLive = DstLI.liveAt(UseIdx);
    if (isLive)
      continue;
    MO.setIsUndef(true);
    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
  }

  // A def of a subregister may be a use of the other subregisters, so
  // deleting a def of a subregister may also remove uses. Since CopyMI
  // is still part of the function (but about to be erased), mark all
  // defs of DstReg in it as <undef>, so that shrinkToUses would
  // ignore them.
  for (MachineOperand &MO : CopyMI->all_defs())
    if (MO.getReg() == DstReg)
      MO.setIsUndef(true);
  LIS->shrinkToUses(&DstLI);

  return CopyMI;
}
1717
1718void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1719 MachineOperand &MO, unsigned SubRegIdx) {
1720 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1721 if (MO.isDef())
1722 Mask = ~Mask;
1723 bool IsUndef = true;
1724 for (const LiveInterval::SubRange &S : Int.subranges()) {
1725 if ((S.LaneMask & Mask).none())
1726 continue;
1727 if (S.liveAt(UseIdx)) {
1728 IsUndef = false;
1729 break;
1730 }
1731 }
1732 if (IsUndef) {
1733 MO.setIsUndef(true);
1734 // We found out some subregister use is actually reading an undefined
1735 // value. In some cases the whole vreg has become undefined at this
1736 // point so we have to potentially shrink the main range if the
1737 // use was ending a live segment there.
1738 LiveQueryResult Q = Int.Query(UseIdx);
1739 if (Q.valueOut() == nullptr)
1740 ShrinkMainRange = true;
1741 }
1742}
1743
1744void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1745 unsigned SubIdx) {
1746 bool DstIsPhys = DstReg.isPhysical();
1747 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1748
1749 if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1750 for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1751 unsigned SubReg = MO.getSubReg();
1752 if (SubReg == 0 || MO.isUndef())
1753 continue;
1754 MachineInstr &MI = *MO.getParent();
1755 if (MI.isDebugInstr())
1756 continue;
1757 SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1758 addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1759 }
1760 }
1761
1764 I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1765 I != E; ) {
1766 MachineInstr *UseMI = &*(I++);
1767
1768 // Each instruction can only be rewritten once because sub-register
1769 // composition is not always idempotent. When SrcReg != DstReg, rewriting
1770 // the UseMI operands removes them from the SrcReg use-def chain, but when
1771 // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1772 // operands mentioning the virtual register.
1773 if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1774 continue;
1775
1777 bool Reads, Writes;
1778 std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1779
1780 // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1781 // because SrcReg is a sub-register.
1782 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
1783 Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1784
1785 // Replace SrcReg with DstReg in all UseMI operands.
1786 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1787 MachineOperand &MO = UseMI->getOperand(Ops[i]);
1788
1789 // Adjust <undef> flags in case of sub-register joins. We don't want to
1790 // turn a full def into a read-modify-write sub-register def and vice
1791 // versa.
1792 if (SubIdx && MO.isDef())
1793 MO.setIsUndef(!Reads);
1794
1795 // A subreg use of a partially undef (super) register may be a complete
1796 // undef use now and then has to be marked that way.
1797 if (MO.isUse() && !DstIsPhys) {
1798 unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
1799 if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
1800 if (!DstInt->hasSubRanges()) {
1802 LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1803 LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1804 LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1805 DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
1806 // The unused lanes are just empty live-ranges at this point.
1807 // It is the caller responsibility to set the proper
1808 // dead segments if there is an actual dead def of the
1809 // unused lanes. This may happen with rematerialization.
1810 DstInt->createSubRange(Allocator, UnusedLanes);
1811 }
1812 SlotIndex MIIdx = UseMI->isDebugInstr()
1814 : LIS->getInstructionIndex(*UseMI);
1815 SlotIndex UseIdx = MIIdx.getRegSlot(true);
1816 addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
1817 }
1818 }
1819
1820 if (DstIsPhys)
1821 MO.substPhysReg(DstReg, *TRI);
1822 else
1823 MO.substVirtReg(DstReg, SubIdx, *TRI);
1824 }
1825
1826 LLVM_DEBUG({
1827 dbgs() << "\t\tupdated: ";
1828 if (!UseMI->isDebugInstr())
1829 dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1830 dbgs() << *UseMI;
1831 });
1832 }
1833}
1834
1835bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1836 // Always join simple intervals that are defined by a single copy from a
1837 // reserved register. This doesn't increase register pressure, so it is
1838 // always beneficial.
1839 if (!MRI->isReserved(CP.getDstReg())) {
1840 LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1841 return false;
1842 }
1843
1844 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1845 if (JoinVInt.containsOneValue())
1846 return true;
1847
1848 LLVM_DEBUG(
1849 dbgs() << "\tCannot join complex intervals into reserved register.\n");
1850 return false;
1851}
1852
1853bool RegisterCoalescer::copyValueUndefInPredecessors(
1855 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1856 SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1857 if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1858 // If this is a self loop, we may be reading the same value.
1859 if (V->id != SLRQ.valueOutOrDead()->id)
1860 return false;
1861 }
1862 }
1863
1864 return true;
1865}
1866
1867void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1868 Register Reg,
1869 LaneBitmask PrunedLanes) {
1870 // If we had other instructions in the segment reading the undef sublane
1871 // value, we need to mark them with undef.
1872 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1873 unsigned SubRegIdx = MO.getSubReg();
1874 if (SubRegIdx == 0 || MO.isUndef())
1875 continue;
1876
1877 LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1878 SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
1879 for (LiveInterval::SubRange &S : LI.subranges()) {
1880 if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
1881 MO.setIsUndef();
1882 break;
1883 }
1884 }
1885 }
1886
1888
1889 // A def of a subregister may be a use of other register lanes. Replacing
1890 // such a def with a def of a different register will eliminate the use,
1891 // and may cause the recorded live range to be larger than the actual
1892 // liveness in the program IR.
1893 LIS->shrinkToUses(&LI);
1894}
1895
1896bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1897 Again = false;
1898 LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1899
1901 if (!CP.setRegisters(CopyMI)) {
1902 LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1903 return false;
1904 }
1905
1906 if (CP.getNewRC()) {
1907 auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1908 auto DstRC = MRI->getRegClass(CP.getDstReg());
1909 unsigned SrcIdx = CP.getSrcIdx();
1910 unsigned DstIdx = CP.getDstIdx();
1911 if (CP.isFlipped()) {
1912 std::swap(SrcIdx, DstIdx);
1913 std::swap(SrcRC, DstRC);
1914 }
1915 if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1916 CP.getNewRC(), *LIS)) {
1917 LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1918 return false;
1919 }
1920 }
1921
1922 // Dead code elimination. This really should be handled by MachineDCE, but
1923 // sometimes dead copies slip through, and we can't generate invalid live
1924 // ranges.
1925 if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1926 LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1927 DeadDefs.push_back(CopyMI);
1928 eliminateDeadDefs();
1929 return true;
1930 }
1931
1932 // Eliminate undefs.
1933 if (!CP.isPhys()) {
1934 // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
1935 if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
1936 if (UndefMI->isImplicitDef())
1937 return false;
1938 deleteInstr(CopyMI);
1939 return false; // Not coalescable.
1940 }
1941 }
1942
1943 // Coalesced copies are normally removed immediately, but transformations
1944 // like removeCopyByCommutingDef() can inadvertently create identity copies.
1945 // When that happens, just join the values and remove the copy.
1946 if (CP.getSrcReg() == CP.getDstReg()) {
1947 LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1948 LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1949 const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1950 LiveQueryResult LRQ = LI.Query(CopyIdx);
1951 if (VNInfo *DefVNI = LRQ.valueDefined()) {
1952 VNInfo *ReadVNI = LRQ.valueIn();
1953 assert(ReadVNI && "No value before copy and no <undef> flag.");
1954 assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1955
1956 // Track incoming undef lanes we need to eliminate from the subrange.
1957 LaneBitmask PrunedLanes;
1958 MachineBasicBlock *MBB = CopyMI->getParent();
1959
1960 // Process subregister liveranges.
1961 for (LiveInterval::SubRange &S : LI.subranges()) {
1962 LiveQueryResult SLRQ = S.Query(CopyIdx);
1963 if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1964 if (VNInfo *SReadVNI = SLRQ.valueIn())
1965 SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
1966
1967 // If this copy introduced an undef subrange from an incoming value,
1968 // we need to eliminate the undef live in values from the subrange.
1969 if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
1970 LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
1971 PrunedLanes |= S.LaneMask;
1972 S.removeValNo(SDefVNI);
1973 }
1974 }
1975 }
1976
1977 LI.MergeValueNumberInto(DefVNI, ReadVNI);
1978 if (PrunedLanes.any()) {
1979 LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
1980 << PrunedLanes << '\n');
1981 setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
1982 }
1983
1984 LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
1985 }
1986 deleteInstr(CopyMI);
1987 return true;
1988 }
1989
1990 // Enforce policies.
1991 if (CP.isPhys()) {
1992 LLVM_DEBUG(dbgs() << "\tConsidering merging "
1993 << printReg(CP.getSrcReg(), TRI) << " with "
1994 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
1995 if (!canJoinPhys(CP)) {
1996 // Before giving up coalescing, if definition of source is defined by
1997 // trivial computation, try rematerializing it.
1998 bool IsDefCopy = false;
1999 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2000 return true;
2001 if (IsDefCopy)
2002 Again = true; // May be possible to coalesce later.
2003 return false;
2004 }
2005 } else {
2006 // When possible, let DstReg be the larger interval.
2007 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
2008 LIS->getInterval(CP.getDstReg()).size())
2009 CP.flip();
2010
2011 LLVM_DEBUG({
2012 dbgs() << "\tConsidering merging to "
2013 << TRI->getRegClassName(CP.getNewRC()) << " with ";
2014 if (CP.getDstIdx() && CP.getSrcIdx())
2015 dbgs() << printReg(CP.getDstReg()) << " in "
2016 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
2017 << printReg(CP.getSrcReg()) << " in "
2018 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
2019 else
2020 dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
2021 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
2022 });
2023 }
2024
2025 ShrinkMask = LaneBitmask::getNone();
2026 ShrinkMainRange = false;
2027
2028 // Okay, attempt to join these two intervals. On failure, this returns false.
2029 // Otherwise, if one of the intervals being joined is a physreg, this method
2030 // always canonicalizes DstInt to be it. The output "SrcInt" will not have
2031 // been modified, so we can use this information below to update aliases.
2032 if (!joinIntervals(CP)) {
2033 // Coalescing failed.
2034
2035 // If definition of source is defined by trivial computation, try
2036 // rematerializing it.
2037 bool IsDefCopy = false;
2038 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2039 return true;
2040
2041 // If we can eliminate the copy without merging the live segments, do so
2042 // now.
2043 if (!CP.isPartial() && !CP.isPhys()) {
2044 bool Changed = adjustCopiesBackFrom(CP, CopyMI);
2045 bool Shrink = false;
2046 if (!Changed)
2047 std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
2048 if (Changed) {
2049 deleteInstr(CopyMI);
2050 if (Shrink) {
2051 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
2052 LiveInterval &DstLI = LIS->getInterval(DstReg);
2053 shrinkToUses(&DstLI);
2054 LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
2055 }
2056 LLVM_DEBUG(dbgs() << "\tTrivial!\n");
2057 return true;
2058 }
2059 }
2060
2061 // Try and see if we can partially eliminate the copy by moving the copy to
2062 // its predecessor.
2063 if (!CP.isPartial() && !CP.isPhys())
2064 if (removePartialRedundancy(CP, *CopyMI))
2065 return true;
2066
2067 // Otherwise, we are unable to join the intervals.
2068 LLVM_DEBUG(dbgs() << "\tInterference!\n");
2069 Again = true; // May be possible to coalesce later.
2070 return false;
2071 }
2072
2073 // Coalescing to a virtual register that is of a sub-register class of the
2074 // other. Make sure the resulting register is set to the right register class.
2075 if (CP.isCrossClass()) {
2076 ++numCrossRCs;
2077 MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
2078 }
2079
2080 // Removing sub-register copies can ease the register class constraints.
2081 // Make sure we attempt to inflate the register class of DstReg.
2082 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
2083 InflateRegs.push_back(CP.getDstReg());
2084
2085 // CopyMI has been erased by joinIntervals at this point. Remove it from
2086 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
2087 // to the work list. This keeps ErasedInstrs from growing needlessly.
2088 ErasedInstrs.erase(CopyMI);
2089
2090 // Rewrite all SrcReg operands to DstReg.
2091 // Also update DstReg operands to include DstIdx if it is set.
2092 if (CP.getDstIdx())
2093 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2094 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2095
2096 // Shrink subregister ranges if necessary.
2097 if (ShrinkMask.any()) {
2098 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2099 for (LiveInterval::SubRange &S : LI.subranges()) {
2100 if ((S.LaneMask & ShrinkMask).none())
2101 continue;
2102 LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
2103 << ")\n");
2104 LIS->shrinkToUses(S, LI.reg());
2105 ShrinkMainRange = true;
2106 }
2108 }
2109
2110 // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
2111 // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
2112 // is not up-to-date, need to update the merged live interval here.
2113 if (ToBeUpdated.count(CP.getSrcReg()))
2114 ShrinkMainRange = true;
2115
2116 if (ShrinkMainRange) {
2117 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2118 shrinkToUses(&LI);
2119 }
2120
2121 // SrcReg is guaranteed to be the register whose live interval that is
2122 // being merged.
2123 LIS->removeInterval(CP.getSrcReg());
2124
2125 // Update regalloc hint.
2126 TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
2127
2128 LLVM_DEBUG({
2129 dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
2130 << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
2131 dbgs() << "\tResult = ";
2132 if (CP.isPhys())
2133 dbgs() << printReg(CP.getDstReg(), TRI);
2134 else
2135 dbgs() << LIS->getInterval(CP.getDstReg());
2136 dbgs() << '\n';
2137 });
2138
2139 ++numJoins;
2140 return true;
2141}
2142
2143bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
2144 Register DstReg = CP.getDstReg();
2145 Register SrcReg = CP.getSrcReg();
2146 assert(CP.isPhys() && "Must be a physreg copy");
2147 assert(MRI->isReserved(DstReg) && "Not a reserved register");
2148 LiveInterval &RHS = LIS->getInterval(SrcReg);
2149 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
2150
2151 assert(RHS.containsOneValue() && "Invalid join with reserved register");
2152
2153 // Optimization for reserved registers like ESP. We can only merge with a
2154 // reserved physreg if RHS has a single value that is a copy of DstReg.
2155 // The live range of the reserved register will look like a set of dead defs
2156 // - we don't properly track the live range of reserved registers.
2157
2158 // Deny any overlapping intervals. This depends on all the reserved
2159 // register live ranges to look like dead defs.
2160 if (!MRI->isConstantPhysReg(DstReg)) {
2161 for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
2162 // Abort if not all the regunits are reserved.
2163 for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
2164 if (!MRI->isReserved(*RI))
2165 return false;
2166 }
2167 if (RHS.overlaps(LIS->getRegUnit(*UI))) {
2168 LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
2169 << '\n');
2170 return false;
2171 }
2172 }
2173
2174 // We must also check for overlaps with regmask clobbers.
2175 BitVector RegMaskUsable;
2176 if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2177 !RegMaskUsable.test(DstReg)) {
2178 LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2179 return false;
2180 }
2181 }
2182
2183 // Skip any value computations, we are not adding new values to the
2184 // reserved register. Also skip merging the live ranges, the reserved
2185 // register live range doesn't need to be accurate as long as all the
2186 // defs are there.
2187
2188 // Delete the identity copy.
2189 MachineInstr *CopyMI;
2190 if (CP.isFlipped()) {
2191 // Physreg is copied into vreg
2192 // %y = COPY %physreg_x
2193 // ... //< no other def of %physreg_x here
2194 // use %y
2195 // =>
2196 // ...
2197 // use %physreg_x
2198 CopyMI = MRI->getVRegDef(SrcReg);
2199 deleteInstr(CopyMI);
2200 } else {
2201 // VReg is copied into physreg:
2202 // %y = def
2203 // ... //< no other def or use of %physreg_x here
2204 // %physreg_x = COPY %y
2205 // =>
2206 // %physreg_x = def
2207 // ...
2208 if (!MRI->hasOneNonDBGUse(SrcReg)) {
2209 LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2210 return false;
2211 }
2212
2213 if (!LIS->intervalIsInOneMBB(RHS)) {
2214 LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2215 return false;
2216 }
2217
2218 MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2219 CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2220 SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2221 SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2222
2223 if (!MRI->isConstantPhysReg(DstReg)) {
2224 // We checked above that there are no interfering defs of the physical
2225 // register. However, for this case, where we intend to move up the def of
2226 // the physical register, we also need to check for interfering uses.
2227 SlotIndexes *Indexes = LIS->getSlotIndexes();
2228 for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2229 SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
2231 if (MI->readsRegister(DstReg, TRI)) {
2232 LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2233 return false;
2234 }
2235 }
2236 }
2237
2238 // We're going to remove the copy which defines a physical reserved
2239 // register, so remove its valno, etc.
2240 LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
2241 << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2242
2243 LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
2244 deleteInstr(CopyMI);
2245
2246 // Create a new dead def at the new def location.
2247 for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
2248 LiveRange &LR = LIS->getRegUnit(*UI);
2249 LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2250 }
2251 }
2252
2253 // We don't track kills for reserved registers.
2254 MRI->clearKillFlags(CP.getSrcReg());
2255
2256 return true;
2257}
2258
2259//===----------------------------------------------------------------------===//
2260// Interference checking and interval joining
2261//===----------------------------------------------------------------------===//
2262//
2263// In the easiest case, the two live ranges being joined are disjoint, and
2264// there is no interference to consider. It is quite common, though, to have
2265// overlapping live ranges, and we need to check if the interference can be
2266// resolved.
2267//
2268// The live range of a single SSA value forms a sub-tree of the dominator tree.
2269// This means that two SSA values overlap if and only if the def of one value
2270// is contained in the live range of the other value. As a special case, the
2271// overlapping values can be defined at the same index.
2272//
2273// The interference from an overlapping def can be resolved in these cases:
2274//
2275// 1. Coalescable copies. The value is defined by a copy that would become an
2276// identity copy after joining SrcReg and DstReg. The copy instruction will
2277// be removed, and the value will be merged with the source value.
2278//
2279// There can be several copies back and forth, causing many values to be
2280// merged into one. We compute a list of ultimate values in the joined live
2281// range as well as a mappings from the old value numbers.
2282//
2283// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2284// predecessors have a live out value. It doesn't cause real interference,
2285// and can be merged into the value it overlaps. Like a coalescable copy, it
2286// can be erased after joining.
2287//
2288// 3. Copy of external value. The overlapping def may be a copy of a value that
2289// is already in the other register. This is like a coalescable copy, but
2290// the live range of the source register must be trimmed after erasing the
2291// copy instruction:
2292//
2293// %src = COPY %ext
2294// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
2295//
2296// 4. Clobbering undefined lanes. Vector registers are sometimes built by
2297// defining one lane at a time:
2298//
2299// %dst:ssub0<def,read-undef> = FOO
2300// %src = BAR
2301// %dst:ssub1 = COPY %src
2302//
2303// The live range of %src overlaps the %dst value defined by FOO, but
2304// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2305// which was undef anyway.
2306//
2307// The value mapping is more complicated in this case. The final live range
2308// will have different value numbers for both FOO and BAR, but there is no
2309// simple mapping from old to new values. It may even be necessary to add
2310// new PHI values.
2311//
2312// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2313// is live, but never read. This can happen because we don't compute
2314// individual live ranges per lane.
2315//
2316// %dst = FOO
2317// %src = BAR
2318// %dst:ssub1 = COPY %src
2319//
2320// This kind of interference is only resolved locally. If the clobbered
2321// lane value escapes the block, the join is aborted.
2322
2323namespace {
2324
2325/// Track information about values in a single virtual register about to be
2326/// joined. Objects of this class are always created in pairs - one for each
2327/// side of the CoalescerPair (or one for each lane of a side of the coalescer
2328/// pair)
2329class JoinVals {
2330 /// Live range we work on.
2331 LiveRange &LR;
2332
2333 /// (Main) register we work on.
2334 const Register Reg;
2335
2336 /// Reg (and therefore the values in this liverange) will end up as
2337 /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2338 /// CP.SrcIdx.
2339 const unsigned SubIdx;
2340
2341 /// The LaneMask that this liverange will occupy the coalesced register. May
2342 /// be smaller than the lanemask produced by SubIdx when merging subranges.
2343 const LaneBitmask LaneMask;
2344
2345 /// This is true when joining sub register ranges, false when joining main
2346 /// ranges.
2347 const bool SubRangeJoin;
2348
2349 /// Whether the current LiveInterval tracks subregister liveness.
2350 const bool TrackSubRegLiveness;
2351
2352 /// Values that will be present in the final live range.
2353 SmallVectorImpl<VNInfo*> &NewVNInfo;
2354
2355 const CoalescerPair &CP;
2356 LiveIntervals *LIS;
2357 SlotIndexes *Indexes;
2358 const TargetRegisterInfo *TRI;
2359
2360 /// Value number assignments. Maps value numbers in LI to entries in
2361 /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2362 SmallVector<int, 8> Assignments;
2363
2364 public:
2365 /// Conflict resolution for overlapping values.
2366 enum ConflictResolution {
2367 /// No overlap, simply keep this value.
2368 CR_Keep,
2369
2370 /// Merge this value into OtherVNI and erase the defining instruction.
2371 /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2372 /// values.
2373 CR_Erase,
2374
2375 /// Merge this value into OtherVNI but keep the defining instruction.
2376 /// This is for the special case where OtherVNI is defined by the same
2377 /// instruction.
2378 CR_Merge,
2379
2380 /// Keep this value, and have it replace OtherVNI where possible. This
2381 /// complicates value mapping since OtherVNI maps to two different values
2382 /// before and after this def.
2383 /// Used when clobbering undefined or dead lanes.
2384 CR_Replace,
2385
2386 /// Unresolved conflict. Visit later when all values have been mapped.
2387 CR_Unresolved,
2388
2389 /// Unresolvable conflict. Abort the join.
2390 CR_Impossible
2391 };
2392
2393 private:
2394 /// Per-value info for LI. The lane bit masks are all relative to the final
2395 /// joined register, so they can be compared directly between SrcReg and
2396 /// DstReg.
2397 struct Val {
2398 ConflictResolution Resolution = CR_Keep;
2399
2400 /// Lanes written by this def, 0 for unanalyzed values.
2401 LaneBitmask WriteLanes;
2402
2403 /// Lanes with defined values in this register. Other lanes are undef and
2404 /// safe to clobber.
2405 LaneBitmask ValidLanes;
2406
2407 /// Value in LI being redefined by this def.
2408 VNInfo *RedefVNI = nullptr;
2409
2410 /// Value in the other live range that overlaps this def, if any.
2411 VNInfo *OtherVNI = nullptr;
2412
2413 /// Is this value an IMPLICIT_DEF that can be erased?
2414 ///
2415 /// IMPLICIT_DEF values should only exist at the end of a basic block that
2416 /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2417 /// safely erased if they are overlapping a live value in the other live
2418 /// interval.
2419 ///
2420 /// Weird control flow graphs and incomplete PHI handling in
2421 /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2422 /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2423 /// normal values.
2424 bool ErasableImplicitDef = false;
2425
2426 /// True when the live range of this value will be pruned because of an
2427 /// overlapping CR_Replace value in the other live range.
2428 bool Pruned = false;
2429
2430 /// True once Pruned above has been computed.
2431 bool PrunedComputed = false;
2432
2433 /// True if this value is determined to be identical to OtherVNI
2434 /// (in valuesIdentical). This is used with CR_Erase where the erased
2435 /// copy is redundant, i.e. the source value is already the same as
2436 /// the destination. In such cases the subranges need to be updated
2437 /// properly. See comment at pruneSubRegValues for more info.
2438 bool Identical = false;
2439
2440 Val() = default;
2441
2442 bool isAnalyzed() const { return WriteLanes.any(); }
2443 };
2444
2445 /// One entry per value number in LI.
2447
2448 /// Compute the bitmask of lanes actually written by DefMI.
2449 /// Set Redef if there are any partial register definitions that depend on the
2450 /// previous value of the register.
2451 LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2452
2453 /// Find the ultimate value that VNI was copied from.
2454 std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
2455
2456 bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2457
2458 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2459 /// Return a conflict resolution when possible, but leave the hard cases as
2460 /// CR_Unresolved.
2461 /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2462 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2463 /// The recursion always goes upwards in the dominator tree, making loops
2464 /// impossible.
2465 ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2466
2467 /// Compute the value assignment for ValNo in RI.
2468 /// This may be called recursively by analyzeValue(), but never for a ValNo on
2469 /// the stack.
2470 void computeAssignment(unsigned ValNo, JoinVals &Other);
2471
2472 /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2473 /// the extent of the tainted lanes in the block.
2474 ///
2475 /// Multiple values in Other.LR can be affected since partial redefinitions
2476 /// can preserve previously tainted lanes.
2477 ///
2478 /// 1 %dst = VLOAD <-- Define all lanes in %dst
2479 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
2480 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
2481 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2482 ///
2483 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2484 /// entry to TaintedVals.
2485 ///
2486 /// Returns false if the tainted lanes extend beyond the basic block.
2487 bool
2488 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2489 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2490
2491 /// Return true if MI uses any of the given Lanes from Reg.
2492 /// This does not include partial redefinitions of Reg.
2493 bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
2494
2495 /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2496 /// be pruned:
2497 ///
2498 /// %dst = COPY %src
2499 /// %src = COPY %dst <-- This value to be pruned.
2500 /// %dst = COPY %src <-- This value is a copy of a pruned value.
2501 bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2502
2503public:
2504 JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
2505 SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
2506 LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2507 bool TrackSubRegLiveness)
2508 : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2509 SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2510 NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2511 TRI(TRI), Assignments(LR.getNumValNums(), -1),
2512 Vals(LR.getNumValNums()) {}
2513
2514 /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2515 /// Returns false if any conflicts were impossible to resolve.
2516 bool mapValues(JoinVals &Other);
2517
2518 /// Try to resolve conflicts that require all values to be mapped.
2519 /// Returns false if any conflicts were impossible to resolve.
2520 bool resolveConflicts(JoinVals &Other);
2521
2522 /// Prune the live range of values in Other.LR where they would conflict with
2523 /// CR_Replace values in LR. Collect end points for restoring the live range
2524 /// after joining.
2525 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2526 bool changeInstrs);
2527
2528 /// Removes subranges starting at copies that get removed. This sometimes
2529 /// happens when undefined subranges are copied around. These ranges contain
2530 /// no useful information and can be removed.
2531 void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2532
2533 /// Pruning values in subranges can lead to removing segments in these
2534 /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2535 /// the main range also need to be removed. This function will mark
2536 /// the corresponding values in the main range as pruned, so that
2537 /// eraseInstrs can do the final cleanup.
2538 /// The parameter @p LI must be the interval whose main range is the
2539 /// live range LR.
2540 void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2541
2542 /// Erase any machine instructions that have been coalesced away.
2543 /// Add erased instructions to ErasedInstrs.
2544 /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2545 /// the erased instrs.
2547 SmallVectorImpl<Register> &ShrinkRegs,
2548 LiveInterval *LI = nullptr);
2549
2550 /// Remove liverange defs at places where implicit defs will be removed.
2551 void removeImplicitDefs();
2552
2553 /// Get the value assignments suitable for passing to LiveInterval::join.
2554 const int *getAssignments() const { return Assignments.data(); }
2555
2556 /// Get the conflict resolution for a value number.
2557 ConflictResolution getResolution(unsigned Num) const {
2558 return Vals[Num].Resolution;
2559 }
2560};
2561
2562} // end anonymous namespace
2563
2564LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2565 const {
2566 LaneBitmask L;
2567 for (const MachineOperand &MO : DefMI->all_defs()) {
2568 if (MO.getReg() != Reg)
2569 continue;
2570 L |= TRI->getSubRegIndexLaneMask(
2571 TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2572 if (MO.readsReg())
2573 Redef = true;
2574 }
2575 return L;
2576}
2577
2578std::pair<const VNInfo *, Register>
2579JoinVals::followCopyChain(const VNInfo *VNI) const {
2580 Register TrackReg = Reg;
2581
2582 while (!VNI->isPHIDef()) {
2583 SlotIndex Def = VNI->def;
2584 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2585 assert(MI && "No defining instruction");
2586 if (!MI->isFullCopy())
2587 return std::make_pair(VNI, TrackReg);
2588 Register SrcReg = MI->getOperand(1).getReg();
2589 if (!SrcReg.isVirtual())
2590 return std::make_pair(VNI, TrackReg);
2591
2592 const LiveInterval &LI = LIS->getInterval(SrcReg);
2593 const VNInfo *ValueIn;
2594 // No subrange involved.
2595 if (!SubRangeJoin || !LI.hasSubRanges()) {
2596 LiveQueryResult LRQ = LI.Query(Def);
2597 ValueIn = LRQ.valueIn();
2598 } else {
2599 // Query subranges. Ensure that all matching ones take us to the same def
2600 // (allowing some of them to be undef).
2601 ValueIn = nullptr;
2602 for (const LiveInterval::SubRange &S : LI.subranges()) {
2603 // Transform lanemask to a mask in the joined live interval.
2604 LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2605 if ((SMask & LaneMask).none())
2606 continue;
2607 LiveQueryResult LRQ = S.Query(Def);
2608 if (!ValueIn) {
2609 ValueIn = LRQ.valueIn();
2610 continue;
2611 }
2612 if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2613 return std::make_pair(VNI, TrackReg);
2614 }
2615 }
2616 if (ValueIn == nullptr) {
2617 // Reaching an undefined value is legitimate, for example:
2618 //
2619 // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
2620 // 2 %1 = COPY %0 ;; %1 is defined here.
2621 // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
2622 // ;; but it's equivalent to "undef".
2623 return std::make_pair(nullptr, SrcReg);
2624 }
2625 VNI = ValueIn;
2626 TrackReg = SrcReg;
2627 }
2628 return std::make_pair(VNI, TrackReg);
2629}
2630
2631bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2632 const JoinVals &Other) const {
2633 const VNInfo *Orig0;
2634 Register Reg0;
2635 std::tie(Orig0, Reg0) = followCopyChain(Value0);
2636 if (Orig0 == Value1 && Reg0 == Other.Reg)
2637 return true;
2638
2639 const VNInfo *Orig1;
2640 Register Reg1;
2641 std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2642 // If both values are undefined, and the source registers are the same
2643 // register, the values are identical. Filter out cases where only one
2644 // value is defined.
2645 if (Orig0 == nullptr || Orig1 == nullptr)
2646 return Orig0 == Orig1 && Reg0 == Reg1;
2647
2648 // The values are equal if they are defined at the same place and use the
2649 // same register. Note that we cannot compare VNInfos directly as some of
2650 // them might be from a copy created in mergeSubRangeInto() while the other
2651 // is from the original LiveInterval.
2652 return Orig0->def == Orig1->def && Reg0 == Reg1;
2653}
2654
2655JoinVals::ConflictResolution
2656JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2657 Val &V = Vals[ValNo];
2658 assert(!V.isAnalyzed() && "Value has already been analyzed!");
2659 VNInfo *VNI = LR.getValNumInfo(ValNo);
2660 if (VNI->isUnused()) {
2661 V.WriteLanes = LaneBitmask::getAll();
2662 return CR_Keep;
2663 }
2664
2665 // Get the instruction defining this value, compute the lanes written.
2666 const MachineInstr *DefMI = nullptr;
2667 if (VNI->isPHIDef()) {
2668 // Conservatively assume that all lanes in a PHI are valid.
2669 LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2670 : TRI->getSubRegIndexLaneMask(SubIdx);
2671 V.ValidLanes = V.WriteLanes = Lanes;
2672 } else {
2673 DefMI = Indexes->getInstructionFromIndex(VNI->def);
2674 assert(DefMI != nullptr);
2675 if (SubRangeJoin) {
2676 // We don't care about the lanes when joining subregister ranges.
2677 V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2678 if (DefMI->isImplicitDef()) {
2679 V.ValidLanes = LaneBitmask::getNone();
2680 V.ErasableImplicitDef = true;
2681 }
2682 } else {
2683 bool Redef = false;
2684 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2685
2686 // If this is a read-modify-write instruction, there may be more valid
2687 // lanes than the ones written by this instruction.
2688 // This only covers partial redef operands. DefMI may have normal use
2689 // operands reading the register. They don't contribute valid lanes.
2690 //
2691 // This adds ssub1 to the set of valid lanes in %src:
2692 //
2693 // %src:ssub1 = FOO
2694 //
2695 // This leaves only ssub1 valid, making any other lanes undef:
2696 //
2697 // %src:ssub1<def,read-undef> = FOO %src:ssub2
2698 //
2699 // The <read-undef> flag on the def operand means that old lane values are
2700 // not important.
2701 if (Redef) {
2702 V.RedefVNI = LR.Query(VNI->def).valueIn();
2703 assert((TrackSubRegLiveness || V.RedefVNI) &&
2704 "Instruction is reading nonexistent value");
2705 if (V.RedefVNI != nullptr) {
2706 computeAssignment(V.RedefVNI->id, Other);
2707 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2708 }
2709 }
2710
2711 // An IMPLICIT_DEF writes undef values.
2712 if (DefMI->isImplicitDef()) {
2713 // We normally expect IMPLICIT_DEF values to be live only until the end
2714 // of their block. If the value is really live longer and gets pruned in
2715 // another block, this flag is cleared again.
2716 //
2717 // Clearing the valid lanes is deferred until it is sure this can be
2718 // erased.
2719 V.ErasableImplicitDef = true;
2720 }
2721 }
2722 }
2723
2724 // Find the value in Other that overlaps VNI->def, if any.
2725 LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2726
2727 // It is possible that both values are defined by the same instruction, or
2728 // the values are PHIs defined in the same block. When that happens, the two
2729 // values should be merged into one, but not into any preceding value.
2730 // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2731 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2732 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2733
2734 // One value stays, the other is merged. Keep the earlier one, or the first
2735 // one we see.
2736 if (OtherVNI->def < VNI->def)
2737 Other.computeAssignment(OtherVNI->id, *this);
2738 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2739 // This is an early-clobber def overlapping a live-in value in the other
2740 // register. Not mergeable.
2741 V.OtherVNI = OtherLRQ.valueIn();
2742 return CR_Impossible;
2743 }
2744 V.OtherVNI = OtherVNI;
2745 Val &OtherV = Other.Vals[OtherVNI->id];
2746 // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2747 // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2748 // is assigned.
2749 if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2750 return CR_Keep;
2751 // Both sides have been analyzed now.
2752 // Allow overlapping PHI values. Any real interference would show up in a
2753 // predecessor, the PHI itself can't introduce any conflicts.
2754 if (VNI->isPHIDef())
2755 return CR_Merge;
2756 if ((V.ValidLanes & OtherV.ValidLanes).any())
2757 // Overlapping lanes can't be resolved.
2758 return CR_Impossible;
2759 else
2760 return CR_Merge;
2761 }
2762
2763 // No simultaneous def. Is Other live at the def?
2764 V.OtherVNI = OtherLRQ.valueIn();
2765 if (!V.OtherVNI)
2766 // No overlap, no conflict.
2767 return CR_Keep;
2768
2769 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2770
2771 // We have overlapping values, or possibly a kill of Other.
2772 // Recursively compute assignments up the dominator tree.
2773 Other.computeAssignment(V.OtherVNI->id, *this);
2774 Val &OtherV = Other.Vals[V.OtherVNI->id];
2775
2776 if (OtherV.ErasableImplicitDef) {
2777 // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2778 // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2779 // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2780 // technically.
2781 //
2782 // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2783 // to erase the IMPLICIT_DEF instruction.
2784 MachineBasicBlock *OtherMBB = Indexes->getMBBFromIndex(V.OtherVNI->def);
2785 if (DefMI && DefMI->getParent() != OtherMBB) {
2786 LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2787 << " extends into "
2789 << ", keeping it.\n");
2790 OtherV.ErasableImplicitDef = false;
2791 } else if (OtherMBB->hasEHPadSuccessor()) {
2792 // If OtherV is defined in a basic block that has EH pad successors then
2793 // we get the same problem not just if OtherV is live beyond its basic
2794 // block, but beyond the last call instruction in its basic block. Handle
2795 // this case conservatively.
2796 LLVM_DEBUG(
2797 dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2798 << " may be live into EH pad successors, keeping it.\n");
2799 OtherV.ErasableImplicitDef = false;
2800 } else {
2801 // We deferred clearing these lanes in case we needed to save them
2802 OtherV.ValidLanes &= ~OtherV.WriteLanes;
2803 }
2804 }
2805
2806 // Allow overlapping PHI values. Any real interference would show up in a
2807 // predecessor, the PHI itself can't introduce any conflicts.
2808 if (VNI->isPHIDef())
2809 return CR_Replace;
2810
2811 // Check for simple erasable conflicts.
2812 if (DefMI->isImplicitDef())
2813 return CR_Erase;
2814
2815 // Include the non-conflict where DefMI is a coalescable copy that kills
2816 // OtherVNI. We still want the copy erased and value numbers merged.
2817 if (CP.isCoalescable(DefMI)) {
2818 // Some of the lanes copied from OtherVNI may be undef, making them undef
2819 // here too.
2820 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2821 return CR_Erase;
2822 }
2823
2824 // This may not be a real conflict if DefMI simply kills Other and defines
2825 // VNI.
2826 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2827 return CR_Keep;
2828
2829 // Handle the case where VNI and OtherVNI can be proven to be identical:
2830 //
2831 // %other = COPY %ext
2832 // %this = COPY %ext <-- Erase this copy
2833 //
2834 if (DefMI->isFullCopy() && !CP.isPartial() &&
2835 valuesIdentical(VNI, V.OtherVNI, Other)) {
2836 V.Identical = true;
2837 return CR_Erase;
2838 }
2839
2840 // The remaining checks apply to the lanes, which aren't tracked here. This
2841 // was already decided to be OK via the following CR_Replace condition.
2842 // CR_Replace.
2843 if (SubRangeJoin)
2844 return CR_Replace;
2845
2846 // If the lanes written by this instruction were all undef in OtherVNI, it is
2847 // still safe to join the live ranges. This can't be done with a simple value
2848 // mapping, though - OtherVNI will map to multiple values:
2849 //
2850 // 1 %dst:ssub0 = FOO <-- OtherVNI
2851 // 2 %src = BAR <-- VNI
2852 // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2853 // 4 BAZ killed %dst
2854 // 5 QUUX killed %src
2855 //
2856 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2857 // handles this complex value mapping.
2858 if ((V.WriteLanes & OtherV.ValidLanes).none())
2859 return CR_Replace;
2860
2861 // If the other live range is killed by DefMI and the live ranges are still
2862 // overlapping, it must be because we're looking at an early clobber def:
2863 //
2864 // %dst<def,early-clobber> = ASM killed %src
2865 //
2866 // In this case, it is illegal to merge the two live ranges since the early
2867 // clobber def would clobber %src before it was read.
2868 if (OtherLRQ.isKill()) {
2869 // This case where the def doesn't overlap the kill is handled above.
2870 assert(VNI->def.isEarlyClobber() &&
2871 "Only early clobber defs can overlap a kill");
2872 return CR_Impossible;
2873 }
2874
2875 // VNI is clobbering live lanes in OtherVNI, but there is still the
2876 // possibility that no instructions actually read the clobbered lanes.
2877 // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2878 // Otherwise Other.RI wouldn't be live here.
2879 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2880 return CR_Impossible;
2881
2882 if (TrackSubRegLiveness) {
2883 auto &OtherLI = LIS->getInterval(Other.Reg);
2884 // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
2885 // share the same live range, so we just need to check whether they have
2886 // any conflict bit in their LaneMask.
2887 if (!OtherLI.hasSubRanges()) {
2888 LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
2889 return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
2890 }
2891
2892 // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
2893 // impossible to resolve the conflict. Otherwise, we can just replace
2894 // OtherVNI because of no real conflict.
2895 for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
2896 LaneBitmask OtherMask =
2897 TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
2898 if ((OtherMask & V.WriteLanes).none())
2899 continue;
2900
2901 auto OtherSRQ = OtherSR.Query(VNI->def);
2902 if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
2903 // VNI is clobbering some lanes of OtherVNI, they have real conflict.
2904 return CR_Impossible;
2905 }
2906 }
2907
2908 // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
2909 return CR_Replace;
2910 }
2911
2912 // We need to verify that no instructions are reading the clobbered lanes.
2913 // To save compile time, we'll only check that locally. Don't allow the
2914 // tainted value to escape the basic block.
2915 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2916 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2917 return CR_Impossible;
2918
2919 // There are still some things that could go wrong besides clobbered lanes
2920 // being read, for example OtherVNI may be only partially redefined in MBB,
2921 // and some clobbered lanes could escape the block. Save this analysis for
2922 // resolveConflicts() when all values have been mapped. We need to know
2923 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2924 // that now - the recursive analyzeValue() calls must go upwards in the
2925 // dominator tree.
2926 return CR_Unresolved;
2927}
2928
2929void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2930 Val &V = Vals[ValNo];
2931 if (V.isAnalyzed()) {
2932 // Recursion should always move up the dominator tree, so ValNo is not
2933 // supposed to reappear before it has been assigned.
2934 assert(Assignments[ValNo] != -1 && "Bad recursion?");
2935 return;
2936 }
2937 switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2938 case CR_Erase:
2939 case CR_Merge:
2940 // Merge this ValNo into OtherVNI.
2941 assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2942 assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2943 Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2944 LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
2945 << LR.getValNumInfo(ValNo)->def << " into "
2946 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2947 << V.OtherVNI->def << " --> @"
2948 << NewVNInfo[Assignments[ValNo]]->def << '\n');
2949 break;
2950 case CR_Replace:
2951 case CR_Unresolved: {
2952 // The other value is going to be pruned if this join is successful.
2953 assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2954 Val &OtherV = Other.Vals[V.OtherVNI->id];
2955 // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2956 // its lanes.
2957 if (OtherV.ErasableImplicitDef &&
2958 TrackSubRegLiveness &&
2959 (OtherV.WriteLanes & ~V.ValidLanes).any()) {
2960 LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
2961
2962 OtherV.ErasableImplicitDef = false;
2963 // The valid lanes written by the implicit_def were speculatively cleared
2964 // before, so make this more conservative. It may be better to track this,
2965 // I haven't found a testcase where it matters.
2966 OtherV.ValidLanes = LaneBitmask::getAll();
2967 }
2968
2969 OtherV.Pruned = true;
2970 [[fallthrough]];
2971 }
2972 default:
2973 // This value number needs to go in the final joined live range.
2974 Assignments[ValNo] = NewVNInfo.size();
2975 NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2976 break;
2977 }
2978}
2979
2980bool JoinVals::mapValues(JoinVals &Other) {
2981 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2982 computeAssignment(i, Other);
2983 if (Vals[i].Resolution == CR_Impossible) {
2984 LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2985 << '@' << LR.getValNumInfo(i)->def << '\n');
2986 return false;
2987 }
2988 }
2989 return true;
2990}
2991
2992bool JoinVals::
2993taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2994 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
2995 VNInfo *VNI = LR.getValNumInfo(ValNo);
2996 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2997 SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
2998
2999 // Scan Other.LR from VNI.def to MBBEnd.
3000 LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
3001 assert(OtherI != Other.LR.end() && "No conflict?");
3002 do {
3003 // OtherI is pointing to a tainted value. Abort the join if the tainted
3004 // lanes escape the block.
3005 SlotIndex End = OtherI->end;
3006 if (End >= MBBEnd) {
3007 LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
3008 << OtherI->valno->id << '@' << OtherI->start << '\n');
3009 return false;
3010 }
3011 LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
3012 << OtherI->valno->id << '@' << OtherI->start << " to "
3013 << End << '\n');
3014 // A dead def is not a problem.
3015 if (End.isDead())
3016 break;
3017 TaintExtent.push_back(std::make_pair(End, TaintedLanes));
3018
3019 // Check for another def in the MBB.
3020 if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
3021 break;
3022
3023 // Lanes written by the new def are no longer tainted.
3024 const Val &OV = Other.Vals[OtherI->valno->id];
3025 TaintedLanes &= ~OV.WriteLanes;
3026 if (!OV.RedefVNI)
3027 break;
3028 } while (TaintedLanes.any());
3029 return true;
3030}
3031
3032bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3033 LaneBitmask Lanes) const {
3034 if (MI.isDebugOrPseudoInstr())
3035 return false;
3036 for (const MachineOperand &MO : MI.all_uses()) {
3037 if (MO.getReg() != Reg)
3038 continue;
3039 if (!MO.readsReg())
3040 continue;
3041 unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3042 if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3043 return true;
3044 }
3045 return false;
3046}
3047
3048bool JoinVals::resolveConflicts(JoinVals &Other) {
3049 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3050 Val &V = Vals[i];
3051 assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
3052 if (V.Resolution != CR_Unresolved)
3053 continue;
3054 LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
3055 << LR.getValNumInfo(i)->def
3056 << ' ' << PrintLaneMask(LaneMask) << '\n');
3057 if (SubRangeJoin)
3058 return false;
3059
3060 ++NumLaneConflicts;
3061 assert(V.OtherVNI && "Inconsistent conflict resolution.");
3062 VNInfo *VNI = LR.getValNumInfo(i);
3063 const Val &OtherV = Other.Vals[V.OtherVNI->id];
3064
3065 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
3066 // join, those lanes will be tainted with a wrong value. Get the extent of
3067 // the tainted lanes.
3068 LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
3070 if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
3071 // Tainted lanes would extend beyond the basic block.
3072 return false;
3073
3074 assert(!TaintExtent.empty() && "There should be at least one conflict.");
3075
3076 // Now look at the instructions from VNI->def to TaintExtent (inclusive).
3077 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3079 if (!VNI->isPHIDef()) {
3080 MI = Indexes->getInstructionFromIndex(VNI->def);
3081 if (!VNI->def.isEarlyClobber()) {
3082 // No need to check the instruction defining VNI for reads.
3083 ++MI;
3084 }
3085 }
3086 assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
3087 "Interference ends on VNI->def. Should have been handled earlier");
3088 MachineInstr *LastMI =
3089 Indexes->getInstructionFromIndex(TaintExtent.front().first);
3090 assert(LastMI && "Range must end at a proper instruction");
3091 unsigned TaintNum = 0;
3092 while (true) {
3093 assert(MI != MBB->end() && "Bad LastMI");
3094 if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
3095 LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
3096 return false;
3097 }
3098 // LastMI is the last instruction to use the current value.
3099 if (&*MI == LastMI) {
3100 if (++TaintNum == TaintExtent.size())
3101 break;
3102 LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
3103 assert(LastMI && "Range must end at a proper instruction");
3104 TaintedLanes = TaintExtent[TaintNum].second;
3105 }
3106 ++MI;
3107 }
3108
3109 // The tainted lanes are unused.
3110 V.Resolution = CR_Replace;
3111 ++NumLaneResolves;
3112 }
3113 return true;
3114}
3115
3116bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3117 Val &V = Vals[ValNo];
3118 if (V.Pruned || V.PrunedComputed)
3119 return V.Pruned;
3120
3121 if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3122 return V.Pruned;
3123
3124 // Follow copies up the dominator tree and check if any intermediate value
3125 // has been pruned.
3126 V.PrunedComputed = true;
3127 V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3128 return V.Pruned;
3129}
3130
3131void JoinVals::pruneValues(JoinVals &Other,
3132 SmallVectorImpl<SlotIndex> &EndPoints,
3133 bool changeInstrs) {
3134 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3135 SlotIndex Def = LR.getValNumInfo(i)->def;
3136 switch (Vals[i].Resolution) {
3137 case CR_Keep:
3138 break;
3139 case CR_Replace: {
3140 // This value takes precedence over the value in Other.LR.
3141 LIS->pruneValue(Other.LR, Def, &EndPoints);
3142 // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
3143 // instructions are only inserted to provide a live-out value for PHI
3144 // predecessors, so the instruction should simply go away once its value
3145 // has been replaced.
3146 Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
3147 bool EraseImpDef = OtherV.ErasableImplicitDef &&
3148 OtherV.Resolution == CR_Keep;
3149 if (!Def.isBlock()) {
3150 if (changeInstrs) {
3151 // Remove <def,read-undef> flags. This def is now a partial redef.
3152 // Also remove dead flags since the joined live range will
3153 // continue past this instruction.
3154 for (MachineOperand &MO :
3155 Indexes->getInstructionFromIndex(Def)->operands()) {
3156 if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
3157 if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
3158 MO.setIsUndef(false);
3159 MO.setIsDead(false);
3160 }
3161 }
3162 }
3163 // This value will reach instructions below, but we need to make sure
3164 // the live range also reaches the instruction at Def.
3165 if (!EraseImpDef)
3166 EndPoints.push_back(Def);
3167 }
3168 LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
3169 << ": " << Other.LR << '\n');
3170 break;
3171 }
3172 case CR_Erase:
3173 case CR_Merge:
3174 if (isPrunedValue(i, Other)) {
3175 // This value is ultimately a copy of a pruned value in LR or Other.LR.
3176 // We can no longer trust the value mapping computed by
3177 // computeAssignment(), the value that was originally copied could have
3178 // been replaced.
3179 LIS->pruneValue(LR, Def, &EndPoints);
3180 LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
3181 << Def << ": " << LR << '\n');
3182 }
3183 break;
3184 case CR_Unresolved:
3185 case CR_Impossible:
3186 llvm_unreachable("Unresolved conflicts");
3187 }
3188 }
3189}
3190
3191// Check if the segment consists of a copied live-through value (i.e. the copy
3192// in the block only extended the liveness, of an undef value which we may need
3193// to handle).
3194static bool isLiveThrough(const LiveQueryResult Q) {
3195 return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3196}
3197
3198/// Consider the following situation when coalescing the copy between
3199/// %31 and %45 at 800. (The vertical lines represent live range segments.)
3200///
3201/// Main range Subrange 0004 (sub2)
3202/// %31 %45 %31 %45
3203/// 544 %45 = COPY %28 + +
3204/// | v1 | v1
3205/// 560B bb.1: + +
3206/// 624 = %45.sub2 | v2 | v2
3207/// 800 %31 = COPY %45 + + + +
3208/// | v0 | v0
3209/// 816 %31.sub1 = ... + |
3210/// 880 %30 = COPY %31 | v1 +
3211/// 928 %45 = COPY %30 | + +
3212/// | | v0 | v0 <--+
3213/// 992B ; backedge -> bb.1 | + + |
3214/// 1040 = %31.sub0 + |
3215/// This value must remain
3216/// live-out!
3217///
3218/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3219/// redundant, since it copies the value from %45 back into it. The
3220/// conflict resolution for the main range determines that %45.v0 is
3221/// to be erased, which is ok since %31.v1 is identical to it.
3222/// The problem happens with the subrange for sub2: it has to be live
3223/// on exit from the block, but since 928 was actually a point of
3224/// definition of %45.sub2, %45.sub2 was not live immediately prior
3225/// to that definition. As a result, when 928 was erased, the value v0
3226/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3227/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3228/// providing an incorrect value to the use at 624.
3229///
3230/// Since the main-range values %31.v1 and %45.v0 were proved to be
3231/// identical, the corresponding values in subranges must also be the
3232/// same. A redundant copy is removed because it's not needed, and not
3233/// because it copied an undefined value, so any liveness that originated
3234/// from that copy cannot disappear. When pruning a value that started
3235/// at the removed copy, the corresponding identical value must be
3236/// extended to replace it.
3237void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
3238 // Look for values being erased.
3239 bool DidPrune = false;
3240 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3241 Val &V = Vals[i];
3242 // We should trigger in all cases in which eraseInstrs() does something.
3243 // match what eraseInstrs() is doing, print a message so
3244 if (V.Resolution != CR_Erase &&
3245 (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
3246 continue;
3247
3248 // Check subranges at the point where the copy will be removed.
3249 SlotIndex Def = LR.getValNumInfo(i)->def;
3250 SlotIndex OtherDef;
3251 if (V.Identical)
3252 OtherDef = V.OtherVNI->def;
3253
3254 // Print message so mismatches with eraseInstrs() can be diagnosed.
3255 LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
3256 << '\n');
3257 for (LiveInterval::SubRange &S : LI.subranges()) {
3258 LiveQueryResult Q = S.Query(Def);
3259
3260 // If a subrange starts at the copy then an undefined value has been
3261 // copied and we must remove that subrange value as well.
3262 VNInfo *ValueOut = Q.valueOutOrDead();
3263 if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
3264 (V.Identical && V.Resolution == CR_Erase &&
3265 ValueOut->def == Def))) {
3266 LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
3267 << " at " << Def << "\n");
3268 SmallVector<SlotIndex,8> EndPoints;
3269 LIS->pruneValue(S, Def, &EndPoints);
3270 DidPrune = true;
3271 // Mark value number as unused.
3272 ValueOut->markUnused();
3273
3274 if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
3275 // If V is identical to V.OtherVNI (and S was live at OtherDef),
3276 // then we can't simply prune V from S. V needs to be replaced
3277 // with V.OtherVNI.
3278 LIS->extendToIndices(S, EndPoints);
3279 }
3280
3281 // We may need to eliminate the subrange if the copy introduced a live
3282 // out undef value.
3283 if (ValueOut->isPHIDef())
3284 ShrinkMask |= S.LaneMask;
3285 continue;
3286 }
3287
3288 // If a subrange ends at the copy, then a value was copied but only
3289 // partially used later. Shrink the subregister range appropriately.
3290 //
3291 // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
3292 // conservatively correct.
3293 if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
3294 (V.Resolution == CR_Erase && isLiveThrough(Q))) {
3295 LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
3296 << PrintLaneMask(S.LaneMask) << " at " << Def
3297 << "\n");
3298 ShrinkMask |= S.LaneMask;
3299 }
3300 }
3301 }
3302 if (DidPrune)
3304}
3305
3306/// Check if any of the subranges of @p LI contain a definition at @p Def.
3308 for (LiveInterval::SubRange &SR : LI.subranges()) {
3309 if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3310 if (VNI->def == Def)
3311 return true;
3312 }
3313 return false;
3314}
3315
3316void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3317 assert(&static_cast<LiveRange&>(LI) == &LR);
3318
3319 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3320 if (Vals[i].Resolution != CR_Keep)
3321 continue;
3322 VNInfo *VNI = LR.getValNumInfo(i);
3323 if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3324 continue;
3325 Vals[i].Pruned = true;
3326 ShrinkMainRange = true;
3327 }
3328}
3329
3330void JoinVals::removeImplicitDefs() {
3331 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3332 Val &V = Vals[i];
3333 if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3334 continue;
3335
3336 VNInfo *VNI = LR.getValNumInfo(i);
3337 VNI->markUnused();
3338 LR.removeValNo(VNI);
3339 }
3340}
3341
3342void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3343 SmallVectorImpl<Register> &ShrinkRegs,
3344 LiveInterval *LI) {
3345 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3346 // Get the def location before markUnused() below invalidates it.
3347 VNInfo *VNI = LR.getValNumInfo(i);
3348 SlotIndex Def = VNI->def;
3349 switch (Vals[i].Resolution) {
3350 case CR_Keep: {
3351 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3352 // longer. The IMPLICIT_DEF instructions are only inserted by
3353 // PHIElimination to guarantee that all PHI predecessors have a value.
3354 if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
3355 break;
3356 // Remove value number i from LR.
3357 // For intervals with subranges, removing a segment from the main range
3358 // may require extending the previous segment: for each definition of
3359 // a subregister, there will be a corresponding def in the main range.
3360 // That def may fall in the middle of a segment from another subrange.
3361 // In such cases, removing this def from the main range must be
3362 // complemented by extending the main range to account for the liveness
3363 // of the other subrange.
3364 // The new end point of the main range segment to be extended.
3365 SlotIndex NewEnd;
3366 if (LI != nullptr) {
3368 assert(I != LR.end());
3369 // Do not extend beyond the end of the segment being removed.
3370 // The segment may have been pruned in preparation for joining
3371 // live ranges.
3372 NewEnd = I->end;
3373 }
3374
3375 LR.removeValNo(VNI);
3376 // Note that this VNInfo is reused and still referenced in NewVNInfo,
3377 // make it appear like an unused value number.
3378 VNI->markUnused();
3379
3380 if (LI != nullptr && LI->hasSubRanges()) {
3381 assert(static_cast<LiveRange*>(LI) == &LR);
3382 // Determine the end point based on the subrange information:
3383 // minimum of (earliest def of next segment,
3384 // latest end point of containing segment)
3385 SlotIndex ED, LE;
3386 for (LiveInterval::SubRange &SR : LI->subranges()) {
3387 LiveRange::iterator I = SR.find(Def);
3388 if (I == SR.end())
3389 continue;
3390 if (I->start > Def)
3391 ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3392 else
3393 LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3394 }
3395 if (LE.isValid())
3396 NewEnd = std::min(NewEnd, LE);
3397 if (ED.isValid())
3398 NewEnd = std::min(NewEnd, ED);
3399
3400 // We only want to do the extension if there was a subrange that
3401 // was live across Def.
3402 if (LE.isValid()) {
3403 LiveRange::iterator S = LR.find(Def);
3404 if (S != LR.begin())
3405 std::prev(S)->end = NewEnd;
3406 }
3407 }
3408 LLVM_DEBUG({
3409 dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3410 if (LI != nullptr)
3411 dbgs() << "\t\t LHS = " << *LI << '\n';
3412 });
3413 [[fallthrough]];
3414 }
3415
3416 case CR_Erase: {
3417 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3418 assert(MI && "No instruction to erase");
3419 if (MI->isCopy()) {
3420 Register Reg = MI->getOperand(1).getReg();
3421 if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3422 ShrinkRegs.push_back(Reg);
3423 }
3424 ErasedInstrs.insert(MI);
3425 LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3427 MI->eraseFromParent();
3428 break;
3429 }
3430 default:
3431 break;
3432 }
3433 }
3434}
3435
3436void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3437 LaneBitmask LaneMask,
3438 const CoalescerPair &CP) {
3439 SmallVector<VNInfo*, 16> NewVNInfo;
3440 JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3441 NewVNInfo, CP, LIS, TRI, true, true);
3442 JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3443 NewVNInfo, CP, LIS, TRI, true, true);
3444
3445 // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3446 // We should be able to resolve all conflicts here as we could successfully do
3447 // it on the mainrange already. There is however a problem when multiple
3448 // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3449 // interferences.
3450 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3451 // We already determined that it is legal to merge the intervals, so this
3452 // should never fail.
3453 llvm_unreachable("*** Couldn't join subrange!\n");
3454 }
3455 if (!LHSVals.resolveConflicts(RHSVals) ||
3456 !RHSVals.resolveConflicts(LHSVals)) {
3457 // We already determined that it is legal to merge the intervals, so this
3458 // should never fail.
3459 llvm_unreachable("*** Couldn't join subrange!\n");
3460 }
3461
3462 // The merging algorithm in LiveInterval::join() can't handle conflicting
3463 // value mappings, so we need to remove any live ranges that overlap a
3464 // CR_Replace resolution. Collect a set of end points that can be used to
3465 // restore the live range after joining.
3466 SmallVector<SlotIndex, 8> EndPoints;
3467 LHSVals.pruneValues(RHSVals, EndPoints, false);
3468 RHSVals.pruneValues(LHSVals, EndPoints, false);
3469
3470 LHSVals.removeImplicitDefs();
3471 RHSVals.removeImplicitDefs();
3472
3473 LRange.verify();
3474 RRange.verify();
3475
3476 // Join RRange into LHS.
3477 LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3478 NewVNInfo);
3479
3480 LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3481 << ' ' << LRange << "\n");
3482 if (EndPoints.empty())
3483 return;
3484
3485 // Recompute the parts of the live range we had to remove because of
3486 // CR_Replace conflicts.
3487 LLVM_DEBUG({
3488 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3489 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3490 dbgs() << EndPoints[i];
3491 if (i != n-1)
3492 dbgs() << ',';
3493 }
3494 dbgs() << ": " << LRange << '\n';
3495 });
3496 LIS->extendToIndices(LRange, EndPoints);
3497}
3498
3499void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3500 const LiveRange &ToMerge,
3501 LaneBitmask LaneMask,
3502 CoalescerPair &CP,
3503 unsigned ComposeSubRegIdx) {
3505 LI.refineSubRanges(
3506 Allocator, LaneMask,
3507 [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3508 if (SR.empty()) {
3509 SR.assign(ToMerge, Allocator);
3510 } else {
3511 // joinSubRegRange() destroys the merged range, so we need a copy.
3512 LiveRange RangeCopy(ToMerge, Allocator);
3513 joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3514 }
3515 },
3516 *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3517}
3518
3519bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3521 return false;
3522 auto &Counter = LargeLIVisitCounter[LI.reg()];
3523 if (Counter < LargeIntervalFreqThreshold) {
3524 Counter++;
3525 return false;
3526 }
3527 return true;
3528}
3529
3530bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3531 SmallVector<VNInfo*, 16> NewVNInfo;
3532 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3533 LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3534 bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3535 JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3536 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3537 JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3538 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3539
3540 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3541
3542 if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
3543 return false;
3544
3545 // First compute NewVNInfo and the simple value mappings.
3546 // Detect impossible conflicts early.
3547 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3548 return false;
3549
3550 // Some conflicts can only be resolved after all values have been mapped.
3551 if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3552 return false;
3553
3554 // All clear, the live ranges can be merged.
3555 if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3557
3558 // Transform lanemasks from the LHS to masks in the coalesced register and
3559 // create initial subranges if necessary.
3560 unsigned DstIdx = CP.getDstIdx();
3561 if (!LHS.hasSubRanges()) {
3562 LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3563 : TRI->getSubRegIndexLaneMask(DstIdx);
3564 // LHS must support subregs or we wouldn't be in this codepath.
3565 assert(Mask.any());
3566 LHS.createSubRangeFrom(Allocator, Mask, LHS);
3567 } else if (DstIdx != 0) {
3568 // Transform LHS lanemasks to new register class if necessary.
3569 for (LiveInterval::SubRange &R : LHS.subranges()) {
3570 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3571 R.LaneMask = Mask;
3572 }
3573 }
3574 LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3575 << '\n');
3576
3577 // Determine lanemasks of RHS in the coalesced register and merge subranges.
3578 unsigned SrcIdx = CP.getSrcIdx();
3579 if (!RHS.hasSubRanges()) {
3580 LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3581 : TRI->getSubRegIndexLaneMask(SrcIdx);
3582 mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
3583 } else {
3584 // Pair up subranges and merge.
3585 for (LiveInterval::SubRange &R : RHS.subranges()) {
3586 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3587 mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
3588 }
3589 }
3590 LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3591
3592 // Pruning implicit defs from subranges may result in the main range
3593 // having stale segments.
3594 LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3595
3596 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3597 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3598 }
3599
3600 // The merging algorithm in LiveInterval::join() can't handle conflicting
3601 // value mappings, so we need to remove any live ranges that overlap a
3602 // CR_Replace resolution. Collect a set of end points that can be used to
3603 // restore the live range after joining.
3604 SmallVector<SlotIndex, 8> EndPoints;
3605 LHSVals.pruneValues(RHSVals, EndPoints, true);
3606 RHSVals.pruneValues(LHSVals, EndPoints, true);
3607
3608 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3609 // registers to require trimming.
3610 SmallVector<Register, 8> ShrinkRegs;
3611 LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3612 RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3613 while (!ShrinkRegs.empty())
3614 shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3615
3616 // Scan and mark undef any DBG_VALUEs that would refer to a different value.
3617 checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
3618
3619 // If the RHS covers any PHI locations that were tracked for debug-info, we
3620 // must update tracking information to reflect the join.
3621 auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
3622 if (RegIt != RegToPHIIdx.end()) {
3623 // Iterate over all the debug instruction numbers assigned this register.
3624 for (unsigned InstID : RegIt->second) {
3625 auto PHIIt = PHIValToPos.find(InstID);
3626 assert(PHIIt != PHIValToPos.end());
3627 const SlotIndex &SI = PHIIt->second.SI;
3628
3629 // Does the RHS cover the position of this PHI?
3630 auto LII = RHS.find(SI);
3631 if (LII == RHS.end() || LII->start > SI)
3632 continue;
3633
3634 // Accept two kinds of subregister movement:
3635 // * When we merge from one register class into a larger register:
3636 // %1:gr16 = some-inst
3637 // ->
3638 // %2:gr32.sub_16bit = some-inst
3639 // * When the PHI is already in a subregister, and the larger class
3640 // is coalesced:
3641 // %2:gr32.sub_16bit = some-inst
3642 // %3:gr32 = COPY %2
3643 // ->
3644 // %3:gr32.sub_16bit = some-inst
3645 // Test for subregister move:
3646 if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
3647 // If we're moving between different subregisters, ignore this join.
3648 // The PHI will not get a location, dropping variable locations.
3649 if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
3650 continue;
3651
3652 // Update our tracking of where the PHI is.
3653 PHIIt->second.Reg = CP.getDstReg();
3654
3655 // If we merge into a sub-register of a larger class (test above),
3656 // update SubReg.
3657 if (CP.getSrcIdx() != 0)
3658 PHIIt->second.SubReg = CP.getSrcIdx();
3659 }
3660
3661 // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
3662 // different VRegs now. Copy old collection of debug instruction numbers and
3663 // erase the old one:
3664 auto InstrNums = RegIt->second;
3665 RegToPHIIdx.erase(RegIt);
3666
3667 // There might already be PHIs being tracked in the destination VReg. Insert
3668 // into an existing tracking collection, or insert a new one.
3669 RegIt = RegToPHIIdx.find(CP.getDstReg());
3670 if (RegIt != RegToPHIIdx.end())
3671 RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
3672 InstrNums.end());
3673 else
3674 RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
3675 }
3676
3677 // Join RHS into LHS.
3678 LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3679
3680 // Kill flags are going to be wrong if the live ranges were overlapping.
3681 // Eventually, we should simply clear all kill flags when computing live
3682 // ranges. They are reinserted after register allocation.
3683 MRI->clearKillFlags(LHS.reg());
3684 MRI->clearKillFlags(RHS.reg());
3685
3686 if (!EndPoints.empty()) {
3687 // Recompute the parts of the live range we had to remove because of
3688 // CR_Replace conflicts.
3689 LLVM_DEBUG({
3690 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3691 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3692 dbgs() << EndPoints[i];
3693 if (i != n-1)
3694 dbgs() << ',';
3695 }
3696 dbgs() << ": " << LHS << '\n';
3697 });
3698 LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3699 }
3700
3701 return true;
3702}
3703
3704bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3705 return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3706}
3707
3708void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
3709{
3710 const SlotIndexes &Slots = *LIS->getSlotIndexes();
3712
3713 // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3714 // vreg => DbgValueLoc map.
3715 auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3716 for (auto *X : ToInsert) {
3717 for (const auto &Op : X->debug_operands()) {
3718 if (Op.isReg() && Op.getReg().isVirtual())
3719 DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3720 }
3721 }
3722
3723 ToInsert.clear();
3724 };
3725
3726 // Iterate over all instructions, collecting them into the ToInsert vector.
3727 // Once a non-debug instruction is found, record the slot index of the
3728 // collected DBG_VALUEs.
3729 for (auto &MBB : MF) {
3730 SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3731
3732 for (auto &MI : MBB) {
3733 if (MI.isDebugValue()) {
3734 if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3735 return MO.isReg() && MO.getReg().isVirtual();
3736 }))
3737 ToInsert.push_back(&MI);
3738 } else if (!MI.isDebugOrPseudoInstr()) {
3739 CurrentSlot = Slots.getInstructionIndex(MI);
3740 CloseNewDVRange(CurrentSlot);
3741 }
3742 }
3743
3744 // Close range of DBG_VALUEs at the end of blocks.
3745 CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3746 }
3747
3748 // Sort all DBG_VALUEs we've seen by slot number.
3749 for (auto &Pair : DbgVRegToValues)
3750 llvm::sort(Pair.second);
3751}
3752
3753void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3754 LiveRange &LHS,
3755 JoinVals &LHSVals,
3756 LiveRange &RHS,
3757 JoinVals &RHSVals) {
3758 auto ScanForDstReg = [&](Register Reg) {
3759 checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3760 };
3761
3762 auto ScanForSrcReg = [&](Register Reg) {
3763 checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3764 };
3765
3766 // Scan for unsound updates of both the source and destination register.
3767 ScanForSrcReg(CP.getSrcReg());
3768 ScanForDstReg(CP.getDstReg());
3769}
3770
/// Mark as undef any DBG_VALUEs of \p Reg whose meaning may change once \p Reg
/// is merged with the register whose live range is \p OtherLR. \p RegLR is
/// Reg's own live range; \p RegVals holds the per-value conflict resolutions
/// computed for this join.
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
                                                         LiveRange &OtherLR,
                                                         LiveRange &RegLR,
                                                         JoinVals &RegVals) {
  // Are there any DBG_VALUEs to examine?
  auto VRegMapIt = DbgVRegToValues.find(Reg);
  if (VRegMapIt == DbgVRegToValues.end())
    return;

  // Two cursors advanced in lockstep below: one over the (slot, DBG_VALUE)
  // pairs for Reg, one over the segments of the other register's range.
  auto &DbgValueSet = VRegMapIt->second;
  auto DbgValueSetIt = DbgValueSet.begin();
  auto SegmentIt = OtherLR.begin();

  bool LastUndefResult = false;
  SlotIndex LastUndefIdx;

  // If the "Other" register is live at a slot Idx, test whether Reg can
  // safely be merged with it, or should be marked undef.
  auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
                      &LastUndefIdx](SlotIndex Idx) -> bool {
    // Our worst-case performance typically happens with asan, causing very
    // many DBG_VALUEs of the same location. Cache a copy of the most recent
    // result for this edge-case.
    if (LastUndefIdx == Idx)
      return LastUndefResult;

    // If the other range was live, and Reg's was not, the register coalescer
    // will not have tried to resolve any conflicts. We don't know whether
    // the DBG_VALUE will refer to the same value number, so it must be made
    // undef.
    auto OtherIt = RegLR.find(Idx);
    if (OtherIt == RegLR.end())
      return true;

    // Both the registers were live: examine the conflict resolution record for
    // the value number Reg refers to. CR_Keep meant that this value number
    // "won" and the merged register definitely refers to that value. CR_Erase
    // means the value number was a redundant copy of the other value, which
    // was coalesced and Reg deleted. It's safe to refer to the other register
    // (which will be the source of the copy).
    auto Resolution = RegVals.getResolution(OtherIt->valno->id);
    LastUndefResult = Resolution != JoinVals::CR_Keep &&
                      Resolution != JoinVals::CR_Erase;
    LastUndefIdx = Idx;
    return LastUndefResult;
  };

  // Iterate over both the live-range of the "Other" register, and the set of
  // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
  // slot index. This relies on the DbgValueSet being ordered.
  while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
    if (DbgValueSetIt->first < SegmentIt->end) {
      // "Other" is live and there is a DBG_VALUE of Reg: test if we should
      // set it undef.
      if (DbgValueSetIt->first >= SegmentIt->start) {
        bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
        bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
        if (HasReg && ShouldUndefReg) {
          // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
          DbgValueSetIt->second->setDebugValueUndef();
          continue;
        }
      }
      ++DbgValueSetIt;
    } else {
      ++SegmentIt;
    }
  }
}
3840
3841namespace {
3842
3843/// Information concerning MBB coalescing priority.
3844struct MBBPriorityInfo {
3846 unsigned Depth;
3847 bool IsSplit;
3848
3849 MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3850 : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3851};
3852
3853} // end anonymous namespace
3854
3855/// C-style comparator that sorts first based on the loop depth of the basic
3856/// block (the unsigned), and then on the MBB number.
3857///
3858/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3859static int compareMBBPriority(const MBBPriorityInfo *LHS,
3860 const MBBPriorityInfo *RHS) {
3861 // Deeper loops first
3862 if (LHS->Depth != RHS->Depth)
3863 return LHS->Depth > RHS->Depth ? -1 : 1;
3864
3865 // Try to unsplit critical edges next.
3866 if (LHS->IsSplit != RHS->IsSplit)
3867 return LHS->IsSplit ? -1 : 1;
3868
3869 // Prefer blocks that are more connected in the CFG. This takes care of
3870 // the most difficult copies first while intervals are short.
3871 unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3872 unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3873 if (cl != cr)
3874 return cl > cr ? -1 : 1;
3875
3876 // As a last resort, sort by block number.
3877 return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3878}
3879
3880/// \returns true if the given copy uses or defines a local live range.
3881static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3882 if (!Copy->isCopy())
3883 return false;
3884
3885 if (Copy->getOperand(1).isUndef())
3886 return false;
3887
3888 Register SrcReg = Copy->getOperand(1).getReg();
3889 Register DstReg = Copy->getOperand(0).getReg();
3890 if (SrcReg.isPhysical() || DstReg.isPhysical())
3891 return false;
3892
3893 return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3894 || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3895}
3896
3897void RegisterCoalescer::lateLiveIntervalUpdate() {
3898 for (Register reg : ToBeUpdated) {
3899 if (!LIS->hasInterval(reg))
3900 continue;
3901 LiveInterval &LI = LIS->getInterval(reg);
3902 shrinkToUses(&LI, &DeadDefs);
3903 if (!DeadDefs.empty())
3904 eliminateDeadDefs();
3905 }
3906 ToBeUpdated.clear();
3907}
3908
3909bool RegisterCoalescer::
3910copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3911 bool Progress = false;
3912 for (MachineInstr *&MI : CurrList) {
3913 if (!MI)
3914 continue;
3915 // Skip instruction pointers that have already been erased, for example by
3916 // dead code elimination.
3917 if (ErasedInstrs.count(MI)) {
3918 MI = nullptr;
3919 continue;
3920 }
3921 bool Again = false;
3922 bool Success = joinCopy(MI, Again);
3923 Progress |= Success;
3924 if (Success || !Again)
3925 MI = nullptr;
3926 }
3927 return Progress;
3928}
3929
3930/// Check if DstReg is a terminal node.
3931/// I.e., it does not have any affinity other than \p Copy.
3932static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
3933 const MachineRegisterInfo *MRI) {
3934 assert(Copy.isCopyLike());
3935 // Check if the destination of this copy as any other affinity.
3936 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3937 if (&MI != &Copy && MI.isCopyLike())
3938 return false;
3939 return true;
3940}
3941
3942bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3943 assert(Copy.isCopyLike());
3944 if (!UseTerminalRule)
3945 return false;
3946 Register SrcReg, DstReg;
3947 unsigned SrcSubReg = 0, DstSubReg = 0;
3948 if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
3949 return false;
3950 // Check if the destination of this copy has any other affinity.
3951 if (DstReg.isPhysical() ||
3952 // If SrcReg is a physical register, the copy won't be coalesced.
3953 // Ignoring it may have other side effect (like missing
3954 // rematerialization). So keep it.
3955 SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
3956 return false;
3957
3958 // DstReg is a terminal node. Check if it interferes with any other
3959 // copy involving SrcReg.
3960 const MachineBasicBlock *OrigBB = Copy.getParent();
3961 const LiveInterval &DstLI = LIS->getInterval(DstReg);
3962 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3963 // Technically we should check if the weight of the new copy is
3964 // interesting compared to the other one and update the weight
3965 // of the copies accordingly. However, this would only work if
3966 // we would gather all the copies first then coalesce, whereas
3967 // right now we interleave both actions.
3968 // For now, just consider the copies that are in the same block.
3969 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
3970 continue;
3971 Register OtherSrcReg, OtherReg;
3972 unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
3973 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3974 OtherSubReg))
3975 return false;
3976 if (OtherReg == SrcReg)
3977 OtherReg = OtherSrcReg;
3978 // Check if OtherReg is a non-terminal.
3979 if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
3980 continue;
3981 // Check that OtherReg interfere with DstReg.
3982 if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3983 LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
3984 << '\n');
3985 return true;
3986 }
3987 }
3988 return false;
3989}
3990
3991void
3992RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
3993 LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
3994
3995 // Collect all copy-like instructions in MBB. Don't start coalescing anything
3996 // yet, it might invalidate the iterator.
3997 const unsigned PrevSize = WorkList.size();
3998 if (JoinGlobalCopies) {
3999 SmallVector<MachineInstr*, 2> LocalTerminals;
4000 SmallVector<MachineInstr*, 2> GlobalTerminals;
4001 // Coalesce copies bottom-up to coalesce local defs before local uses. They
4002 // are not inherently easier to resolve, but slightly preferable until we
4003 // have local live range splitting. In particular this is required by
4004 // cmp+jmp macro fusion.
4005 for (MachineInstr &MI : *MBB) {
4006 if (!MI.isCopyLike())
4007 continue;
4008 bool ApplyTerminalRule = applyTerminalRule(MI);
4009 if (isLocalCopy(&MI, LIS)) {
4010 if (ApplyTerminalRule)
4011 LocalTerminals.push_back(&MI);
4012 else
4013 LocalWorkList.push_back(&MI);
4014 } else {
4015 if (ApplyTerminalRule)
4016 GlobalTerminals.push_back(&MI);
4017 else
4018 WorkList.push_back(&MI);
4019 }
4020 }
4021 // Append the copies evicted by the terminal rule at the end of the list.
4022 LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
4023 WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
4024 }
4025 else {
4027 for (MachineInstr &MII : *MBB)
4028 if (MII.isCopyLike()) {
4029 if (applyTerminalRule(MII))
4030 Terminals.push_back(&MII);
4031 else
4032 WorkList.push_back(&MII);
4033 }
4034 // Append the copies evicted by the terminal rule at the end of the list.
4035 WorkList.append(Terminals.begin(), Terminals.end());
4036 }
4037 // Try coalescing the collected copies immediately, and remove the nulls.
4038 // This prevents the WorkList from getting too large since most copies are
4039 // joinable on the first attempt.
4041 CurrList(WorkList.begin() + PrevSize, WorkList.end());
4042 if (copyCoalesceWorkList(CurrList))
4043 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
4044 nullptr), WorkList.end());
4045}
4046
4047void RegisterCoalescer::coalesceLocals() {
4048 copyCoalesceWorkList(LocalWorkList);
4049 for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
4050 if (LocalWorkList[j])
4051 WorkList.push_back(LocalWorkList[j]);
4052 }
4053 LocalWorkList.clear();
4054}
4055
4056void RegisterCoalescer::joinAllIntervals() {
4057 LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4058 assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4059
4060 std::vector<MBBPriorityInfo> MBBs;
4061 MBBs.reserve(MF->size());
4062 for (MachineBasicBlock &MBB : *MF) {
4063 MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4064 JoinSplitEdges && isSplitEdge(&MBB)));
4065 }
4066 array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4067
4068 // Coalesce intervals in MBB priority order.
4069 unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4070 for (MBBPriorityInfo &MBB : MBBs) {
4071 // Try coalescing the collected local copies for deeper loops.
4072 if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4073 coalesceLocals();
4074 CurrDepth = MBB.Depth;
4075 }
4076 copyCoalesceInMBB(MBB.MBB);
4077 }
4078 lateLiveIntervalUpdate();
4079 coalesceLocals();
4080
4081 // Joining intervals can allow other intervals to be joined. Iteratively join
4082 // until we make no progress.
4083 while (copyCoalesceWorkList(WorkList))
4084 /* empty */ ;
4085 lateLiveIntervalUpdate();
4086}
4087
/// Drop all per-function state so nothing is retained between pass runs.
void RegisterCoalescer::releaseMemory() {
  ErasedInstrs.clear();
  WorkList.clear();
  DeadDefs.clear();
  InflateRegs.clear();
  LargeLIVisitCounter.clear();
}
4095
4096bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
4097 LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
4098 << "********** Function: " << fn.getName() << '\n');
4099
4100 // Variables changed between a setjmp and a longjump can have undefined value
4101 // after the longjmp. This behaviour can be observed if such a variable is
4102 // spilled, so longjmp won't restore the value in the spill slot.
4103 // RegisterCoalescer should not run in functions with a setjmp to avoid
4104 // merging such undefined variables with predictable ones.
4105 //
4106 // TODO: Could specifically disable coalescing registers live across setjmp
4107 // calls
4108 if (fn.exposesReturnsTwice()) {
4109 LLVM_DEBUG(
4110 dbgs() << "* Skipped as it exposes functions that returns twice.\n");
4111 return false;
4112 }
4113
4114 MF = &fn;
4115 MRI = &fn.getRegInfo();
4116 const TargetSubtargetInfo &STI = fn.getSubtarget();
4117 TRI = STI.getRegisterInfo();
4118 TII = STI.getInstrInfo();
4119 LIS = &getAnalysis<LiveIntervals>();
4120 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4121 Loops = &getAnalysis<MachineLoopInfo>();
4123 JoinGlobalCopies = STI.enableJoinGlobalCopies();
4124 else
4125 JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
4126
4127 // If there are PHIs tracked by debug-info, they will need updating during
4128 // coalescing. Build an index of those PHIs to ease updating.
4129 SlotIndexes *Slots = LIS->getSlotIndexes();
4130 for (const auto &DebugPHI : MF->DebugPHIPositions) {
4131 MachineBasicBlock *MBB = DebugPHI.second.MBB;
4132 Register Reg = DebugPHI.second.Reg;
4133 unsigned SubReg = DebugPHI.second.SubReg;
4134 SlotIndex SI = Slots->getMBBStartIdx(MBB);
4135 PHIValPos P = {SI, Reg, SubReg};
4136 PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
4137 RegToPHIIdx[Reg].push_back(DebugPHI.first);
4138 }
4139
4140 // The MachineScheduler does not currently require JoinSplitEdges. This will
4141 // either be enabled unconditionally or replaced by a more general live range
4142 // splitting optimization.
4143 JoinSplitEdges = EnableJoinSplits;
4144
4145 if (VerifyCoalescing)
4146 MF->verify(this, "Before register coalescing");
4147
4148 DbgVRegToValues.clear();
4150
4151 RegClassInfo.runOnMachineFunction(fn);
4152
4153 // Join (coalesce) intervals if requested.
4154 if (EnableJoining)
4155 joinAllIntervals();
4156
4157 // After deleting a lot of copies, register classes may be less constrained.
4158 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
4159 // DPR inflation.
4160 array_pod_sort(InflateRegs.begin(), InflateRegs.end());
4161 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
4162 InflateRegs.end());
4163 LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
4164 << " regs.\n");
4165 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
4166 Register Reg = InflateRegs[i];
4167 if (MRI->reg_nodbg_empty(Reg))
4168 continue;
4169 if (MRI->recomputeRegClass(Reg)) {
4170 LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
4171 << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
4172 ++NumInflated;
4173
4174 LiveInterval &LI = LIS->getInterval(Reg);
4175 if (LI.hasSubRanges()) {
4176 // If the inflated register class does not support subregisters anymore
4177 // remove the subranges.
4178 if (!MRI->shouldTrackSubRegLiveness(Reg)) {
4179 LI.clearSubRanges();
4180 } else {
4181#ifndef NDEBUG
4182 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
4183 // If subranges are still supported, then the same subregs
4184 // should still be supported.
4185 for (LiveInterval::SubRange &S : LI.subranges()) {
4186 assert((S.LaneMask & ~MaxMask).none());
4187 }
4188#endif
4189 }
4190 }
4191 }
4192 }
4193
4194 // After coalescing, update any PHIs that are being tracked by debug-info
4195 // with their new VReg locations.
4196 for (auto &p : MF->DebugPHIPositions) {
4197 auto it = PHIValToPos.find(p.first);
4198 assert(it != PHIValToPos.end());
4199 p.second.Reg = it->second.Reg;
4200 p.second.SubReg = it->second.SubReg;
4201 }
4202
4203 PHIValToPos.clear();
4204 RegToPHIIdx.clear();
4205
4206 LLVM_DEBUG(dump());
4207 if (VerifyCoalescing)
4208 MF->verify(this, "After register coalescing");
4209 return true;
4210}
4211
/// Print the pass state by delegating to the LiveIntervals analysis.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
  LIS->print(O, m);
}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1269
bool End
Definition: ELF_riscv.cpp:464
SmallVector< uint32_t, 0 > Writes
Definition: ELF_riscv.cpp:481
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
unsigned UseOpIdx
Basic Register Allocator
simple register coalescing
static cl::opt< cl::boolOrDefault > EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), cl::init(cl::BOU_UNSET), cl::Hidden)
Temporary flag to test global copy optimization.
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS)
simple register Simple Register Coalescing
static bool isSplitEdge(const MachineBasicBlock *MBB)
Return true if this block should be vacated by the coalescer to eliminate branches.
static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS)
C-style comparator that sorts first based on the loop depth of the basic block (the unsigned),...
static cl::opt< unsigned > LargeIntervalSizeThreshold("large-interval-size-threshold", cl::Hidden, cl::desc("If the valnos size of an interval is larger than the threshold, " "it is regarded as a large interval. "), cl::init(100))
simple register Simple Register static false bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, Register &Src, Register &Dst, unsigned &SrcSub, unsigned &DstSub)
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def)
Check if any of the subranges of LI contain a definition at Def.
static cl::opt< unsigned > LargeIntervalFreqThreshold("large-interval-freq-threshold", cl::Hidden, cl::desc("For a large interval, if it is coalesced with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(256))
static std::pair< bool, bool > addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo)
Copy segments with value number SrcValNo from liverange Src to live range @Dst and use value number D...
static bool isLiveThrough(const LiveQueryResult Q)
static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI)
Check if DstReg is a terminal node.
static cl::opt< bool > VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden)
static cl::opt< bool > EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden)
Temporary flag to test critical edge unsplitting.
static cl::opt< bool > EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden)
static bool definesFullReg(const MachineInstr &MI, Register Reg)
Returns true if MI defines the full vreg Reg, as opposed to just defining a subregister.
static cl::opt< unsigned > LateRematUpdateThreshold("late-remat-update-threshold", cl::Hidden, cl::desc("During rematerialization for a copy, if the def instruction has " "many other copy uses to be rematerialized, delay the multiple " "separate live interval update work and do them all at once after " "all those rematerialization are done. It will save a lot of " "repeated work. "), cl::init(100))
static cl::opt< bool > UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), cl::init(false), cl::Hidden)
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static DenseMap< Register, std::vector< std::pair< SlotIndex, MachineInstr * > > > buildVRegToDbgValueMap(MachineFunction &MF, const LiveIntervals *Liveness)
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS)
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
bool test(unsigned Idx) const
Definition: BitVector.h:461
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
A helper class for register coalescers.
bool flip()
Swap SrcReg and DstReg.
bool isCoalescable(const MachineInstr *) const
Return true if MI is a copy instruction that will become an identity copy after coalescing.
bool setRegisters(const MachineInstr *)
Set registers to match the copy instruction MI.
The location of a single variable, composed of an expression and 0 or more DbgValueLocEntries.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
bool isAsCheapAsAMove(const MachineInstr &MI) const override
A live range for subregisters.
Definition: LiveInterval.h:693
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
Register reg() const
Definition: LiveInterval.h:717
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
SubRange * createSubRangeFrom(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, const LiveRange &CopyFrom)
Like createSubRange() but the new range is filled with a copy of the liveness information in CopyFrom...
Definition: LiveInterval.h:794
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply, const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, unsigned ComposeSubRegIdx=0)
Refines the subranges to support LaneMask.
void computeSubRangeUndefs(SmallVectorImpl< SlotIndex > &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const
For a given lane mask LaneMask, compute indexes at which the lane is marked undefined by subregister ...
SubRange * createSubRange(BumpPtrAllocator &Allocator, LaneBitmask LaneMask)
Creates a new empty subregister live range.
Definition: LiveInterval.h:785
void clearSubRanges()
Removes all subregister liveness information.
bool hasInterval(Register Reg) const
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const
Returns true if VNI is killed by any PHI-def values in LI.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VNInfo::Allocator & getVNInfoAllocator()
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveRange * getCachedRegUnit(unsigned Unit)
Return the live range for register unit Unit if it has already been computed, or nullptr if it hasn't...
LiveInterval & getInterval(Register Reg)
void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl< SlotIndex > *EndPoints)
If LR has a live value at Kill, prune its live range by removing any liveness reachable from Kill.
void removeInterval(Register Reg)
Interval removal.
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos)
Remove value numbers and related live segments starting at position Pos that are part of any liverang...
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval * > &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void print(raw_ostream &O, const Module *=nullptr) const override
Implement the dump method.
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueOutOrDead() const
Returns the value alive at the end of the instruction, if any.
Definition: LiveInterval.h:129
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
Definition: LiveInterval.h:123
VNInfo * valueDefined() const
Return the value defined by this instruction, if any.
Definition: LiveInterval.h:135
SlotIndex endPoint() const
Return the end point of the last live range segment to interact with the instruction,...
Definition: LiveInterval.h:147
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
Callback methods for LiveRangeEdit owners.
Definition: LiveRangeEdit.h:45
virtual void LRE_WillEraseInstruction(MachineInstr *MI)
Called immediately before erasing a dead machine instruction.
Definition: LiveRangeEdit.h:52
void eliminateDeadDefs(SmallVectorImpl< MachineInstr * > &Dead, ArrayRef< Register > RegsBeingSpilled=std::nullopt)
eliminateDeadDefs - Try to delete machine instructions that are now dead (allDefsAreDead returns true...
SlotIndex rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, const Remat &RM, const TargetRegisterInfo &, bool Late=false, unsigned SubIdx=0, MachineInstr *ReplaceIndexMI=nullptr)
rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an instruction into MBB before...
bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI)
checkRematerializable - Manually add VNI to the list of rematerializable values if DefMI may be remat...
bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, bool cheapAsAMove)
canRematerializeAt - Determine if ParentVNI can be rematerialized at UseIdx.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:317
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void join(LiveRange &Other, const int *ValNoAssignments, const int *RHSValNoAssignments, SmallVectorImpl< VNInfo * > &NewVNInfo)
join - Join two live ranges (this, and other) together.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
VNInfo * createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc)
createDeadDef - Make sure the range has a value defined at Def.
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
void verify() const
Walk the range and assert if any invariants fail to hold.
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429
VNInfo * MergeValueNumberInto(VNInfo *V1, VNInfo *V2)
MergeValueNumberInto - This method is called when two value numbers are found to be equivalent.
unsigned getNumValNums() const
Definition: LiveInterval.h:313
iterator begin()
Definition: LiveInterval.h:215
Segments segments
Definition: LiveInterval.h:203
VNInfoList valnos
Definition: LiveInterval.h:204
bool containsOneValue() const
Definition: LiveInterval.h:311
size_t size() const
Definition: LiveInterval.h:305
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none.
Definition: LiveInterval.h:436
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
MCRegUnitRootIterator enumerates the root registers of a register unit.
bool isValid() const
Check if the iterator is at the end of the list.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
bool isSafeToMove(AAResults *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool isImplicitDef() const
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:320
bool isCopyLike() const
Return true if the instruction behaves like a copy.
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
std::pair< bool, bool > readsWritesVirtualRegister(Register Reg, SmallVectorImpl< unsigned > *Ops=nullptr) const
Return a pair of bools (reads, writes) indicating if this instruction reads or writes Reg.
bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx=nullptr) const
Return true if the use operand of the specified index is tied to a def operand.
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:526
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:520
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:648
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:452
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:533
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:719
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
bool allDefsAreDead() const
Return true if all the defs of this instruction are dead.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
void setIsUndef(bool Val=true)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
bool isEarlyClobber() const
isEarlyClobber - Returns true if this is an early-clobber slot.
Definition: SlotIndexes.h:234
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:246
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
Definition: SlotIndexes.h:294
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:259
bool isDead() const
isDead - Returns true if this is a dead def kill slot.
Definition: SlotIndexes.h:241
SlotIndexes pass.
Definition: SlotIndexes.h:319
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
Returns the basic block which the given index falls in.
Definition: SlotIndexes.h:519
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:481
SlotIndex getNextNonNullIndex(SlotIndex Index)
Returns the next non-null index, if one exists.
Definition: SlotIndexes.h:414
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:390
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:471
SlotIndex getIndexBefore(const MachineInstr &MI) const
getIndexBefore - Returns the index of the last indexed instruction before MI, or the start index of i...
Definition: SlotIndexes.h:427
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated...
Definition: SlotIndexes.h:408
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:289
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...