LLVM 19.0.0git
RegisterCoalescer.cpp
Go to the documentation of this file.
1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the generic RegisterCoalescer interface which
10// is used as the common interface used by all clients and
11// implementations of register coalescing.
12//
13//===----------------------------------------------------------------------===//
14
15#include "RegisterCoalescer.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/CodeGen/Passes.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/LaneBitmask.h"
45#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <limits>
57#include <tuple>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "regalloc"
64
65STATISTIC(numJoins , "Number of interval joins performed");
66STATISTIC(numCrossRCs , "Number of cross class joins performed");
67STATISTIC(numCommutes , "Number of instruction commuting performed");
68STATISTIC(numExtends , "Number of copies extended");
69STATISTIC(NumReMats , "Number of instructions re-materialized");
70STATISTIC(NumInflated , "Number of register classes inflated");
71STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
73STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
74
75static cl::opt<bool> EnableJoining("join-liveintervals",
76 cl::desc("Coalesce copies (default=true)"),
77 cl::init(true), cl::Hidden);
78
79static cl::opt<bool> UseTerminalRule("terminal-rule",
80 cl::desc("Apply the terminal rule"),
81 cl::init(false), cl::Hidden);
82
83/// Temporary flag to test critical edge unsplitting.
84static cl::opt<bool>
85EnableJoinSplits("join-splitedges",
86 cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87
88/// Temporary flag to test global copy optimization.
90EnableGlobalCopies("join-globalcopies",
91 cl::desc("Coalesce copies that span blocks (default=subtarget)"),
93
94static cl::opt<bool>
95VerifyCoalescing("verify-coalescing",
96 cl::desc("Verify machine instrs before and after register coalescing"),
98
100 "late-remat-update-threshold", cl::Hidden,
101 cl::desc("During rematerialization for a copy, if the def instruction has "
102 "many other copy uses to be rematerialized, delay the multiple "
103 "separate live interval update work and do them all at once after "
104 "all those rematerialization are done. It will save a lot of "
105 "repeated work. "),
106 cl::init(100));
107
109 "large-interval-size-threshold", cl::Hidden,
110 cl::desc("If the valnos size of an interval is larger than the threshold, "
111 "it is regarded as a large interval. "),
112 cl::init(100));
113
115 "large-interval-freq-threshold", cl::Hidden,
116 cl::desc("For a large interval, if it is coalesed with other live "
117 "intervals many times more than the threshold, stop its "
118 "coalescing to control the compile time. "),
119 cl::init(256));
120
121namespace {
122
123 class JoinVals;
124
125 class RegisterCoalescer : public MachineFunctionPass,
127 MachineFunction* MF = nullptr;
128 MachineRegisterInfo* MRI = nullptr;
129 const TargetRegisterInfo* TRI = nullptr;
130 const TargetInstrInfo* TII = nullptr;
131 LiveIntervals *LIS = nullptr;
132 const MachineLoopInfo* Loops = nullptr;
133 AliasAnalysis *AA = nullptr;
134 RegisterClassInfo RegClassInfo;
135
136 /// Position and VReg of a PHI instruction during coalescing.
137 struct PHIValPos {
138 SlotIndex SI; ///< Slot where this PHI occurs.
139 Register Reg; ///< VReg the PHI occurs in.
140 unsigned SubReg; ///< Qualifying subregister for Reg.
141 };
142
143 /// Map from debug instruction number to PHI position during coalescing.
145 /// Index of, for each VReg, which debug instruction numbers and
146 /// corresponding PHIs are sensitive to coalescing. Each VReg may have
147 /// multiple PHI defs, at different positions.
149
150 /// Debug variable location tracking -- for each VReg, maintain an
151 /// ordered-by-slot-index set of DBG_VALUEs, to help quick
152 /// identification of whether coalescing may change location validity.
153 using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
155
156 /// A LaneMask to remember on which subregister live ranges we need to call
157 /// shrinkToUses() later.
158 LaneBitmask ShrinkMask;
159
160 /// True if the main range of the currently coalesced intervals should be
161 /// checked for smaller live intervals.
162 bool ShrinkMainRange = false;
163
164 /// True if the coalescer should aggressively coalesce global copies
165 /// in favor of keeping local copies.
166 bool JoinGlobalCopies = false;
167
168 /// True if the coalescer should aggressively coalesce fall-thru
169 /// blocks exclusively containing copies.
170 bool JoinSplitEdges = false;
171
172 /// Copy instructions yet to be coalesced.
174 SmallVector<MachineInstr*, 8> LocalWorkList;
175
176 /// Set of instruction pointers that have been erased, and
177 /// that may be present in WorkList.
179
180 /// Dead instructions that are about to be deleted.
182
183 /// Virtual registers to be considered for register class inflation.
184 SmallVector<Register, 8> InflateRegs;
185
186 /// The collection of live intervals which should have been updated
187 /// immediately after rematerialiation but delayed until
188 /// lateLiveIntervalUpdate is called.
189 DenseSet<Register> ToBeUpdated;
190
191 /// Record how many times the large live interval with many valnos
192 /// has been tried to join with other live interval.
193 DenseMap<Register, unsigned long> LargeLIVisitCounter;
194
195 /// Recursively eliminate dead defs in DeadDefs.
196 void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
197
198 /// LiveRangeEdit callback for eliminateDeadDefs().
200
201 /// Coalesce the LocalWorkList.
202 void coalesceLocals();
203
204 /// Join compatible live intervals
205 void joinAllIntervals();
206
207 /// Coalesce copies in the specified MBB, putting
208 /// copies that cannot yet be coalesced into WorkList.
209 void copyCoalesceInMBB(MachineBasicBlock *MBB);
210
211 /// Tries to coalesce all copies in CurrList. Returns true if any progress
212 /// was made.
213 bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
214
215 /// If one def has many copy like uses, and those copy uses are all
216 /// rematerialized, the live interval update needed for those
217 /// rematerializations will be delayed and done all at once instead
218 /// of being done multiple times. This is to save compile cost because
219 /// live interval update is costly.
220 void lateLiveIntervalUpdate();
221
222 /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
223 /// has no value defined in the predecessors. If the incoming value is the
224 /// same as defined by the copy itself, the value is considered undefined.
225 bool copyValueUndefInPredecessors(LiveRange &S,
226 const MachineBasicBlock *MBB,
227 LiveQueryResult SLRQ);
228
229 /// Set necessary undef flags on subregister uses after pruning out undef
230 /// lane segments from the subrange.
231 void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
232 LaneBitmask PrunedLanes);
233
234 /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
235 /// src/dst of the copy instruction CopyMI. This returns true if the copy
236 /// was successfully coalesced away. If it is not currently possible to
237 /// coalesce this interval, but it may be possible if other things get
238 /// coalesced, then it returns true by reference in 'Again'.
239 bool joinCopy(MachineInstr *CopyMI, bool &Again,
240 SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs);
241
242 /// Attempt to join these two intervals. On failure, this
243 /// returns false. The output "SrcInt" will not have been modified, so we
244 /// can use this information below to update aliases.
245 bool joinIntervals(CoalescerPair &CP);
246
247 /// Attempt joining two virtual registers. Return true on success.
248 bool joinVirtRegs(CoalescerPair &CP);
249
250 /// If a live interval has many valnos and is coalesced with other
251 /// live intervals many times, we regard such live interval as having
252 /// high compile time cost.
253 bool isHighCostLiveInterval(LiveInterval &LI);
254
255 /// Attempt joining with a reserved physreg.
256 bool joinReservedPhysReg(CoalescerPair &CP);
257
258 /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
259 /// Subranges in @p LI which only partially interfere with the desired
260 /// LaneMask are split as necessary. @p LaneMask are the lanes that
261 /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
262 /// lanemasks already adjusted to the coalesced register.
263 void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
264 LaneBitmask LaneMask, CoalescerPair &CP,
265 unsigned DstIdx);
266
267 /// Join the liveranges of two subregisters. Joins @p RRange into
268 /// @p LRange, @p RRange may be invalid afterwards.
269 void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
270 LaneBitmask LaneMask, const CoalescerPair &CP);
271
272 /// We found a non-trivially-coalescable copy. If the source value number is
273 /// defined by a copy from the destination reg see if we can merge these two
274 /// destination reg valno# into a single value number, eliminating a copy.
275 /// This returns true if an interval was modified.
276 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
277
278 /// Return true if there are definitions of IntB
279 /// other than BValNo val# that can reach uses of AValno val# of IntA.
280 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
281 VNInfo *AValNo, VNInfo *BValNo);
282
283 /// We found a non-trivially-coalescable copy.
284 /// If the source value number is defined by a commutable instruction and
285 /// its other operand is coalesced to the copy dest register, see if we
286 /// can transform the copy into a noop by commuting the definition.
287 /// This returns a pair of two flags:
288 /// - the first element is true if an interval was modified,
289 /// - the second element is true if the destination interval needs
290 /// to be shrunk after deleting the copy.
291 std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
292 MachineInstr *CopyMI);
293
294 /// We found a copy which can be moved to its less frequent predecessor.
295 bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
296
297 /// If the source of a copy is defined by a
298 /// trivial computation, replace the copy by rematerialize the definition.
299 bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
300 bool &IsDefCopy);
301
302 /// Return true if a copy involving a physreg should be joined.
303 bool canJoinPhys(const CoalescerPair &CP);
304
305 /// Replace all defs and uses of SrcReg to DstReg and update the subregister
306 /// number if it is not zero. If DstReg is a physical register and the
307 /// existing subregister number of the def / use being updated is not zero,
308 /// make sure to set it to the correct physical subregister.
309 void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
310
311 /// If the given machine operand reads only undefined lanes add an undef
312 /// flag.
313 /// This can happen when undef uses were previously concealed by a copy
314 /// which we coalesced. Example:
315 /// %0:sub0<def,read-undef> = ...
316 /// %1 = COPY %0 <-- Coalescing COPY reveals undef
317 /// = use %1:sub1 <-- hidden undef use
318 void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
319 MachineOperand &MO, unsigned SubRegIdx);
320
321 /// Handle copies of undef values. If the undef value is an incoming
322 /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
323 /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
324 /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
325 MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
326
327 /// Check whether or not we should apply the terminal rule on the
328 /// destination (Dst) of \p Copy.
329 /// When the terminal rule applies, Copy is not profitable to
330 /// coalesce.
331 /// Dst is terminal if it has exactly one affinity (Dst, Src) and
332 /// at least one interference (Dst, Dst2). If Dst is terminal, the
333 /// terminal rule consists in checking that at least one of
334 /// interfering node, say Dst2, has an affinity of equal or greater
335 /// weight with Src.
336 /// In that case, Dst2 and Dst will not be able to be both coalesced
337 /// with Src. Since Dst2 exposes more coalescing opportunities than
338 /// Dst, we can drop \p Copy.
339 bool applyTerminalRule(const MachineInstr &Copy) const;
340
341 /// Wrapper method for \see LiveIntervals::shrinkToUses.
342 /// This method does the proper fixing of the live-ranges when the afore
343 /// mentioned method returns true.
344 void shrinkToUses(LiveInterval *LI,
345 SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
346 NumShrinkToUses++;
347 if (LIS->shrinkToUses(LI, Dead)) {
348 /// Check whether or not \p LI is composed by multiple connected
349 /// components and if that is the case, fix that.
351 LIS->splitSeparateComponents(*LI, SplitLIs);
352 }
353 }
354
355 /// Wrapper Method to do all the necessary work when an Instruction is
356 /// deleted.
357 /// Optimizations should use this to make sure that deleted instructions
358 /// are always accounted for.
359 void deleteInstr(MachineInstr* MI) {
360 ErasedInstrs.insert(MI);
362 MI->eraseFromParent();
363 }
364
365 /// Walk over function and initialize the DbgVRegToValues map.
367
368 /// Test whether, after merging, any DBG_VALUEs would refer to a
369 /// different value number than before merging, and whether this can
370 /// be resolved. If not, mark the DBG_VALUE as being undef.
371 void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
372 JoinVals &LHSVals, LiveRange &RHS,
373 JoinVals &RHSVals);
374
375 void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
376 LiveRange &RegRange, JoinVals &Vals2);
377
378 public:
379 static char ID; ///< Class identification, replacement for typeinfo
380
381 RegisterCoalescer() : MachineFunctionPass(ID) {
383 }
384
385 void getAnalysisUsage(AnalysisUsage &AU) const override;
386
389 MachineFunctionProperties::Property::IsSSA);
390 }
391
392 void releaseMemory() override;
393
394 /// This is the pass entry point.
396
397 /// Implement the dump method.
398 void print(raw_ostream &O, const Module* = nullptr) const override;
399 };
400
401} // end anonymous namespace
402
403char RegisterCoalescer::ID = 0;
404
405char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
406
407INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
408 "Register Coalescer", false, false)
413INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
415
416[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
418 Register &Dst, unsigned &SrcSub,
419 unsigned &DstSub) {
420 if (MI->isCopy()) {
421 Dst = MI->getOperand(0).getReg();
422 DstSub = MI->getOperand(0).getSubReg();
423 Src = MI->getOperand(1).getReg();
424 SrcSub = MI->getOperand(1).getSubReg();
425 } else if (MI->isSubregToReg()) {
426 Dst = MI->getOperand(0).getReg();
427 DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
428 MI->getOperand(3).getImm());
429 Src = MI->getOperand(2).getReg();
430 SrcSub = MI->getOperand(2).getSubReg();
431 } else
432 return false;
433 return true;
434}
435
436/// Return true if this block should be vacated by the coalescer to eliminate
437/// branches. The important cases to handle in the coalescer are critical edges
438/// split during phi elimination which contain only copies. Simple blocks that
439/// contain non-branches should also be vacated, but this can be handled by an
440/// earlier pass similar to early if-conversion.
441static bool isSplitEdge(const MachineBasicBlock *MBB) {
442 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
443 return false;
444
445 for (const auto &MI : *MBB) {
446 if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
447 return false;
448 }
449 return true;
450}
451
453 SrcReg = DstReg = Register();
454 SrcIdx = DstIdx = 0;
455 NewRC = nullptr;
456 Flipped = CrossClass = false;
457
458 Register Src, Dst;
459 unsigned SrcSub = 0, DstSub = 0;
460 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
461 return false;
462 Partial = SrcSub || DstSub;
463
464 // If one register is a physreg, it must be Dst.
465 if (Src.isPhysical()) {
466 if (Dst.isPhysical())
467 return false;
468 std::swap(Src, Dst);
469 std::swap(SrcSub, DstSub);
470 Flipped = true;
471 }
472
473 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
474
475 if (Dst.isPhysical()) {
476 // Eliminate DstSub on a physreg.
477 if (DstSub) {
478 Dst = TRI.getSubReg(Dst, DstSub);
479 if (!Dst) return false;
480 DstSub = 0;
481 }
482
483 // Eliminate SrcSub by picking a corresponding Dst superregister.
484 if (SrcSub) {
485 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
486 if (!Dst) return false;
487 } else if (!MRI.getRegClass(Src)->contains(Dst)) {
488 return false;
489 }
490 } else {
491 // Both registers are virtual.
492 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
493 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
494
495 // Both registers have subreg indices.
496 if (SrcSub && DstSub) {
497 // Copies between different sub-registers are never coalescable.
498 if (Src == Dst && SrcSub != DstSub)
499 return false;
500
501 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
502 SrcIdx, DstIdx);
503 if (!NewRC)
504 return false;
505 } else if (DstSub) {
506 // SrcReg will be merged with a sub-register of DstReg.
507 SrcIdx = DstSub;
508 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
509 } else if (SrcSub) {
510 // DstReg will be merged with a sub-register of SrcReg.
511 DstIdx = SrcSub;
512 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
513 } else {
514 // This is a straight copy without sub-registers.
515 NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
516 }
517
518 // The combined constraint may be impossible to satisfy.
519 if (!NewRC)
520 return false;
521
522 // Prefer SrcReg to be a sub-register of DstReg.
523 // FIXME: Coalescer should support subregs symmetrically.
524 if (DstIdx && !SrcIdx) {
525 std::swap(Src, Dst);
526 std::swap(SrcIdx, DstIdx);
527 Flipped = !Flipped;
528 }
529
530 CrossClass = NewRC != DstRC || NewRC != SrcRC;
531 }
532 // Check our invariants
533 assert(Src.isVirtual() && "Src must be virtual");
534 assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
535 SrcReg = Src;
536 DstReg = Dst;
537 return true;
538}
539
541 if (DstReg.isPhysical())
542 return false;
543 std::swap(SrcReg, DstReg);
544 std::swap(SrcIdx, DstIdx);
545 Flipped = !Flipped;
546 return true;
547}
548
550 if (!MI)
551 return false;
552 Register Src, Dst;
553 unsigned SrcSub = 0, DstSub = 0;
554 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
555 return false;
556
557 // Find the virtual register that is SrcReg.
558 if (Dst == SrcReg) {
559 std::swap(Src, Dst);
560 std::swap(SrcSub, DstSub);
561 } else if (Src != SrcReg) {
562 return false;
563 }
564
565 // Now check that Dst matches DstReg.
566 if (DstReg.isPhysical()) {
567 if (!Dst.isPhysical())
568 return false;
569 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
570 // DstSub could be set for a physreg from INSERT_SUBREG.
571 if (DstSub)
572 Dst = TRI.getSubReg(Dst, DstSub);
573 // Full copy of Src.
574 if (!SrcSub)
575 return DstReg == Dst;
576 // This is a partial register copy. Check that the parts match.
577 return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
578 } else {
579 // DstReg is virtual.
580 if (DstReg != Dst)
581 return false;
582 // Registers match, do the subregisters line up?
583 return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
584 TRI.composeSubRegIndices(DstIdx, DstSub);
585 }
586}
587
588void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
589 AU.setPreservesCFG();
598}
599
600void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
601 if (Edit) {
602 Edit->eliminateDeadDefs(DeadDefs);
603 return;
604 }
606 LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
607 nullptr, this).eliminateDeadDefs(DeadDefs);
608}
609
610void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
611 // MI may be in WorkList. Make sure we don't visit it.
612 ErasedInstrs.insert(MI);
613}
614
615bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
616 MachineInstr *CopyMI) {
617 assert(!CP.isPartial() && "This doesn't work for partial copies.");
618 assert(!CP.isPhys() && "This doesn't work for physreg copies.");
619
620 LiveInterval &IntA =
621 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
622 LiveInterval &IntB =
623 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
624 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
625
626 // We have a non-trivially-coalescable copy with IntA being the source and
627 // IntB being the dest, thus this defines a value number in IntB. If the
628 // source value number (in IntA) is defined by a copy from B, see if we can
629 // merge these two pieces of B into a single value number, eliminating a copy.
630 // For example:
631 //
632 // A3 = B0
633 // ...
634 // B1 = A3 <- this copy
635 //
636 // In this case, B0 can be extended to where the B1 copy lives, allowing the
637 // B1 value number to be replaced with B0 (which simplifies the B
638 // liveinterval).
639
640 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
641 // the example above.
643 if (BS == IntB.end()) return false;
644 VNInfo *BValNo = BS->valno;
645
646 // Get the location that B is defined at. Two options: either this value has
647 // an unknown definition point or it is defined at CopyIdx. If unknown, we
648 // can't process it.
649 if (BValNo->def != CopyIdx) return false;
650
651 // AValNo is the value number in A that defines the copy, A3 in the example.
652 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
653 LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
654 // The live segment might not exist after fun with physreg coalescing.
655 if (AS == IntA.end()) return false;
656 VNInfo *AValNo = AS->valno;
657
658 // If AValNo is defined as a copy from IntB, we can potentially process this.
659 // Get the instruction that defines this value number.
660 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
661 // Don't allow any partial copies, even if isCoalescable() allows them.
662 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
663 return false;
664
665 // Get the Segment in IntB that this value number starts with.
667 IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
668 if (ValS == IntB.end())
669 return false;
670
671 // Make sure that the end of the live segment is inside the same block as
672 // CopyMI.
673 MachineInstr *ValSEndInst =
674 LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
675 if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
676 return false;
677
678 // Okay, we now know that ValS ends in the same block that the CopyMI
679 // live-range starts. If there are no intervening live segments between them
680 // in IntB, we can merge them.
681 if (ValS+1 != BS) return false;
682
683 LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
684
685 SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
686 // We are about to delete CopyMI, so need to remove it as the 'instruction
687 // that defines this value #'. Update the valnum with the new defining
688 // instruction #.
689 BValNo->def = FillerStart;
690
691 // Okay, we can merge them. We need to insert a new liverange:
692 // [ValS.end, BS.begin) of either value number, then we merge the
693 // two value numbers.
694 IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
695
696 // Okay, merge "B1" into the same value number as "B0".
697 if (BValNo != ValS->valno)
698 IntB.MergeValueNumberInto(BValNo, ValS->valno);
699
700 // Do the same for the subregister segments.
701 for (LiveInterval::SubRange &S : IntB.subranges()) {
702 // Check for SubRange Segments of the form [1234r,1234d:0) which can be
703 // removed to prevent creating bogus SubRange Segments.
704 LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
705 if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
706 S.removeSegment(*SS, true);
707 continue;
708 }
709 // The subrange may have ended before FillerStart. If so, extend it.
710 if (!S.getVNInfoAt(FillerStart)) {
711 SlotIndex BBStart =
712 LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
713 S.extendInBlock(BBStart, FillerStart);
714 }
715 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
716 S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
717 VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
718 if (SubBValNo != SubValSNo)
719 S.MergeValueNumberInto(SubBValNo, SubValSNo);
720 }
721
722 LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
723
724 // If the source instruction was killing the source register before the
725 // merge, unset the isKill marker given the live range has been extended.
726 int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
727 if (UIdx != -1) {
728 ValSEndInst->getOperand(UIdx).setIsKill(false);
729 }
730
731 // Rewrite the copy.
732 CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
733 // If the copy instruction was killing the destination register or any
734 // subrange before the merge trim the live range.
735 bool RecomputeLiveRange = AS->end == CopyIdx;
736 if (!RecomputeLiveRange) {
737 for (LiveInterval::SubRange &S : IntA.subranges()) {
738 LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
739 if (SS != S.end() && SS->end == CopyIdx) {
740 RecomputeLiveRange = true;
741 break;
742 }
743 }
744 }
745 if (RecomputeLiveRange)
746 shrinkToUses(&IntA);
747
748 ++numExtends;
749 return true;
750}
751
752bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
753 LiveInterval &IntB,
754 VNInfo *AValNo,
755 VNInfo *BValNo) {
756 // If AValNo has PHI kills, conservatively assume that IntB defs can reach
757 // the PHI values.
758 if (LIS->hasPHIKill(IntA, AValNo))
759 return true;
760
761 for (LiveRange::Segment &ASeg : IntA.segments) {
762 if (ASeg.valno != AValNo) continue;
764 if (BI != IntB.begin())
765 --BI;
766 for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
767 if (BI->valno == BValNo)
768 continue;
769 if (BI->start <= ASeg.start && BI->end > ASeg.start)
770 return true;
771 if (BI->start > ASeg.start && BI->start < ASeg.end)
772 return true;
773 }
774 }
775 return false;
776}
777
778/// Copy segments with value number @p SrcValNo from liverange @p Src to live
779/// range @Dst and use value number @p DstValNo there.
780static std::pair<bool,bool>
781addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
782 const VNInfo *SrcValNo) {
783 bool Changed = false;
784 bool MergedWithDead = false;
785 for (const LiveRange::Segment &S : Src.segments) {
786 if (S.valno != SrcValNo)
787 continue;
788 // This is adding a segment from Src that ends in a copy that is about
789 // to be removed. This segment is going to be merged with a pre-existing
790 // segment in Dst. This works, except in cases when the corresponding
791 // segment in Dst is dead. For example: adding [192r,208r:1) from Src
792 // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
793 // Recognized such cases, so that the segments can be shrunk.
794 LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
795 LiveRange::Segment &Merged = *Dst.addSegment(Added);
796 if (Merged.end.isDead())
797 MergedWithDead = true;
798 Changed = true;
799 }
800 return std::make_pair(Changed, MergedWithDead);
801}
802
803std::pair<bool,bool>
804RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
805 MachineInstr *CopyMI) {
806 assert(!CP.isPhys());
807
808 LiveInterval &IntA =
809 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
810 LiveInterval &IntB =
811 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
812
813 // We found a non-trivially-coalescable copy with IntA being the source and
814 // IntB being the dest, thus this defines a value number in IntB. If the
815 // source value number (in IntA) is defined by a commutable instruction and
816 // its other operand is coalesced to the copy dest register, see if we can
817 // transform the copy into a noop by commuting the definition. For example,
818 //
819 // A3 = op A2 killed B0
820 // ...
821 // B1 = A3 <- this copy
822 // ...
823 // = op A3 <- more uses
824 //
825 // ==>
826 //
827 // B2 = op B0 killed A2
828 // ...
829 // B1 = B2 <- now an identity copy
830 // ...
831 // = op B2 <- more uses
832
833 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
834 // the example above.
835 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
836 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
837 assert(BValNo != nullptr && BValNo->def == CopyIdx);
838
839 // AValNo is the value number in A that defines the copy, A3 in the example.
840 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
841 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
842 if (AValNo->isPHIDef())
843 return { false, false };
845 if (!DefMI)
846 return { false, false };
847 if (!DefMI->isCommutable())
848 return { false, false };
849 // If DefMI is a two-address instruction then commuting it will change the
850 // destination register.
851 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
852 assert(DefIdx != -1);
853 unsigned UseOpIdx;
854 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
855 return { false, false };
856
857 // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
858 // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
859 // passed to the method. That _other_ operand is chosen by
860 // the findCommutedOpIndices() method.
861 //
862 // That is obviously an area for improvement in case of instructions having
863 // more than 2 operands. For example, if some instruction has 3 commutable
864 // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
865 // op#2<->op#3) of commute transformation should be considered/tried here.
866 unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
867 if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
868 return { false, false };
869
870 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
871 Register NewReg = NewDstMO.getReg();
872 if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
873 return { false, false };
874
875 // Make sure there are no other definitions of IntB that would reach the
876 // uses which the new definition can reach.
877 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
878 return { false, false };
879
880 // If some of the uses of IntA.reg is already coalesced away, return false.
881 // It's not possible to determine whether it's safe to perform the coalescing.
882 for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
883 MachineInstr *UseMI = MO.getParent();
884 unsigned OpNo = &MO - &UseMI->getOperand(0);
885 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
887 if (US == IntA.end() || US->valno != AValNo)
888 continue;
889 // If this use is tied to a def, we can't rewrite the register.
890 if (UseMI->isRegTiedToDefOperand(OpNo))
891 return { false, false };
892 }
893
894 LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
895 << *DefMI);
896
897 // At this point we have decided that it is legal to do this
898 // transformation. Start by commuting the instruction.
900 MachineInstr *NewMI =
901 TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
902 if (!NewMI)
903 return { false, false };
904 if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
905 !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
906 return { false, false };
907 if (NewMI != DefMI) {
908 LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
910 MBB->insert(Pos, NewMI);
911 MBB->erase(DefMI);
912 }
913
914 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
915 // A = or A, B
916 // ...
917 // B = A
918 // ...
919 // C = killed A
920 // ...
921 // = B
922
923 // Update uses of IntA of the specific Val# with IntB.
924 for (MachineOperand &UseMO :
925 llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
926 if (UseMO.isUndef())
927 continue;
928 MachineInstr *UseMI = UseMO.getParent();
929 if (UseMI->isDebugInstr()) {
930 // FIXME These don't have an instruction index. Not clear we have enough
931 // info to decide whether to do this replacement or not. For now do it.
932 UseMO.setReg(NewReg);
933 continue;
934 }
935 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
937 assert(US != IntA.end() && "Use must be live");
938 if (US->valno != AValNo)
939 continue;
940 // Kill flags are no longer accurate. They are recomputed after RA.
941 UseMO.setIsKill(false);
942 if (NewReg.isPhysical())
943 UseMO.substPhysReg(NewReg, *TRI);
944 else
945 UseMO.setReg(NewReg);
946 if (UseMI == CopyMI)
947 continue;
948 if (!UseMI->isCopy())
949 continue;
950 if (UseMI->getOperand(0).getReg() != IntB.reg() ||
952 continue;
953
954 // This copy will become a noop. If it's defining a new val#, merge it into
955 // BValNo.
956 SlotIndex DefIdx = UseIdx.getRegSlot();
957 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
958 if (!DVNI)
959 continue;
960 LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
961 assert(DVNI->def == DefIdx);
962 BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
963 for (LiveInterval::SubRange &S : IntB.subranges()) {
964 VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
965 if (!SubDVNI)
966 continue;
967 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
968 assert(SubBValNo->def == CopyIdx);
969 S.MergeValueNumberInto(SubDVNI, SubBValNo);
970 }
971
972 deleteInstr(UseMI);
973 }
974
975 // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
976 // is updated.
977 bool ShrinkB = false;
979 if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
980 if (!IntA.hasSubRanges()) {
981 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
982 IntA.createSubRangeFrom(Allocator, Mask, IntA);
983 } else if (!IntB.hasSubRanges()) {
984 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
985 IntB.createSubRangeFrom(Allocator, Mask, IntB);
986 }
987 SlotIndex AIdx = CopyIdx.getRegSlot(true);
988 LaneBitmask MaskA;
989 const SlotIndexes &Indexes = *LIS->getSlotIndexes();
990 for (LiveInterval::SubRange &SA : IntA.subranges()) {
991 VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
992 // Even if we are dealing with a full copy, some lanes can
993 // still be undefined.
994 // E.g.,
995 // undef A.subLow = ...
996 // B = COPY A <== A.subHigh is undefined here and does
997 // not have a value number.
998 if (!ASubValNo)
999 continue;
1000 MaskA |= SA.LaneMask;
1001
1002 IntB.refineSubRanges(
1003 Allocator, SA.LaneMask,
1004 [&Allocator, &SA, CopyIdx, ASubValNo,
1005 &ShrinkB](LiveInterval::SubRange &SR) {
1006 VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
1007 : SR.getVNInfoAt(CopyIdx);
1008 assert(BSubValNo != nullptr);
1009 auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
1010 ShrinkB |= P.second;
1011 if (P.first)
1012 BSubValNo->def = ASubValNo->def;
1013 },
1014 Indexes, *TRI);
1015 }
1016 // Go over all subranges of IntB that have not been covered by IntA,
1017 // and delete the segments starting at CopyIdx. This can happen if
1018 // IntA has undef lanes that are defined in IntB.
1019 for (LiveInterval::SubRange &SB : IntB.subranges()) {
1020 if ((SB.LaneMask & MaskA).any())
1021 continue;
1022 if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
1023 if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
1024 SB.removeSegment(*S, true);
1025 }
1026 }
1027
1028 BValNo->def = AValNo->def;
1029 auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
1030 ShrinkB |= P.second;
1031 LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
1032
1033 LIS->removeVRegDefAt(IntA, AValNo->def);
1034
1035 LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
1036 ++numCommutes;
1037 return { true, ShrinkB };
1038}
1039
1040/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1041/// predecessor of BB2, and if B is not redefined on the way from A = B
1042/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1043/// execution goes through the path from BB0 to BB2. We may move B = A
1044/// to the predecessor without such reversed copy.
1045/// So we will transform the program from:
1046/// BB0:
1047/// A = B; BB1:
1048/// ... ...
1049/// / \ /
1050/// BB2:
1051/// ...
1052/// B = A;
1053///
1054/// to:
1055///
1056/// BB0: BB1:
1057/// A = B; ...
1058/// ... B = A;
1059/// / \ /
1060/// BB2:
1061/// ...
1062///
1063/// A special case is when BB0 and BB2 are the same BB which is the only
1064/// BB in a loop:
1065/// BB1:
1066/// ...
1067/// BB0/BB2: ----
1068/// B = A; |
1069/// ... |
1070/// A = B; |
1071/// |-------
1072/// |
1073/// We may hoist B = A from BB0/BB2 to BB1.
1074///
1075/// The major preconditions for correctness to remove such partial
1076/// redundancy include:
1077/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1078/// the PHI is defined by the reversed copy A = B in BB0.
1079/// 2. No B is referenced from the start of BB2 to B = A.
1080/// 3. No B is defined from A = B to the end of BB0.
1081/// 4. BB1 has only one successor.
1082///
1083/// 2 and 4 implicitly ensure B is not live at the end of BB1.
1084/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1085/// colder place, which not only prevent endless loop, but also make sure
1086/// the movement of copy is beneficial.
1087bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1088 MachineInstr &CopyMI) {
1089 assert(!CP.isPhys());
1090 if (!CopyMI.isFullCopy())
1091 return false;
1092
1093 MachineBasicBlock &MBB = *CopyMI.getParent();
1094 // If this block is the target of an invoke/inlineasm_br, moving the copy into
1095 // the predecessor is tricker, and we don't handle it.
1097 return false;
1098
1099 if (MBB.pred_size() != 2)
1100 return false;
1101
1102 LiveInterval &IntA =
1103 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
1104 LiveInterval &IntB =
1105 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
1106
1107 // A is defined by PHI at the entry of MBB.
1108 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1109 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1110 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1111 if (!AValNo->isPHIDef())
1112 return false;
1113
1114 // No B is referenced before CopyMI in MBB.
1115 if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1116 return false;
1117
1118 // MBB has two predecessors: one contains A = B so no copy will be inserted
1119 // for it. The other one will have a copy moved from MBB.
1120 bool FoundReverseCopy = false;
1121 MachineBasicBlock *CopyLeftBB = nullptr;
1122 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1123 VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1125 if (!DefMI || !DefMI->isFullCopy()) {
1126 CopyLeftBB = Pred;
1127 continue;
1128 }
1129 // Check DefMI is a reverse copy and it is in BB Pred.
1130 if (DefMI->getOperand(0).getReg() != IntA.reg() ||
1131 DefMI->getOperand(1).getReg() != IntB.reg() ||
1132 DefMI->getParent() != Pred) {
1133 CopyLeftBB = Pred;
1134 continue;
1135 }
1136 // If there is any other def of B after DefMI and before the end of Pred,
1137 // we need to keep the copy of B = A at the end of Pred if we remove
1138 // B = A from MBB.
1139 bool ValB_Changed = false;
1140 for (auto *VNI : IntB.valnos) {
1141 if (VNI->isUnused())
1142 continue;
1143 if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1144 ValB_Changed = true;
1145 break;
1146 }
1147 }
1148 if (ValB_Changed) {
1149 CopyLeftBB = Pred;
1150 continue;
1151 }
1152 FoundReverseCopy = true;
1153 }
1154
1155 // If no reverse copy is found in predecessors, nothing to do.
1156 if (!FoundReverseCopy)
1157 return false;
1158
1159 // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1160 // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1161 // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1162 // update IntA/IntB.
1163 //
1164 // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1165 // MBB is hotter than CopyLeftBB.
1166 if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1167 return false;
1168
1169 // Now (almost sure it's) ok to move copy.
1170 if (CopyLeftBB) {
1171 // Position in CopyLeftBB where we should insert new copy.
1172 auto InsPos = CopyLeftBB->getFirstTerminator();
1173
1174 // Make sure that B isn't referenced in the terminators (if any) at the end
1175 // of the predecessor since we're about to insert a new definition of B
1176 // before them.
1177 if (InsPos != CopyLeftBB->end()) {
1178 SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1179 if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1180 return false;
1181 }
1182
1183 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1184 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1185
1186 // Insert new copy to CopyLeftBB.
1187 MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1188 TII->get(TargetOpcode::COPY), IntB.reg())
1189 .addReg(IntA.reg());
1190 SlotIndex NewCopyIdx =
1191 LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1192 IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1193 for (LiveInterval::SubRange &SR : IntB.subranges())
1194 SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1195
1196 // If the newly created Instruction has an address of an instruction that was
1197 // deleted before (object recycled by the allocator) it needs to be removed from
1198 // the deleted list.
1199 ErasedInstrs.erase(NewCopyMI);
1200 } else {
1201 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1202 << printMBBReference(MBB) << '\t' << CopyMI);
1203 }
1204
1205 const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();
1206
1207 // Remove CopyMI.
1208 // Note: This is fine to remove the copy before updating the live-ranges.
1209 // While updating the live-ranges, we only look at slot indices and
1210 // never go back to the instruction.
1211 // Mark instructions as deleted.
1212 deleteInstr(&CopyMI);
1213
1214 // Update the liveness.
1215 SmallVector<SlotIndex, 8> EndPoints;
1216 VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1217 LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1218 &EndPoints);
1219 BValNo->markUnused();
1220
1221 if (IsUndefCopy) {
1222 // We're introducing an undef phi def, and need to set undef on any users of
1223 // the previously local def to avoid artifically extending the lifetime
1224 // through the block.
1225 for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
1226 const MachineInstr &MI = *MO.getParent();
1227 SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1228 if (!IntB.liveAt(UseIdx))
1229 MO.setIsUndef(true);
1230 }
1231 }
1232
1233 // Extend IntB to the EndPoints of its original live interval.
1234 LIS->extendToIndices(IntB, EndPoints);
1235
1236 // Now, do the same for its subranges.
1237 for (LiveInterval::SubRange &SR : IntB.subranges()) {
1238 EndPoints.clear();
1239 VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1240 assert(BValNo && "All sublanes should be live");
1241 LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1242 BValNo->markUnused();
1243 // We can have a situation where the result of the original copy is live,
1244 // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1245 // the copy appear as an endpoint from pruneValue(), but we don't want it
1246 // to because the copy has been removed. We can go ahead and remove that
1247 // endpoint; there is no other situation here that there could be a use at
1248 // the same place as we know that the copy is a full copy.
1249 for (unsigned I = 0; I != EndPoints.size(); ) {
1250 if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1251 EndPoints[I] = EndPoints.back();
1252 EndPoints.pop_back();
1253 continue;
1254 }
1255 ++I;
1256 }
1258 IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
1259 *LIS->getSlotIndexes());
1260 LIS->extendToIndices(SR, EndPoints, Undefs);
1261 }
1262 // If any dead defs were extended, truncate them.
1263 shrinkToUses(&IntB);
1264
1265 // Finally, update the live-range of IntA.
1266 shrinkToUses(&IntA);
1267 return true;
1268}
1269
1270/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1271/// defining a subregister.
1272static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1273 assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1274
1275 for (const MachineOperand &Op : MI.all_defs()) {
1276 if (Op.getReg() != Reg)
1277 continue;
1278 // Return true if we define the full register or don't care about the value
1279 // inside other subregisters.
1280 if (Op.getSubReg() == 0 || Op.isUndef())
1281 return true;
1282 }
1283 return false;
1284}
1285
1286bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1287 MachineInstr *CopyMI,
1288 bool &IsDefCopy) {
1289 IsDefCopy = false;
1290 Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1291 unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1292 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1293 unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1294 if (SrcReg.isPhysical())
1295 return false;
1296
1297 LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1298 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1299 VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1300 if (!ValNo)
1301 return false;
1302 if (ValNo->isPHIDef() || ValNo->isUnused())
1303 return false;
1305 if (!DefMI)
1306 return false;
1307 if (DefMI->isCopyLike()) {
1308 IsDefCopy = true;
1309 return false;
1310 }
1311 if (!TII->isAsCheapAsAMove(*DefMI))
1312 return false;
1313
1315 LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1316 if (!Edit.checkRematerializable(ValNo, DefMI))
1317 return false;
1318
1319 if (!definesFullReg(*DefMI, SrcReg))
1320 return false;
1321 bool SawStore = false;
1322 if (!DefMI->isSafeToMove(AA, SawStore))
1323 return false;
1324 const MCInstrDesc &MCID = DefMI->getDesc();
1325 if (MCID.getNumDefs() != 1)
1326 return false;
1327 // Only support subregister destinations when the def is read-undef.
1328 MachineOperand &DstOperand = CopyMI->getOperand(0);
1329 Register CopyDstReg = DstOperand.getReg();
1330 if (DstOperand.getSubReg() && !DstOperand.isUndef())
1331 return false;
1332
1333 // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1334 // the register substantially (beyond both source and dest size). This is bad
1335 // for performance since it can cascade through a function, introducing many
1336 // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1337 // around after a few subreg copies).
1338 if (SrcIdx && DstIdx)
1339 return false;
1340
1341 [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
1342 const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1343 if (!DefMI->isImplicitDef()) {
1344 if (DstReg.isPhysical()) {
1345 Register NewDstReg = DstReg;
1346
1347 unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1348 DefMI->getOperand(0).getSubReg());
1349 if (NewDstIdx)
1350 NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1351
1352 // Finally, make sure that the physical subregister that will be
1353 // constructed later is permitted for the instruction.
1354 if (!DefRC->contains(NewDstReg))
1355 return false;
1356 } else {
1357 // Theoretically, some stack frame reference could exist. Just make sure
1358 // it hasn't actually happened.
1359 assert(DstReg.isVirtual() &&
1360 "Only expect to deal with virtual or physical registers");
1361 }
1362 }
1363
1364 LiveRangeEdit::Remat RM(ValNo);
1365 RM.OrigMI = DefMI;
1366 if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1367 return false;
1368
1369 DebugLoc DL = CopyMI->getDebugLoc();
1370 MachineBasicBlock *MBB = CopyMI->getParent();
1372 std::next(MachineBasicBlock::iterator(CopyMI));
1373 Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1374 MachineInstr &NewMI = *std::prev(MII);
1375 NewMI.setDebugLoc(DL);
1376
1377 // In a situation like the following:
1378 // %0:subreg = instr ; DefMI, subreg = DstIdx
1379 // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
1380 // instead of widening %1 to the register class of %0 simply do:
1381 // %1 = instr
1382 const TargetRegisterClass *NewRC = CP.getNewRC();
1383 if (DstIdx != 0) {
1384 MachineOperand &DefMO = NewMI.getOperand(0);
1385 if (DefMO.getSubReg() == DstIdx) {
1386 assert(SrcIdx == 0 && CP.isFlipped()
1387 && "Shouldn't have SrcIdx+DstIdx at this point");
1388 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1389 const TargetRegisterClass *CommonRC =
1390 TRI->getCommonSubClass(DefRC, DstRC);
1391 if (CommonRC != nullptr) {
1392 NewRC = CommonRC;
1393
1394 // Instruction might contain "undef %0:subreg" as use operand:
1395 // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1396 //
1397 // Need to check all operands.
1398 for (MachineOperand &MO : NewMI.operands()) {
1399 if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1400 MO.setSubReg(0);
1401 }
1402 }
1403
1404 DstIdx = 0;
1405 DefMO.setIsUndef(false); // Only subregs can have def+undef.
1406 }
1407 }
1408 }
1409
1410 // CopyMI may have implicit operands, save them so that we can transfer them
1411 // over to the newly materialized instruction after CopyMI is removed.
1413 ImplicitOps.reserve(CopyMI->getNumOperands() -
1414 CopyMI->getDesc().getNumOperands());
1415 for (unsigned I = CopyMI->getDesc().getNumOperands(),
1416 E = CopyMI->getNumOperands();
1417 I != E; ++I) {
1418 MachineOperand &MO = CopyMI->getOperand(I);
1419 if (MO.isReg()) {
1420 assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1421 assert((MO.getReg().isPhysical() ||
1422 (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
1423 "unexpected implicit virtual register def");
1424 ImplicitOps.push_back(MO);
1425 }
1426 }
1427
1428 CopyMI->eraseFromParent();
1429 ErasedInstrs.insert(CopyMI);
1430
1431 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1432 // We need to remember these so we can add intervals once we insert
1433 // NewMI into SlotIndexes.
1434 //
1435 // We also expect to have tied implicit-defs of super registers originating
1436 // from SUBREG_TO_REG, such as:
1437 // $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
1438 // undef %0.sub_32bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
1439 //
1440 // The implicit-def of the super register may have been reduced to
1441 // subregisters depending on the uses.
1442
1443 bool NewMIDefinesFullReg = false;
1444
1445 SmallVector<MCRegister, 4> NewMIImplDefs;
1446 for (unsigned i = NewMI.getDesc().getNumOperands(),
1447 e = NewMI.getNumOperands();
1448 i != e; ++i) {
1449 MachineOperand &MO = NewMI.getOperand(i);
1450 if (MO.isReg() && MO.isDef()) {
1451 assert(MO.isImplicit());
1452 if (MO.getReg().isPhysical()) {
1453 if (MO.getReg() == DstReg)
1454 NewMIDefinesFullReg = true;
1455
1456 assert(MO.isImplicit() && MO.getReg().isPhysical() &&
1457 (MO.isDead() ||
1458 (DefSubIdx &&
1459 ((TRI->getSubReg(MO.getReg(), DefSubIdx) ==
1460 MCRegister((unsigned)NewMI.getOperand(0).getReg())) ||
1461 TRI->isSubRegisterEq(NewMI.getOperand(0).getReg(),
1462 MO.getReg())))));
1463 NewMIImplDefs.push_back(MO.getReg().asMCReg());
1464 } else {
1465 assert(MO.getReg() == NewMI.getOperand(0).getReg());
1466
1467 // We're only expecting another def of the main output, so the range
1468 // should get updated with the regular output range.
1469 //
1470 // FIXME: The range updating below probably needs updating to look at
1471 // the super register if subranges are tracked.
1472 assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1473 "subrange update for implicit-def of super register may not be "
1474 "properly handled");
1475 }
1476 }
1477 }
1478
1479 if (DstReg.isVirtual()) {
1480 unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1481
1482 if (DefRC != nullptr) {
1483 if (NewIdx)
1484 NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1485 else
1486 NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1487 assert(NewRC && "subreg chosen for remat incompatible with instruction");
1488 }
1489 // Remap subranges to new lanemask and change register class.
1490 LiveInterval &DstInt = LIS->getInterval(DstReg);
1491 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1492 SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1493 }
1494 MRI->setRegClass(DstReg, NewRC);
1495
1496 // Update machine operands and add flags.
1497 updateRegDefsUses(DstReg, DstReg, DstIdx);
1498 NewMI.getOperand(0).setSubReg(NewIdx);
1499 // updateRegDefUses can add an "undef" flag to the definition, since
1500 // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1501 // sure that "undef" is not set.
1502 if (NewIdx == 0)
1503 NewMI.getOperand(0).setIsUndef(false);
1504 // Add dead subregister definitions if we are defining the whole register
1505 // but only part of it is live.
1506 // This could happen if the rematerialization instruction is rematerializing
1507 // more than actually is used in the register.
1508 // An example would be:
1509 // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1510 // ; Copying only part of the register here, but the rest is undef.
1511 // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1512 // ==>
1513 // ; Materialize all the constants but only using one
1514 // %2 = LOAD_CONSTANTS 5, 8
1515 //
1516 // at this point for the part that wasn't defined before we could have
1517 // subranges missing the definition.
1518 if (NewIdx == 0 && DstInt.hasSubRanges()) {
1519 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1520 SlotIndex DefIndex =
1521 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1522 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1524 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1525 if (!SR.liveAt(DefIndex))
1526 SR.createDeadDef(DefIndex, Alloc);
1527 MaxMask &= ~SR.LaneMask;
1528 }
1529 if (MaxMask.any()) {
1530 LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1531 SR->createDeadDef(DefIndex, Alloc);
1532 }
1533 }
1534
1535 // Make sure that the subrange for resultant undef is removed
1536 // For example:
1537 // %1:sub1<def,read-undef> = LOAD CONSTANT 1
1538 // %2 = COPY %1
1539 // ==>
1540 // %2:sub1<def, read-undef> = LOAD CONSTANT 1
1541 // ; Correct but need to remove the subrange for %2:sub0
1542 // ; as it is now undef
1543 if (NewIdx != 0 && DstInt.hasSubRanges()) {
1544 // The affected subregister segments can be removed.
1545 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1546 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1547 bool UpdatedSubRanges = false;
1548 SlotIndex DefIndex =
1549 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1551 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1552 if ((SR.LaneMask & DstMask).none()) {
1554 << "Removing undefined SubRange "
1555 << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1556
1557 if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1558 // VNI is in ValNo - remove any segments in this SubRange that have
1559 // this ValNo
1560 SR.removeValNo(RmValNo);
1561 }
1562
1563 // We may not have a defined value at this point, but still need to
1564 // clear out any empty subranges tentatively created by
1565 // updateRegDefUses. The original subrange def may have only undefed
1566 // some lanes.
1567 UpdatedSubRanges = true;
1568 } else {
1569 // We know that this lane is defined by this instruction,
1570 // but at this point it may be empty because it is not used by
1571 // anything. This happens when updateRegDefUses adds the missing
1572 // lanes. Assign that lane a dead def so that the interferences
1573 // are properly modeled.
1574 if (SR.empty())
1575 SR.createDeadDef(DefIndex, Alloc);
1576 }
1577 }
1578 if (UpdatedSubRanges)
1579 DstInt.removeEmptySubRanges();
1580 }
1581 } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1582 // The New instruction may be defining a sub-register of what's actually
1583 // been asked for. If so it must implicitly define the whole thing.
1584 assert(DstReg.isPhysical() &&
1585 "Only expect virtual or physical registers in remat");
1586 NewMI.getOperand(0).setIsDead(true);
1587
1588 if (!NewMIDefinesFullReg) {
1590 CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1591 }
1592
1593 // Record small dead def live-ranges for all the subregisters
1594 // of the destination register.
1595 // Otherwise, variables that live through may miss some
1596 // interferences, thus creating invalid allocation.
1597 // E.g., i386 code:
1598 // %1 = somedef ; %1 GR8
1599 // %2 = remat ; %2 GR32
1600 // CL = COPY %2.sub_8bit
1601 // = somedef %1 ; %1 GR8
1602 // =>
1603 // %1 = somedef ; %1 GR8
1604 // dead ECX = remat ; implicit-def CL
1605 // = somedef %1 ; %1 GR8
1606 // %1 will see the interferences with CL but not with CH since
1607 // no live-ranges would have been created for ECX.
1608 // Fix that!
1609 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1610 for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
1611 if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1612 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1613 }
1614
1615 NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
1616
1617 // Transfer over implicit operands to the rematerialized instruction.
1618 for (MachineOperand &MO : ImplicitOps)
1619 NewMI.addOperand(MO);
1620
1621 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1622 for (MCRegister Reg : NewMIImplDefs) {
1623 for (MCRegUnit Unit : TRI->regunits(Reg))
1624 if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1625 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1626 }
1627
1628 LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1629 ++NumReMats;
1630
1631 // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1632 // to describe DstReg instead.
1633 if (MRI->use_nodbg_empty(SrcReg)) {
1634 for (MachineOperand &UseMO :
1635 llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1636 MachineInstr *UseMI = UseMO.getParent();
1637 if (UseMI->isDebugInstr()) {
1638 if (DstReg.isPhysical())
1639 UseMO.substPhysReg(DstReg, *TRI);
1640 else
1641 UseMO.setReg(DstReg);
1642 // Move the debug value directly after the def of the rematerialized
1643 // value in DstReg.
1644 MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1645 LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1646 }
1647 }
1648 }
1649
1650 if (ToBeUpdated.count(SrcReg))
1651 return true;
1652
1653 unsigned NumCopyUses = 0;
1654 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1655 if (UseMO.getParent()->isCopyLike())
1656 NumCopyUses++;
1657 }
1658 if (NumCopyUses < LateRematUpdateThreshold) {
1659 // The source interval can become smaller because we removed a use.
1660 shrinkToUses(&SrcInt, &DeadDefs);
1661 if (!DeadDefs.empty())
1662 eliminateDeadDefs(&Edit);
1663 } else {
1664 ToBeUpdated.insert(SrcReg);
1665 }
1666 return true;
1667}
1668
1669MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1670 // ProcessImplicitDefs may leave some copies of <undef> values, it only
1671 // removes local variables. When we have a copy like:
1672 //
1673 // %1 = COPY undef %2
1674 //
1675 // We delete the copy and remove the corresponding value number from %1.
1676 // Any uses of that value number are marked as <undef>.
1677
1678 // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1679 // CoalescerPair may have a new register class with adjusted subreg indices
1680 // at this point.
1681 Register SrcReg, DstReg;
1682 unsigned SrcSubIdx = 0, DstSubIdx = 0;
1683 if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
1684 return nullptr;
1685
1686 SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1687 const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1688 // CopyMI is undef iff SrcReg is not live before the instruction.
1689 if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
1690 LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1691 for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1692 if ((SR.LaneMask & SrcMask).none())
1693 continue;
1694 if (SR.liveAt(Idx))
1695 return nullptr;
1696 }
1697 } else if (SrcLI.liveAt(Idx))
1698 return nullptr;
1699
1700 // If the undef copy defines a live-out value (i.e. an input to a PHI def),
1701 // then replace it with an IMPLICIT_DEF.
1702 LiveInterval &DstLI = LIS->getInterval(DstReg);
1703 SlotIndex RegIndex = Idx.getRegSlot();
1704 LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
1705 assert(Seg != nullptr && "No segment for defining instruction");
1706 VNInfo *V = DstLI.getVNInfoAt(Seg->end);
1707
1708 // The source interval may also have been on an undef use, in which case the
1709 // copy introduced a live value.
1710 if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
1711 for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
1712 MachineOperand &MO = CopyMI->getOperand(i-1);
1713 if (MO.isReg()) {
1714 if (MO.isUse())
1715 CopyMI->removeOperand(i - 1);
1716 } else {
1717 assert(MO.isImm() &&
1718 CopyMI->getOpcode() == TargetOpcode::SUBREG_TO_REG);
1719 CopyMI->removeOperand(i-1);
1720 }
1721 }
1722
1723 CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
1724 LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
1725 "implicit def\n");
1726 return CopyMI;
1727 }
1728
1729 // Remove any DstReg segments starting at the instruction.
1730 LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1731
1732 // Remove value or merge with previous one in case of a subregister def.
1733 if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1734 VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1735 DstLI.MergeValueNumberInto(VNI, PrevVNI);
1736
1737 // The affected subregister segments can be removed.
1738 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1739 for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1740 if ((SR.LaneMask & DstMask).none())
1741 continue;
1742
1743 VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1744 assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1745 SR.removeValNo(SVNI);
1746 }
1747 DstLI.removeEmptySubRanges();
1748 } else
1749 LIS->removeVRegDefAt(DstLI, RegIndex);
1750
1751 // Mark uses as undef.
1752 for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1753 if (MO.isDef() /*|| MO.isUndef()*/)
1754 continue;
1755 const MachineInstr &MI = *MO.getParent();
1756 SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1757 LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1758 bool isLive;
1759 if (!UseMask.all() && DstLI.hasSubRanges()) {
1760 isLive = false;
1761 for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1762 if ((SR.LaneMask & UseMask).none())
1763 continue;
1764 if (SR.liveAt(UseIdx)) {
1765 isLive = true;
1766 break;
1767 }
1768 }
1769 } else
1770 isLive = DstLI.liveAt(UseIdx);
1771 if (isLive)
1772 continue;
1773 MO.setIsUndef(true);
1774 LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1775 }
1776
1777 // A def of a subregister may be a use of the other subregisters, so
1778 // deleting a def of a subregister may also remove uses. Since CopyMI
1779 // is still part of the function (but about to be erased), mark all
1780 // defs of DstReg in it as <undef>, so that shrinkToUses would
1781 // ignore them.
1782 for (MachineOperand &MO : CopyMI->all_defs())
1783 if (MO.getReg() == DstReg)
1784 MO.setIsUndef(true);
1785 LIS->shrinkToUses(&DstLI);
1786
1787 return CopyMI;
1788}
1789
1790void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1791 MachineOperand &MO, unsigned SubRegIdx) {
1792 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1793 if (MO.isDef())
1794 Mask = ~Mask;
1795 bool IsUndef = true;
1796 for (const LiveInterval::SubRange &S : Int.subranges()) {
1797 if ((S.LaneMask & Mask).none())
1798 continue;
1799 if (S.liveAt(UseIdx)) {
1800 IsUndef = false;
1801 break;
1802 }
1803 }
1804 if (IsUndef) {
1805 MO.setIsUndef(true);
1806 // We found out some subregister use is actually reading an undefined
1807 // value. In some cases the whole vreg has become undefined at this
1808 // point so we have to potentially shrink the main range if the
1809 // use was ending a live segment there.
1810 LiveQueryResult Q = Int.Query(UseIdx);
1811 if (Q.valueOut() == nullptr)
1812 ShrinkMainRange = true;
1813 }
1814}
1815
1816void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1817 unsigned SubIdx) {
1818 bool DstIsPhys = DstReg.isPhysical();
1819 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1820
1821 if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1822 for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1823 unsigned SubReg = MO.getSubReg();
1824 if (SubReg == 0 || MO.isUndef())
1825 continue;
1826 MachineInstr &MI = *MO.getParent();
1827 if (MI.isDebugInstr())
1828 continue;
1829 SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1830 addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1831 }
1832 }
1833
1836 I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1837 I != E; ) {
1838 MachineInstr *UseMI = &*(I++);
1839
1840 // Each instruction can only be rewritten once because sub-register
1841 // composition is not always idempotent. When SrcReg != DstReg, rewriting
1842 // the UseMI operands removes them from the SrcReg use-def chain, but when
1843 // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1844 // operands mentioning the virtual register.
1845 if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1846 continue;
1847
1849 bool Reads, Writes;
1850 std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1851
1852 // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1853 // because SrcReg is a sub-register.
1854 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
1855 Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1856
1857 // Replace SrcReg with DstReg in all UseMI operands.
1858 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1859 MachineOperand &MO = UseMI->getOperand(Ops[i]);
1860
1861 // Adjust <undef> flags in case of sub-register joins. We don't want to
1862 // turn a full def into a read-modify-write sub-register def and vice
1863 // versa.
1864 if (SubIdx && MO.isDef())
1865 MO.setIsUndef(!Reads);
1866
1867 // A subreg use of a partially undef (super) register may be a complete
1868 // undef use now and then has to be marked that way.
1869 if (MO.isUse() && !DstIsPhys) {
1870 unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
1871 if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
1872 if (!DstInt->hasSubRanges()) {
1874 LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1875 LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1876 LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1877 DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
1878 // The unused lanes are just empty live-ranges at this point.
1879 // It is the caller responsibility to set the proper
1880 // dead segments if there is an actual dead def of the
1881 // unused lanes. This may happen with rematerialization.
1882 DstInt->createSubRange(Allocator, UnusedLanes);
1883 }
1884 SlotIndex MIIdx = UseMI->isDebugInstr()
1886 : LIS->getInstructionIndex(*UseMI);
1887 SlotIndex UseIdx = MIIdx.getRegSlot(true);
1888 addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
1889 }
1890 }
1891
1892 if (DstIsPhys)
1893 MO.substPhysReg(DstReg, *TRI);
1894 else
1895 MO.substVirtReg(DstReg, SubIdx, *TRI);
1896 }
1897
1898 LLVM_DEBUG({
1899 dbgs() << "\t\tupdated: ";
1900 if (!UseMI->isDebugInstr())
1901 dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1902 dbgs() << *UseMI;
1903 });
1904 }
1905}
1906
1907bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1908 // Always join simple intervals that are defined by a single copy from a
1909 // reserved register. This doesn't increase register pressure, so it is
1910 // always beneficial.
1911 if (!MRI->isReserved(CP.getDstReg())) {
1912 LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1913 return false;
1914 }
1915
1916 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1917 if (JoinVInt.containsOneValue())
1918 return true;
1919
1920 LLVM_DEBUG(
1921 dbgs() << "\tCannot join complex intervals into reserved register.\n");
1922 return false;
1923}
1924
1925bool RegisterCoalescer::copyValueUndefInPredecessors(
1927 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1928 SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1929 if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1930 // If this is a self loop, we may be reading the same value.
1931 if (V->id != SLRQ.valueOutOrDead()->id)
1932 return false;
1933 }
1934 }
1935
1936 return true;
1937}
1938
1939void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1940 Register Reg,
1941 LaneBitmask PrunedLanes) {
1942 // If we had other instructions in the segment reading the undef sublane
1943 // value, we need to mark them with undef.
1944 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1945 unsigned SubRegIdx = MO.getSubReg();
1946 if (SubRegIdx == 0 || MO.isUndef())
1947 continue;
1948
1949 LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1950 SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
1951 for (LiveInterval::SubRange &S : LI.subranges()) {
1952 if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
1953 MO.setIsUndef();
1954 break;
1955 }
1956 }
1957 }
1958
1960
1961 // A def of a subregister may be a use of other register lanes. Replacing
1962 // such a def with a def of a different register will eliminate the use,
1963 // and may cause the recorded live range to be larger than the actual
1964 // liveness in the program IR.
1965 LIS->shrinkToUses(&LI);
1966}
1967
1968bool RegisterCoalescer::joinCopy(
1969 MachineInstr *CopyMI, bool &Again,
1970 SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
1971 Again = false;
1972 LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1973
1975 if (!CP.setRegisters(CopyMI)) {
1976 LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1977 return false;
1978 }
1979
1980 if (CP.getNewRC()) {
1981 auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1982 auto DstRC = MRI->getRegClass(CP.getDstReg());
1983 unsigned SrcIdx = CP.getSrcIdx();
1984 unsigned DstIdx = CP.getDstIdx();
1985 if (CP.isFlipped()) {
1986 std::swap(SrcIdx, DstIdx);
1987 std::swap(SrcRC, DstRC);
1988 }
1989 if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1990 CP.getNewRC(), *LIS)) {
1991 LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1992 return false;
1993 }
1994 }
1995
1996 // Dead code elimination. This really should be handled by MachineDCE, but
1997 // sometimes dead copies slip through, and we can't generate invalid live
1998 // ranges.
1999 if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
2000 LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
2001 DeadDefs.push_back(CopyMI);
2002 eliminateDeadDefs();
2003 return true;
2004 }
2005
2006 // Eliminate undefs.
2007 if (!CP.isPhys()) {
2008 // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
2009 if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
2010 if (UndefMI->isImplicitDef())
2011 return false;
2012 deleteInstr(CopyMI);
2013 return false; // Not coalescable.
2014 }
2015 }
2016
2017 // Coalesced copies are normally removed immediately, but transformations
2018 // like removeCopyByCommutingDef() can inadvertently create identity copies.
2019 // When that happens, just join the values and remove the copy.
2020 if (CP.getSrcReg() == CP.getDstReg()) {
2021 LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
2022 LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
2023 const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
2024 LiveQueryResult LRQ = LI.Query(CopyIdx);
2025 if (VNInfo *DefVNI = LRQ.valueDefined()) {
2026 VNInfo *ReadVNI = LRQ.valueIn();
2027 assert(ReadVNI && "No value before copy and no <undef> flag.");
2028 assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
2029
2030 // Track incoming undef lanes we need to eliminate from the subrange.
2031 LaneBitmask PrunedLanes;
2032 MachineBasicBlock *MBB = CopyMI->getParent();
2033
2034 // Process subregister liveranges.
2035 for (LiveInterval::SubRange &S : LI.subranges()) {
2036 LiveQueryResult SLRQ = S.Query(CopyIdx);
2037 if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
2038 if (VNInfo *SReadVNI = SLRQ.valueIn())
2039 SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
2040
2041 // If this copy introduced an undef subrange from an incoming value,
2042 // we need to eliminate the undef live in values from the subrange.
2043 if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
2044 LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
2045 PrunedLanes |= S.LaneMask;
2046 S.removeValNo(SDefVNI);
2047 }
2048 }
2049 }
2050
2051 LI.MergeValueNumberInto(DefVNI, ReadVNI);
2052 if (PrunedLanes.any()) {
2053 LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
2054 << PrunedLanes << '\n');
2055 setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
2056 }
2057
2058 LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
2059 }
2060 deleteInstr(CopyMI);
2061 return true;
2062 }
2063
2064 // Enforce policies.
2065 if (CP.isPhys()) {
2066 LLVM_DEBUG(dbgs() << "\tConsidering merging "
2067 << printReg(CP.getSrcReg(), TRI) << " with "
2068 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
2069 if (!canJoinPhys(CP)) {
2070 // Before giving up coalescing, if definition of source is defined by
2071 // trivial computation, try rematerializing it.
2072 bool IsDefCopy = false;
2073 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2074 return true;
2075 if (IsDefCopy)
2076 Again = true; // May be possible to coalesce later.
2077 return false;
2078 }
2079 } else {
2080 // When possible, let DstReg be the larger interval.
2081 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
2082 LIS->getInterval(CP.getDstReg()).size())
2083 CP.flip();
2084
2085 LLVM_DEBUG({
2086 dbgs() << "\tConsidering merging to "
2087 << TRI->getRegClassName(CP.getNewRC()) << " with ";
2088 if (CP.getDstIdx() && CP.getSrcIdx())
2089 dbgs() << printReg(CP.getDstReg()) << " in "
2090 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
2091 << printReg(CP.getSrcReg()) << " in "
2092 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
2093 else
2094 dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
2095 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
2096 });
2097 }
2098
2099 ShrinkMask = LaneBitmask::getNone();
2100 ShrinkMainRange = false;
2101
2102 // Okay, attempt to join these two intervals. On failure, this returns false.
2103 // Otherwise, if one of the intervals being joined is a physreg, this method
2104 // always canonicalizes DstInt to be it. The output "SrcInt" will not have
2105 // been modified, so we can use this information below to update aliases.
2106 if (!joinIntervals(CP)) {
2107 // Coalescing failed.
2108
2109 // If definition of source is defined by trivial computation, try
2110 // rematerializing it.
2111 bool IsDefCopy = false;
2112 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2113 return true;
2114
2115 // If we can eliminate the copy without merging the live segments, do so
2116 // now.
2117 if (!CP.isPartial() && !CP.isPhys()) {
2118 bool Changed = adjustCopiesBackFrom(CP, CopyMI);
2119 bool Shrink = false;
2120 if (!Changed)
2121 std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
2122 if (Changed) {
2123 deleteInstr(CopyMI);
2124 if (Shrink) {
2125 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
2126 LiveInterval &DstLI = LIS->getInterval(DstReg);
2127 shrinkToUses(&DstLI);
2128 LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
2129 }
2130 LLVM_DEBUG(dbgs() << "\tTrivial!\n");
2131 return true;
2132 }
2133 }
2134
2135 // Try and see if we can partially eliminate the copy by moving the copy to
2136 // its predecessor.
2137 if (!CP.isPartial() && !CP.isPhys())
2138 if (removePartialRedundancy(CP, *CopyMI))
2139 return true;
2140
2141 // Otherwise, we are unable to join the intervals.
2142 LLVM_DEBUG(dbgs() << "\tInterference!\n");
2143 Again = true; // May be possible to coalesce later.
2144 return false;
2145 }
2146
2147 // Coalescing to a virtual register that is of a sub-register class of the
2148 // other. Make sure the resulting register is set to the right register class.
2149 if (CP.isCrossClass()) {
2150 ++numCrossRCs;
2151 MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
2152 }
2153
2154 // Removing sub-register copies can ease the register class constraints.
2155 // Make sure we attempt to inflate the register class of DstReg.
2156 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
2157 InflateRegs.push_back(CP.getDstReg());
2158
2159 // CopyMI has been erased by joinIntervals at this point. Remove it from
2160 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
2161 // to the work list. This keeps ErasedInstrs from growing needlessly.
2162 if (ErasedInstrs.erase(CopyMI))
2163 // But we may encounter the instruction again in this iteration.
2164 CurrentErasedInstrs.insert(CopyMI);
2165
2166 // Rewrite all SrcReg operands to DstReg.
2167 // Also update DstReg operands to include DstIdx if it is set.
2168 if (CP.getDstIdx())
2169 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2170 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2171
2172 // Shrink subregister ranges if necessary.
2173 if (ShrinkMask.any()) {
2174 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2175 for (LiveInterval::SubRange &S : LI.subranges()) {
2176 if ((S.LaneMask & ShrinkMask).none())
2177 continue;
2178 LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
2179 << ")\n");
2180 LIS->shrinkToUses(S, LI.reg());
2181 ShrinkMainRange = true;
2182 }
2184 }
2185
2186 // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
2187 // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
2188 // is not up-to-date, need to update the merged live interval here.
2189 if (ToBeUpdated.count(CP.getSrcReg()))
2190 ShrinkMainRange = true;
2191
2192 if (ShrinkMainRange) {
2193 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2194 shrinkToUses(&LI);
2195 }
2196
2197 // SrcReg is guaranteed to be the register whose live interval that is
2198 // being merged.
2199 LIS->removeInterval(CP.getSrcReg());
2200
2201 // Update regalloc hint.
2202 TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
2203
2204 LLVM_DEBUG({
2205 dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
2206 << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
2207 dbgs() << "\tResult = ";
2208 if (CP.isPhys())
2209 dbgs() << printReg(CP.getDstReg(), TRI);
2210 else
2211 dbgs() << LIS->getInterval(CP.getDstReg());
2212 dbgs() << '\n';
2213 });
2214
2215 ++numJoins;
2216 return true;
2217}
2218
2219bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
2220 Register DstReg = CP.getDstReg();
2221 Register SrcReg = CP.getSrcReg();
2222 assert(CP.isPhys() && "Must be a physreg copy");
2223 assert(MRI->isReserved(DstReg) && "Not a reserved register");
2224 LiveInterval &RHS = LIS->getInterval(SrcReg);
2225 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
2226
2227 assert(RHS.containsOneValue() && "Invalid join with reserved register");
2228
2229 // Optimization for reserved registers like ESP. We can only merge with a
2230 // reserved physreg if RHS has a single value that is a copy of DstReg.
2231 // The live range of the reserved register will look like a set of dead defs
2232 // - we don't properly track the live range of reserved registers.
2233
2234 // Deny any overlapping intervals. This depends on all the reserved
2235 // register live ranges to look like dead defs.
2236 if (!MRI->isConstantPhysReg(DstReg)) {
2237 for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2238 // Abort if not all the regunits are reserved.
2239 for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
2240 if (!MRI->isReserved(*RI))
2241 return false;
2242 }
2243 if (RHS.overlaps(LIS->getRegUnit(Unit))) {
2244 LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
2245 << '\n');
2246 return false;
2247 }
2248 }
2249
2250 // We must also check for overlaps with regmask clobbers.
2251 BitVector RegMaskUsable;
2252 if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2253 !RegMaskUsable.test(DstReg)) {
2254 LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2255 return false;
2256 }
2257 }
2258
2259 // Skip any value computations, we are not adding new values to the
2260 // reserved register. Also skip merging the live ranges, the reserved
2261 // register live range doesn't need to be accurate as long as all the
2262 // defs are there.
2263
2264 // Delete the identity copy.
2265 MachineInstr *CopyMI;
2266 if (CP.isFlipped()) {
2267 // Physreg is copied into vreg
2268 // %y = COPY %physreg_x
2269 // ... //< no other def of %physreg_x here
2270 // use %y
2271 // =>
2272 // ...
2273 // use %physreg_x
2274 CopyMI = MRI->getVRegDef(SrcReg);
2275 deleteInstr(CopyMI);
2276 } else {
2277 // VReg is copied into physreg:
2278 // %y = def
2279 // ... //< no other def or use of %physreg_x here
2280 // %physreg_x = COPY %y
2281 // =>
2282 // %physreg_x = def
2283 // ...
2284 if (!MRI->hasOneNonDBGUse(SrcReg)) {
2285 LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2286 return false;
2287 }
2288
2289 if (!LIS->intervalIsInOneMBB(RHS)) {
2290 LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2291 return false;
2292 }
2293
2294 MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2295 CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2296 SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2297 SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2298
2299 if (!MRI->isConstantPhysReg(DstReg)) {
2300 // We checked above that there are no interfering defs of the physical
2301 // register. However, for this case, where we intend to move up the def of
2302 // the physical register, we also need to check for interfering uses.
2303 SlotIndexes *Indexes = LIS->getSlotIndexes();
2304 for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2305 SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
2307 if (MI->readsRegister(DstReg, TRI)) {
2308 LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2309 return false;
2310 }
2311 }
2312 }
2313
2314 // We're going to remove the copy which defines a physical reserved
2315 // register, so remove its valno, etc.
2316 LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
2317 << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2318
2319 LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
2320 deleteInstr(CopyMI);
2321
2322 // Create a new dead def at the new def location.
2323 for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2324 LiveRange &LR = LIS->getRegUnit(Unit);
2325 LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2326 }
2327 }
2328
2329 // We don't track kills for reserved registers.
2330 MRI->clearKillFlags(CP.getSrcReg());
2331
2332 return true;
2333}
2334
2335//===----------------------------------------------------------------------===//
2336// Interference checking and interval joining
2337//===----------------------------------------------------------------------===//
2338//
2339// In the easiest case, the two live ranges being joined are disjoint, and
2340// there is no interference to consider. It is quite common, though, to have
2341// overlapping live ranges, and we need to check if the interference can be
2342// resolved.
2343//
2344// The live range of a single SSA value forms a sub-tree of the dominator tree.
2345// This means that two SSA values overlap if and only if the def of one value
2346// is contained in the live range of the other value. As a special case, the
2347// overlapping values can be defined at the same index.
2348//
2349// The interference from an overlapping def can be resolved in these cases:
2350//
2351// 1. Coalescable copies. The value is defined by a copy that would become an
2352// identity copy after joining SrcReg and DstReg. The copy instruction will
2353// be removed, and the value will be merged with the source value.
2354//
2355// There can be several copies back and forth, causing many values to be
2356// merged into one. We compute a list of ultimate values in the joined live
2357// range as well as a mappings from the old value numbers.
2358//
2359// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2360// predecessors have a live out value. It doesn't cause real interference,
2361// and can be merged into the value it overlaps. Like a coalescable copy, it
2362// can be erased after joining.
2363//
2364// 3. Copy of external value. The overlapping def may be a copy of a value that
2365// is already in the other register. This is like a coalescable copy, but
2366// the live range of the source register must be trimmed after erasing the
2367// copy instruction:
2368//
2369// %src = COPY %ext
2370// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
2371//
2372// 4. Clobbering undefined lanes. Vector registers are sometimes built by
2373// defining one lane at a time:
2374//
2375// %dst:ssub0<def,read-undef> = FOO
2376// %src = BAR
2377// %dst:ssub1 = COPY %src
2378//
2379// The live range of %src overlaps the %dst value defined by FOO, but
2380// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2381// which was undef anyway.
2382//
2383// The value mapping is more complicated in this case. The final live range
2384// will have different value numbers for both FOO and BAR, but there is no
2385// simple mapping from old to new values. It may even be necessary to add
2386// new PHI values.
2387//
2388// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2389// is live, but never read. This can happen because we don't compute
2390// individual live ranges per lane.
2391//
2392// %dst = FOO
2393// %src = BAR
2394// %dst:ssub1 = COPY %src
2395//
2396// This kind of interference is only resolved locally. If the clobbered
2397// lane value escapes the block, the join is aborted.
2398
2399namespace {
2400
2401/// Track information about values in a single virtual register about to be
2402/// joined. Objects of this class are always created in pairs - one for each
2403/// side of the CoalescerPair (or one for each lane of a side of the coalescer
2404/// pair)
2405class JoinVals {
2406 /// Live range we work on.
2407 LiveRange &LR;
2408
2409 /// (Main) register we work on.
2410 const Register Reg;
2411
2412 /// Reg (and therefore the values in this liverange) will end up as
2413 /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2414 /// CP.SrcIdx.
2415 const unsigned SubIdx;
2416
2417 /// The LaneMask that this liverange will occupy the coalesced register. May
2418 /// be smaller than the lanemask produced by SubIdx when merging subranges.
2419 const LaneBitmask LaneMask;
2420
2421 /// This is true when joining sub register ranges, false when joining main
2422 /// ranges.
2423 const bool SubRangeJoin;
2424
2425 /// Whether the current LiveInterval tracks subregister liveness.
2426 const bool TrackSubRegLiveness;
2427
2428 /// Values that will be present in the final live range.
2429 SmallVectorImpl<VNInfo*> &NewVNInfo;
2430
2431 const CoalescerPair &CP;
2432 LiveIntervals *LIS;
2433 SlotIndexes *Indexes;
2434 const TargetRegisterInfo *TRI;
2435
2436 /// Value number assignments. Maps value numbers in LI to entries in
2437 /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2438 SmallVector<int, 8> Assignments;
2439
2440 public:
2441 /// Conflict resolution for overlapping values.
2442 enum ConflictResolution {
2443 /// No overlap, simply keep this value.
2444 CR_Keep,
2445
2446 /// Merge this value into OtherVNI and erase the defining instruction.
2447 /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2448 /// values.
2449 CR_Erase,
2450
2451 /// Merge this value into OtherVNI but keep the defining instruction.
2452 /// This is for the special case where OtherVNI is defined by the same
2453 /// instruction.
2454 CR_Merge,
2455
2456 /// Keep this value, and have it replace OtherVNI where possible. This
2457 /// complicates value mapping since OtherVNI maps to two different values
2458 /// before and after this def.
2459 /// Used when clobbering undefined or dead lanes.
2460 CR_Replace,
2461
2462 /// Unresolved conflict. Visit later when all values have been mapped.
2463 CR_Unresolved,
2464
2465 /// Unresolvable conflict. Abort the join.
2466 CR_Impossible
2467 };
2468
2469 private:
2470 /// Per-value info for LI. The lane bit masks are all relative to the final
2471 /// joined register, so they can be compared directly between SrcReg and
2472 /// DstReg.
2473 struct Val {
2474 ConflictResolution Resolution = CR_Keep;
2475
2476 /// Lanes written by this def, 0 for unanalyzed values.
2477 LaneBitmask WriteLanes;
2478
2479 /// Lanes with defined values in this register. Other lanes are undef and
2480 /// safe to clobber.
2481 LaneBitmask ValidLanes;
2482
2483 /// Value in LI being redefined by this def.
2484 VNInfo *RedefVNI = nullptr;
2485
2486 /// Value in the other live range that overlaps this def, if any.
2487 VNInfo *OtherVNI = nullptr;
2488
2489 /// Is this value an IMPLICIT_DEF that can be erased?
2490 ///
2491 /// IMPLICIT_DEF values should only exist at the end of a basic block that
2492 /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2493 /// safely erased if they are overlapping a live value in the other live
2494 /// interval.
2495 ///
2496 /// Weird control flow graphs and incomplete PHI handling in
2497 /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2498 /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2499 /// normal values.
2500 bool ErasableImplicitDef = false;
2501
2502 /// True when the live range of this value will be pruned because of an
2503 /// overlapping CR_Replace value in the other live range.
2504 bool Pruned = false;
2505
2506 /// True once Pruned above has been computed.
2507 bool PrunedComputed = false;
2508
2509 /// True if this value is determined to be identical to OtherVNI
2510 /// (in valuesIdentical). This is used with CR_Erase where the erased
2511 /// copy is redundant, i.e. the source value is already the same as
2512 /// the destination. In such cases the subranges need to be updated
2513 /// properly. See comment at pruneSubRegValues for more info.
2514 bool Identical = false;
2515
2516 Val() = default;
2517
2518 bool isAnalyzed() const { return WriteLanes.any(); }
2519
2520 /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
2521 /// ordinary value.
2522 void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
2523 const MachineInstr &ImpDef) {
2524 assert(ImpDef.isImplicitDef());
2525 ErasableImplicitDef = false;
2526 ValidLanes = TRI.getSubRegIndexLaneMask(ImpDef.getOperand(0).getSubReg());
2527 }
2528 };
2529
2530 /// One entry per value number in LI.
2532
2533 /// Compute the bitmask of lanes actually written by DefMI.
2534 /// Set Redef if there are any partial register definitions that depend on the
2535 /// previous value of the register.
2536 LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2537
2538 /// Find the ultimate value that VNI was copied from.
2539 std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
2540
2541 bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2542
2543 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2544 /// Return a conflict resolution when possible, but leave the hard cases as
2545 /// CR_Unresolved.
2546 /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2547 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2548 /// The recursion always goes upwards in the dominator tree, making loops
2549 /// impossible.
2550 ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2551
2552 /// Compute the value assignment for ValNo in RI.
2553 /// This may be called recursively by analyzeValue(), but never for a ValNo on
2554 /// the stack.
2555 void computeAssignment(unsigned ValNo, JoinVals &Other);
2556
2557 /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2558 /// the extent of the tainted lanes in the block.
2559 ///
2560 /// Multiple values in Other.LR can be affected since partial redefinitions
2561 /// can preserve previously tainted lanes.
2562 ///
2563 /// 1 %dst = VLOAD <-- Define all lanes in %dst
2564 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
2565 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
2566 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2567 ///
2568 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2569 /// entry to TaintedVals.
2570 ///
2571 /// Returns false if the tainted lanes extend beyond the basic block.
2572 bool
2573 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2574 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2575
2576 /// Return true if MI uses any of the given Lanes from Reg.
2577 /// This does not include partial redefinitions of Reg.
2578 bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
2579
2580 /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2581 /// be pruned:
2582 ///
2583 /// %dst = COPY %src
2584 /// %src = COPY %dst <-- This value to be pruned.
2585 /// %dst = COPY %src <-- This value is a copy of a pruned value.
2586 bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2587
2588public:
2589 JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
2590 SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
2591 LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2592 bool TrackSubRegLiveness)
2593 : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2594 SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2595 NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2596 TRI(TRI), Assignments(LR.getNumValNums(), -1),
2597 Vals(LR.getNumValNums()) {}
2598
2599 /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2600 /// Returns false if any conflicts were impossible to resolve.
2601 bool mapValues(JoinVals &Other);
2602
2603 /// Try to resolve conflicts that require all values to be mapped.
2604 /// Returns false if any conflicts were impossible to resolve.
2605 bool resolveConflicts(JoinVals &Other);
2606
2607 /// Prune the live range of values in Other.LR where they would conflict with
2608 /// CR_Replace values in LR. Collect end points for restoring the live range
2609 /// after joining.
2610 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2611 bool changeInstrs);
2612
2613 /// Removes subranges starting at copies that get removed. This sometimes
2614 /// happens when undefined subranges are copied around. These ranges contain
2615 /// no useful information and can be removed.
2616 void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2617
2618 /// Pruning values in subranges can lead to removing segments in these
2619 /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2620 /// the main range also need to be removed. This function will mark
2621 /// the corresponding values in the main range as pruned, so that
2622 /// eraseInstrs can do the final cleanup.
2623 /// The parameter @p LI must be the interval whose main range is the
2624 /// live range LR.
2625 void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2626
2627 /// Erase any machine instructions that have been coalesced away.
2628 /// Add erased instructions to ErasedInstrs.
2629 /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2630 /// the erased instrs.
2632 SmallVectorImpl<Register> &ShrinkRegs,
2633 LiveInterval *LI = nullptr);
2634
2635 /// Remove liverange defs at places where implicit defs will be removed.
2636 void removeImplicitDefs();
2637
2638 /// Get the value assignments suitable for passing to LiveInterval::join.
2639 const int *getAssignments() const { return Assignments.data(); }
2640
2641 /// Get the conflict resolution for a value number.
2642 ConflictResolution getResolution(unsigned Num) const {
2643 return Vals[Num].Resolution;
2644 }
2645};
2646
2647} // end anonymous namespace
2648
2649LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2650 const {
2651 LaneBitmask L;
2652 for (const MachineOperand &MO : DefMI->all_defs()) {
2653 if (MO.getReg() != Reg)
2654 continue;
2655 L |= TRI->getSubRegIndexLaneMask(
2656 TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2657 if (MO.readsReg())
2658 Redef = true;
2659 }
2660 return L;
2661}
2662
2663std::pair<const VNInfo *, Register>
2664JoinVals::followCopyChain(const VNInfo *VNI) const {
2665 Register TrackReg = Reg;
2666
2667 while (!VNI->isPHIDef()) {
2668 SlotIndex Def = VNI->def;
2669 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2670 assert(MI && "No defining instruction");
2671 if (!MI->isFullCopy())
2672 return std::make_pair(VNI, TrackReg);
2673 Register SrcReg = MI->getOperand(1).getReg();
2674 if (!SrcReg.isVirtual())
2675 return std::make_pair(VNI, TrackReg);
2676
2677 const LiveInterval &LI = LIS->getInterval(SrcReg);
2678 const VNInfo *ValueIn;
2679 // No subrange involved.
2680 if (!SubRangeJoin || !LI.hasSubRanges()) {
2681 LiveQueryResult LRQ = LI.Query(Def);
2682 ValueIn = LRQ.valueIn();
2683 } else {
2684 // Query subranges. Ensure that all matching ones take us to the same def
2685 // (allowing some of them to be undef).
2686 ValueIn = nullptr;
2687 for (const LiveInterval::SubRange &S : LI.subranges()) {
2688 // Transform lanemask to a mask in the joined live interval.
2689 LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2690 if ((SMask & LaneMask).none())
2691 continue;
2692 LiveQueryResult LRQ = S.Query(Def);
2693 if (!ValueIn) {
2694 ValueIn = LRQ.valueIn();
2695 continue;
2696 }
2697 if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2698 return std::make_pair(VNI, TrackReg);
2699 }
2700 }
2701 if (ValueIn == nullptr) {
2702 // Reaching an undefined value is legitimate, for example:
2703 //
2704 // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
2705 // 2 %1 = COPY %0 ;; %1 is defined here.
2706 // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
2707 // ;; but it's equivalent to "undef".
2708 return std::make_pair(nullptr, SrcReg);
2709 }
2710 VNI = ValueIn;
2711 TrackReg = SrcReg;
2712 }
2713 return std::make_pair(VNI, TrackReg);
2714}
2715
2716bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2717 const JoinVals &Other) const {
2718 const VNInfo *Orig0;
2719 Register Reg0;
2720 std::tie(Orig0, Reg0) = followCopyChain(Value0);
2721 if (Orig0 == Value1 && Reg0 == Other.Reg)
2722 return true;
2723
2724 const VNInfo *Orig1;
2725 Register Reg1;
2726 std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2727 // If both values are undefined, and the source registers are the same
2728 // register, the values are identical. Filter out cases where only one
2729 // value is defined.
2730 if (Orig0 == nullptr || Orig1 == nullptr)
2731 return Orig0 == Orig1 && Reg0 == Reg1;
2732
2733 // The values are equal if they are defined at the same place and use the
2734 // same register. Note that we cannot compare VNInfos directly as some of
2735 // them might be from a copy created in mergeSubRangeInto() while the other
2736 // is from the original LiveInterval.
2737 return Orig0->def == Orig1->def && Reg0 == Reg1;
2738}
2739
2740JoinVals::ConflictResolution
2741JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2742 Val &V = Vals[ValNo];
2743 assert(!V.isAnalyzed() && "Value has already been analyzed!");
2744 VNInfo *VNI = LR.getValNumInfo(ValNo);
2745 if (VNI->isUnused()) {
2746 V.WriteLanes = LaneBitmask::getAll();
2747 return CR_Keep;
2748 }
2749
2750 // Get the instruction defining this value, compute the lanes written.
2751 const MachineInstr *DefMI = nullptr;
2752 if (VNI->isPHIDef()) {
2753 // Conservatively assume that all lanes in a PHI are valid.
2754 LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2755 : TRI->getSubRegIndexLaneMask(SubIdx);
2756 V.ValidLanes = V.WriteLanes = Lanes;
2757 } else {
2758 DefMI = Indexes->getInstructionFromIndex(VNI->def);
2759 assert(DefMI != nullptr);
2760 if (SubRangeJoin) {
2761 // We don't care about the lanes when joining subregister ranges.
2762 V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2763 if (DefMI->isImplicitDef()) {
2764 V.ValidLanes = LaneBitmask::getNone();
2765 V.ErasableImplicitDef = true;
2766 }
2767 } else {
2768 bool Redef = false;
2769 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2770
2771 // If this is a read-modify-write instruction, there may be more valid
2772 // lanes than the ones written by this instruction.
2773 // This only covers partial redef operands. DefMI may have normal use
2774 // operands reading the register. They don't contribute valid lanes.
2775 //
2776 // This adds ssub1 to the set of valid lanes in %src:
2777 //
2778 // %src:ssub1 = FOO
2779 //
2780 // This leaves only ssub1 valid, making any other lanes undef:
2781 //
2782 // %src:ssub1<def,read-undef> = FOO %src:ssub2
2783 //
2784 // The <read-undef> flag on the def operand means that old lane values are
2785 // not important.
2786 if (Redef) {
2787 V.RedefVNI = LR.Query(VNI->def).valueIn();
2788 assert((TrackSubRegLiveness || V.RedefVNI) &&
2789 "Instruction is reading nonexistent value");
2790 if (V.RedefVNI != nullptr) {
2791 computeAssignment(V.RedefVNI->id, Other);
2792 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2793 }
2794 }
2795
2796 // An IMPLICIT_DEF writes undef values.
2797 if (DefMI->isImplicitDef()) {
2798 // We normally expect IMPLICIT_DEF values to be live only until the end
2799 // of their block. If the value is really live longer and gets pruned in
2800 // another block, this flag is cleared again.
2801 //
2802 // Clearing the valid lanes is deferred until it is sure this can be
2803 // erased.
2804 V.ErasableImplicitDef = true;
2805 }
2806 }
2807 }
2808
2809 // Find the value in Other that overlaps VNI->def, if any.
2810 LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2811
2812 // It is possible that both values are defined by the same instruction, or
2813 // the values are PHIs defined in the same block. When that happens, the two
2814 // values should be merged into one, but not into any preceding value.
2815 // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2816 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2817 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2818
2819 // One value stays, the other is merged. Keep the earlier one, or the first
2820 // one we see.
2821 if (OtherVNI->def < VNI->def)
2822 Other.computeAssignment(OtherVNI->id, *this);
2823 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2824 // This is an early-clobber def overlapping a live-in value in the other
2825 // register. Not mergeable.
2826 V.OtherVNI = OtherLRQ.valueIn();
2827 return CR_Impossible;
2828 }
2829 V.OtherVNI = OtherVNI;
2830 Val &OtherV = Other.Vals[OtherVNI->id];
2831 // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2832 // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2833 // is assigned.
2834 if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2835 return CR_Keep;
2836 // Both sides have been analyzed now.
2837 // Allow overlapping PHI values. Any real interference would show up in a
2838 // predecessor, the PHI itself can't introduce any conflicts.
2839 if (VNI->isPHIDef())
2840 return CR_Merge;
2841 if ((V.ValidLanes & OtherV.ValidLanes).any())
2842 // Overlapping lanes can't be resolved.
2843 return CR_Impossible;
2844 else
2845 return CR_Merge;
2846 }
2847
2848 // No simultaneous def. Is Other live at the def?
2849 V.OtherVNI = OtherLRQ.valueIn();
2850 if (!V.OtherVNI)
2851 // No overlap, no conflict.
2852 return CR_Keep;
2853
2854 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2855
2856 // We have overlapping values, or possibly a kill of Other.
2857 // Recursively compute assignments up the dominator tree.
2858 Other.computeAssignment(V.OtherVNI->id, *this);
2859 Val &OtherV = Other.Vals[V.OtherVNI->id];
2860
2861 if (OtherV.ErasableImplicitDef) {
2862 // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2863 // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2864 // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2865 // technically.
2866 //
2867 // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2868 // to erase the IMPLICIT_DEF instruction.
2869 //
2870 // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming
2871 // value.
2872
2873 MachineInstr *OtherImpDef =
2874 Indexes->getInstructionFromIndex(V.OtherVNI->def);
2875 MachineBasicBlock *OtherMBB = OtherImpDef->getParent();
2876 if (DefMI &&
2877 (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, OtherMBB))) {
2878 LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2879 << " extends into "
2881 << ", keeping it.\n");
2882 OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2883 } else if (OtherMBB->hasEHPadSuccessor()) {
2884 // If OtherV is defined in a basic block that has EH pad successors then
2885 // we get the same problem not just if OtherV is live beyond its basic
2886 // block, but beyond the last call instruction in its basic block. Handle
2887 // this case conservatively.
2888 LLVM_DEBUG(
2889 dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2890 << " may be live into EH pad successors, keeping it.\n");
2891 OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2892 } else {
2893 // We deferred clearing these lanes in case we needed to save them
2894 OtherV.ValidLanes &= ~OtherV.WriteLanes;
2895 }
2896 }
2897
2898 // Allow overlapping PHI values. Any real interference would show up in a
2899 // predecessor, the PHI itself can't introduce any conflicts.
2900 if (VNI->isPHIDef())
2901 return CR_Replace;
2902
2903 // Check for simple erasable conflicts.
2904 if (DefMI->isImplicitDef())
2905 return CR_Erase;
2906
2907 // Include the non-conflict where DefMI is a coalescable copy that kills
2908 // OtherVNI. We still want the copy erased and value numbers merged.
2909 if (CP.isCoalescable(DefMI)) {
2910 // Some of the lanes copied from OtherVNI may be undef, making them undef
2911 // here too.
2912 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2913 return CR_Erase;
2914 }
2915
2916 // This may not be a real conflict if DefMI simply kills Other and defines
2917 // VNI.
2918 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2919 return CR_Keep;
2920
2921 // Handle the case where VNI and OtherVNI can be proven to be identical:
2922 //
2923 // %other = COPY %ext
2924 // %this = COPY %ext <-- Erase this copy
2925 //
2926 if (DefMI->isFullCopy() && !CP.isPartial() &&
2927 valuesIdentical(VNI, V.OtherVNI, Other)) {
2928 V.Identical = true;
2929 return CR_Erase;
2930 }
2931
2932 // The remaining checks apply to the lanes, which aren't tracked here. This
2933 // was already decided to be OK via the following CR_Replace condition.
2934 // CR_Replace.
2935 if (SubRangeJoin)
2936 return CR_Replace;
2937
2938 // If the lanes written by this instruction were all undef in OtherVNI, it is
2939 // still safe to join the live ranges. This can't be done with a simple value
2940 // mapping, though - OtherVNI will map to multiple values:
2941 //
2942 // 1 %dst:ssub0 = FOO <-- OtherVNI
2943 // 2 %src = BAR <-- VNI
2944 // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2945 // 4 BAZ killed %dst
2946 // 5 QUUX killed %src
2947 //
2948 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2949 // handles this complex value mapping.
2950 if ((V.WriteLanes & OtherV.ValidLanes).none())
2951 return CR_Replace;
2952
2953 // If the other live range is killed by DefMI and the live ranges are still
2954 // overlapping, it must be because we're looking at an early clobber def:
2955 //
2956 // %dst<def,early-clobber> = ASM killed %src
2957 //
2958 // In this case, it is illegal to merge the two live ranges since the early
2959 // clobber def would clobber %src before it was read.
2960 if (OtherLRQ.isKill()) {
2961 // This case where the def doesn't overlap the kill is handled above.
2962 assert(VNI->def.isEarlyClobber() &&
2963 "Only early clobber defs can overlap a kill");
2964 return CR_Impossible;
2965 }
2966
2967 // VNI is clobbering live lanes in OtherVNI, but there is still the
2968 // possibility that no instructions actually read the clobbered lanes.
2969 // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2970 // Otherwise Other.RI wouldn't be live here.
2971 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2972 return CR_Impossible;
2973
2974 if (TrackSubRegLiveness) {
2975 auto &OtherLI = LIS->getInterval(Other.Reg);
2976 // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
2977 // share the same live range, so we just need to check whether they have
2978 // any conflict bit in their LaneMask.
2979 if (!OtherLI.hasSubRanges()) {
2980 LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
2981 return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
2982 }
2983
2984 // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
2985 // impossible to resolve the conflict. Otherwise, we can just replace
2986 // OtherVNI because of no real conflict.
2987 for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
2988 LaneBitmask OtherMask =
2989 TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
2990 if ((OtherMask & V.WriteLanes).none())
2991 continue;
2992
2993 auto OtherSRQ = OtherSR.Query(VNI->def);
2994 if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
2995 // VNI is clobbering some lanes of OtherVNI, they have real conflict.
2996 return CR_Impossible;
2997 }
2998 }
2999
3000 // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
3001 return CR_Replace;
3002 }
3003
3004 // We need to verify that no instructions are reading the clobbered lanes.
3005 // To save compile time, we'll only check that locally. Don't allow the
3006 // tainted value to escape the basic block.
3007 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3008 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
3009 return CR_Impossible;
3010
3011 // There are still some things that could go wrong besides clobbered lanes
3012 // being read, for example OtherVNI may be only partially redefined in MBB,
3013 // and some clobbered lanes could escape the block. Save this analysis for
3014 // resolveConflicts() when all values have been mapped. We need to know
3015 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
3016 // that now - the recursive analyzeValue() calls must go upwards in the
3017 // dominator tree.
3018 return CR_Unresolved;
3019}
3020
3021void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
3022 Val &V = Vals[ValNo];
3023 if (V.isAnalyzed()) {
3024 // Recursion should always move up the dominator tree, so ValNo is not
3025 // supposed to reappear before it has been assigned.
3026 assert(Assignments[ValNo] != -1 && "Bad recursion?");
3027 return;
3028 }
3029 switch ((V.Resolution = analyzeValue(ValNo, Other))) {
3030 case CR_Erase:
3031 case CR_Merge:
3032 // Merge this ValNo into OtherVNI.
3033 assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
3034 assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
3035 Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
3036 LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
3037 << LR.getValNumInfo(ValNo)->def << " into "
3038 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
3039 << V.OtherVNI->def << " --> @"
3040 << NewVNInfo[Assignments[ValNo]]->def << '\n');
3041 break;
3042 case CR_Replace:
3043 case CR_Unresolved: {
3044 // The other value is going to be pruned if this join is successful.
3045 assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
3046 Val &OtherV = Other.Vals[V.OtherVNI->id];
3047 OtherV.Pruned = true;
3048 [[fallthrough]];
3049 }
3050 default:
3051 // This value number needs to go in the final joined live range.
3052 Assignments[ValNo] = NewVNInfo.size();
3053 NewVNInfo.push_back(LR.getValNumInfo(ValNo));
3054 break;
3055 }
3056}
3057
3058bool JoinVals::mapValues(JoinVals &Other) {
3059 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3060 computeAssignment(i, Other);
3061 if (Vals[i].Resolution == CR_Impossible) {
3062 LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
3063 << '@' << LR.getValNumInfo(i)->def << '\n');
3064 return false;
3065 }
3066 }
3067 return true;
3068}
3069
3070bool JoinVals::
3071taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
3072 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
3073 VNInfo *VNI = LR.getValNumInfo(ValNo);
3074 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3075 SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
3076
3077 // Scan Other.LR from VNI.def to MBBEnd.
3078 LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
3079 assert(OtherI != Other.LR.end() && "No conflict?");
3080 do {
3081 // OtherI is pointing to a tainted value. Abort the join if the tainted
3082 // lanes escape the block.
3083 SlotIndex End = OtherI->end;
3084 if (End >= MBBEnd) {
3085 LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
3086 << OtherI->valno->id << '@' << OtherI->start << '\n');
3087 return false;
3088 }
3089 LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
3090 << OtherI->valno->id << '@' << OtherI->start << " to "
3091 << End << '\n');
3092 // A dead def is not a problem.
3093 if (End.isDead())
3094 break;
3095 TaintExtent.push_back(std::make_pair(End, TaintedLanes));
3096
3097 // Check for another def in the MBB.
3098 if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
3099 break;
3100
3101 // Lanes written by the new def are no longer tainted.
3102 const Val &OV = Other.Vals[OtherI->valno->id];
3103 TaintedLanes &= ~OV.WriteLanes;
3104 if (!OV.RedefVNI)
3105 break;
3106 } while (TaintedLanes.any());
3107 return true;
3108}
3109
3110bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3111 LaneBitmask Lanes) const {
3112 if (MI.isDebugOrPseudoInstr())
3113 return false;
3114 for (const MachineOperand &MO : MI.all_uses()) {
3115 if (MO.getReg() != Reg)
3116 continue;
3117 if (!MO.readsReg())
3118 continue;
3119 unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3120 if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3121 return true;
3122 }
3123 return false;
3124}
3125
3126bool JoinVals::resolveConflicts(JoinVals &Other) {
3127 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3128 Val &V = Vals[i];
3129 assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
3130 if (V.Resolution != CR_Unresolved)
3131 continue;
3132 LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
3133 << LR.getValNumInfo(i)->def
3134 << ' ' << PrintLaneMask(LaneMask) << '\n');
3135 if (SubRangeJoin)
3136 return false;
3137
3138 ++NumLaneConflicts;
3139 assert(V.OtherVNI && "Inconsistent conflict resolution.");
3140 VNInfo *VNI = LR.getValNumInfo(i);
3141 const Val &OtherV = Other.Vals[V.OtherVNI->id];
3142
3143 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
3144 // join, those lanes will be tainted with a wrong value. Get the extent of
3145 // the tainted lanes.
3146 LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
3148 if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
3149 // Tainted lanes would extend beyond the basic block.
3150 return false;
3151
3152 assert(!TaintExtent.empty() && "There should be at least one conflict.");
3153
3154 // Now look at the instructions from VNI->def to TaintExtent (inclusive).
3155 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3157 if (!VNI->isPHIDef()) {
3158 MI = Indexes->getInstructionFromIndex(VNI->def);
3159 if (!VNI->def.isEarlyClobber()) {
3160 // No need to check the instruction defining VNI for reads.
3161 ++MI;
3162 }
3163 }
3164 assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
3165 "Interference ends on VNI->def. Should have been handled earlier");
3166 MachineInstr *LastMI =
3167 Indexes->getInstructionFromIndex(TaintExtent.front().first);
3168 assert(LastMI && "Range must end at a proper instruction");
3169 unsigned TaintNum = 0;
3170 while (true) {
3171 assert(MI != MBB->end() && "Bad LastMI");
3172 if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
3173 LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
3174 return false;
3175 }
3176 // LastMI is the last instruction to use the current value.
3177 if (&*MI == LastMI) {
3178 if (++TaintNum == TaintExtent.size())
3179 break;
3180 LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
3181 assert(LastMI && "Range must end at a proper instruction");
3182 TaintedLanes = TaintExtent[TaintNum].second;
3183 }
3184 ++MI;
3185 }
3186
3187 // The tainted lanes are unused.
3188 V.Resolution = CR_Replace;
3189 ++NumLaneResolves;
3190 }
3191 return true;
3192}
3193
3194bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3195 Val &V = Vals[ValNo];
3196 if (V.Pruned || V.PrunedComputed)
3197 return V.Pruned;
3198
3199 if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3200 return V.Pruned;
3201
3202 // Follow copies up the dominator tree and check if any intermediate value
3203 // has been pruned.
3204 V.PrunedComputed = true;
3205 V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3206 return V.Pruned;
3207}
3208
3209void JoinVals::pruneValues(JoinVals &Other,
3210 SmallVectorImpl<SlotIndex> &EndPoints,
3211 bool changeInstrs) {
3212 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3213 SlotIndex Def = LR.getValNumInfo(i)->def;
3214 switch (Vals[i].Resolution) {
3215 case CR_Keep:
3216 break;
3217 case CR_Replace: {
3218 // This value takes precedence over the value in Other.LR.
3219 LIS->pruneValue(Other.LR, Def, &EndPoints);
3220 // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
3221 // instructions are only inserted to provide a live-out value for PHI
3222 // predecessors, so the instruction should simply go away once its value
3223 // has been replaced.
3224 Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
3225 bool EraseImpDef = OtherV.ErasableImplicitDef &&
3226 OtherV.Resolution == CR_Keep;
3227 if (!Def.isBlock()) {
3228 if (changeInstrs) {
3229 // Remove <def,read-undef> flags. This def is now a partial redef.
3230 // Also remove dead flags since the joined live range will
3231 // continue past this instruction.
3232 for (MachineOperand &MO :
3233 Indexes->getInstructionFromIndex(Def)->operands()) {
3234 if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
3235 if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
3236 MO.setIsUndef(false);
3237 MO.setIsDead(false);
3238 }
3239 }
3240 }
3241 // This value will reach instructions below, but we need to make sure
3242 // the live range also reaches the instruction at Def.
3243 if (!EraseImpDef)
3244 EndPoints.push_back(Def);
3245 }
3246 LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
3247 << ": " << Other.LR << '\n');
3248 break;
3249 }
3250 case CR_Erase:
3251 case CR_Merge:
3252 if (isPrunedValue(i, Other)) {
3253 // This value is ultimately a copy of a pruned value in LR or Other.LR.
3254 // We can no longer trust the value mapping computed by
3255 // computeAssignment(), the value that was originally copied could have
3256 // been replaced.
3257 LIS->pruneValue(LR, Def, &EndPoints);
3258 LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
3259 << Def << ": " << LR << '\n');
3260 }
3261 break;
3262 case CR_Unresolved:
3263 case CR_Impossible:
3264 llvm_unreachable("Unresolved conflicts");
3265 }
3266 }
3267}
3268
3269// Check if the segment consists of a copied live-through value (i.e. the copy
3270// in the block only extended the liveness, of an undef value which we may need
3271// to handle).
3272static bool isLiveThrough(const LiveQueryResult Q) {
3273 return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3274}
3275
3276/// Consider the following situation when coalescing the copy between
3277/// %31 and %45 at 800. (The vertical lines represent live range segments.)
3278///
3279/// Main range Subrange 0004 (sub2)
3280/// %31 %45 %31 %45
3281/// 544 %45 = COPY %28 + +
3282/// | v1 | v1
3283/// 560B bb.1: + +
3284/// 624 = %45.sub2 | v2 | v2
3285/// 800 %31 = COPY %45 + + + +
3286/// | v0 | v0
3287/// 816 %31.sub1 = ... + |
3288/// 880 %30 = COPY %31 | v1 +
3289/// 928 %45 = COPY %30 | + +
3290/// | | v0 | v0 <--+
3291/// 992B ; backedge -> bb.1 | + + |
3292/// 1040 = %31.sub0 + |
3293/// This value must remain
3294/// live-out!
3295///
3296/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3297/// redundant, since it copies the value from %45 back into it. The
3298/// conflict resolution for the main range determines that %45.v0 is
3299/// to be erased, which is ok since %31.v1 is identical to it.
3300/// The problem happens with the subrange for sub2: it has to be live
3301/// on exit from the block, but since 928 was actually a point of
3302/// definition of %45.sub2, %45.sub2 was not live immediately prior
3303/// to that definition. As a result, when 928 was erased, the value v0
3304/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3305/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3306/// providing an incorrect value to the use at 624.
3307///
3308/// Since the main-range values %31.v1 and %45.v0 were proved to be
3309/// identical, the corresponding values in subranges must also be the
3310/// same. A redundant copy is removed because it's not needed, and not
3311/// because it copied an undefined value, so any liveness that originated
3312/// from that copy cannot disappear. When pruning a value that started
3313/// at the removed copy, the corresponding identical value must be
3314/// extended to replace it.
3315void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
3316 // Look for values being erased.
3317 bool DidPrune = false;
3318 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3319 Val &V = Vals[i];
3320 // We should trigger in all cases in which eraseInstrs() does something.
3321 // match what eraseInstrs() is doing, print a message so
3322 if (V.Resolution != CR_Erase &&
3323 (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
3324 continue;
3325
3326 // Check subranges at the point where the copy will be removed.
3327 SlotIndex Def = LR.getValNumInfo(i)->def;
3328 SlotIndex OtherDef;
3329 if (V.Identical)
3330 OtherDef = V.OtherVNI->def;
3331
3332 // Print message so mismatches with eraseInstrs() can be diagnosed.
3333 LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
3334 << '\n');
3335 for (LiveInterval::SubRange &S : LI.subranges()) {
3336 LiveQueryResult Q = S.Query(Def);
3337
3338 // If a subrange starts at the copy then an undefined value has been
3339 // copied and we must remove that subrange value as well.
3340 VNInfo *ValueOut = Q.valueOutOrDead();
3341 if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
3342 (V.Identical && V.Resolution == CR_Erase &&
3343 ValueOut->def == Def))) {
3344 LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
3345 << " at " << Def << "\n");
3346 SmallVector<SlotIndex,8> EndPoints;
3347 LIS->pruneValue(S, Def, &EndPoints);
3348 DidPrune = true;
3349 // Mark value number as unused.
3350 ValueOut->markUnused();
3351
3352 if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
3353 // If V is identical to V.OtherVNI (and S was live at OtherDef),
3354 // then we can't simply prune V from S. V needs to be replaced
3355 // with V.OtherVNI.
3356 LIS->extendToIndices(S, EndPoints);
3357 }
3358
3359 // We may need to eliminate the subrange if the copy introduced a live
3360 // out undef value.
3361 if (ValueOut->isPHIDef())
3362 ShrinkMask |= S.LaneMask;
3363 continue;
3364 }
3365
3366 // If a subrange ends at the copy, then a value was copied but only
3367 // partially used later. Shrink the subregister range appropriately.
3368 //
3369 // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
3370 // conservatively correct.
3371 if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
3372 (V.Resolution == CR_Erase && isLiveThrough(Q))) {
3373 LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
3374 << PrintLaneMask(S.LaneMask) << " at " << Def
3375 << "\n");
3376 ShrinkMask |= S.LaneMask;
3377 }
3378 }
3379 }
3380 if (DidPrune)
3382}
3383
3384/// Check if any of the subranges of @p LI contain a definition at @p Def.
3386 for (LiveInterval::SubRange &SR : LI.subranges()) {
3387 if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3388 if (VNI->def == Def)
3389 return true;
3390 }
3391 return false;
3392}
3393
3394void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3395 assert(&static_cast<LiveRange&>(LI) == &LR);
3396
3397 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3398 if (Vals[i].Resolution != CR_Keep)
3399 continue;
3400 VNInfo *VNI = LR.getValNumInfo(i);
3401 if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3402 continue;
3403 Vals[i].Pruned = true;
3404 ShrinkMainRange = true;
3405 }
3406}
3407
3408void JoinVals::removeImplicitDefs() {
3409 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3410 Val &V = Vals[i];
3411 if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3412 continue;
3413
3414 VNInfo *VNI = LR.getValNumInfo(i);
3415 VNI->markUnused();
3416 LR.removeValNo(VNI);
3417 }
3418}
3419
3420void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3421 SmallVectorImpl<Register> &ShrinkRegs,
3422 LiveInterval *LI) {
3423 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3424 // Get the def location before markUnused() below invalidates it.
3425 VNInfo *VNI = LR.getValNumInfo(i);
3426 SlotIndex Def = VNI->def;
3427 switch (Vals[i].Resolution) {
3428 case CR_Keep: {
3429 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3430 // longer. The IMPLICIT_DEF instructions are only inserted by
3431 // PHIElimination to guarantee that all PHI predecessors have a value.
3432 if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
3433 break;
3434 // Remove value number i from LR.
3435 // For intervals with subranges, removing a segment from the main range
3436 // may require extending the previous segment: for each definition of
3437 // a subregister, there will be a corresponding def in the main range.
3438 // That def may fall in the middle of a segment from another subrange.
3439 // In such cases, removing this def from the main range must be
3440 // complemented by extending the main range to account for the liveness
3441 // of the other subrange.
3442 // The new end point of the main range segment to be extended.
3443 SlotIndex NewEnd;
3444 if (LI != nullptr) {
3446 assert(I != LR.end());
3447 // Do not extend beyond the end of the segment being removed.
3448 // The segment may have been pruned in preparation for joining
3449 // live ranges.
3450 NewEnd = I->end;
3451 }
3452
3453 LR.removeValNo(VNI);
3454 // Note that this VNInfo is reused and still referenced in NewVNInfo,
3455 // make it appear like an unused value number.
3456 VNI->markUnused();
3457
3458 if (LI != nullptr && LI->hasSubRanges()) {
3459 assert(static_cast<LiveRange*>(LI) == &LR);
3460 // Determine the end point based on the subrange information:
3461 // minimum of (earliest def of next segment,
3462 // latest end point of containing segment)
3463 SlotIndex ED, LE;
3464 for (LiveInterval::SubRange &SR : LI->subranges()) {
3465 LiveRange::iterator I = SR.find(Def);
3466 if (I == SR.end())
3467 continue;
3468 if (I->start > Def)
3469 ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3470 else
3471 LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3472 }
3473 if (LE.isValid())
3474 NewEnd = std::min(NewEnd, LE);
3475 if (ED.isValid())
3476 NewEnd = std::min(NewEnd, ED);
3477
3478 // We only want to do the extension if there was a subrange that
3479 // was live across Def.
3480 if (LE.isValid()) {
3481 LiveRange::iterator S = LR.find(Def);
3482 if (S != LR.begin())
3483 std::prev(S)->end = NewEnd;
3484 }
3485 }
3486 LLVM_DEBUG({
3487 dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3488 if (LI != nullptr)
3489 dbgs() << "\t\t LHS = " << *LI << '\n';
3490 });
3491 [[fallthrough]];
3492 }
3493
3494 case CR_Erase: {
3495 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3496 assert(MI && "No instruction to erase");
3497 if (MI->isCopy()) {
3498 Register Reg = MI->getOperand(1).getReg();
3499 if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3500 ShrinkRegs.push_back(Reg);
3501 }
3502 ErasedInstrs.insert(MI);
3503 LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3505 MI->eraseFromParent();
3506 break;
3507 }
3508 default:
3509 break;
3510 }
3511 }
3512}
3513
3514void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3515 LaneBitmask LaneMask,
3516 const CoalescerPair &CP) {
3517 SmallVector<VNInfo*, 16> NewVNInfo;
3518 JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3519 NewVNInfo, CP, LIS, TRI, true, true);
3520 JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3521 NewVNInfo, CP, LIS, TRI, true, true);
3522
3523 // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3524 // We should be able to resolve all conflicts here as we could successfully do
3525 // it on the mainrange already. There is however a problem when multiple
3526 // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3527 // interferences.
3528 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3529 // We already determined that it is legal to merge the intervals, so this
3530 // should never fail.
3531 llvm_unreachable("*** Couldn't join subrange!\n");
3532 }
3533 if (!LHSVals.resolveConflicts(RHSVals) ||
3534 !RHSVals.resolveConflicts(LHSVals)) {
3535 // We already determined that it is legal to merge the intervals, so this
3536 // should never fail.
3537 llvm_unreachable("*** Couldn't join subrange!\n");
3538 }
3539
3540 // The merging algorithm in LiveInterval::join() can't handle conflicting
3541 // value mappings, so we need to remove any live ranges that overlap a
3542 // CR_Replace resolution. Collect a set of end points that can be used to
3543 // restore the live range after joining.
3544 SmallVector<SlotIndex, 8> EndPoints;
3545 LHSVals.pruneValues(RHSVals, EndPoints, false);
3546 RHSVals.pruneValues(LHSVals, EndPoints, false);
3547
3548 LHSVals.removeImplicitDefs();
3549 RHSVals.removeImplicitDefs();
3550
3551 LRange.verify();
3552 RRange.verify();
3553
3554 // Join RRange into LHS.
3555 LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3556 NewVNInfo);
3557
3558 LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3559 << ' ' << LRange << "\n");
3560 if (EndPoints.empty())
3561 return;
3562
3563 // Recompute the parts of the live range we had to remove because of
3564 // CR_Replace conflicts.
3565 LLVM_DEBUG({
3566 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3567 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3568 dbgs() << EndPoints[i];
3569 if (i != n-1)
3570 dbgs() << ',';
3571 }
3572 dbgs() << ": " << LRange << '\n';
3573 });
3574 LIS->extendToIndices(LRange, EndPoints);
3575}
3576
3577void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3578 const LiveRange &ToMerge,
3579 LaneBitmask LaneMask,
3580 CoalescerPair &CP,
3581 unsigned ComposeSubRegIdx) {
3583 LI.refineSubRanges(
3584 Allocator, LaneMask,
3585 [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3586 if (SR.empty()) {
3587 SR.assign(ToMerge, Allocator);
3588 } else {
3589 // joinSubRegRange() destroys the merged range, so we need a copy.
3590 LiveRange RangeCopy(ToMerge, Allocator);
3591 joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3592 }
3593 },
3594 *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3595}
3596
3597bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3599 return false;
3600 auto &Counter = LargeLIVisitCounter[LI.reg()];
3601 if (Counter < LargeIntervalFreqThreshold) {
3602 Counter++;
3603 return false;
3604 }
3605 return true;
3606}
3607
3608bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3609 SmallVector<VNInfo*, 16> NewVNInfo;
3610 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3611 LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3612 bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3613 JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3614 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3615 JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3616 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3617
3618 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3619
3620 if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
3621 return false;
3622
3623 // First compute NewVNInfo and the simple value mappings.
3624 // Detect impossible conflicts early.
3625 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3626 return false;
3627
3628 // Some conflicts can only be resolved after all values have been mapped.
3629 if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3630 return false;
3631
3632 // All clear, the live ranges can be merged.
3633 if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3635
3636 // Transform lanemasks from the LHS to masks in the coalesced register and
3637 // create initial subranges if necessary.
3638 unsigned DstIdx = CP.getDstIdx();
3639 if (!LHS.hasSubRanges()) {
3640 LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3641 : TRI->getSubRegIndexLaneMask(DstIdx);
3642 // LHS must support subregs or we wouldn't be in this codepath.
3643 assert(Mask.any());
3644 LHS.createSubRangeFrom(Allocator, Mask, LHS);
3645 } else if (DstIdx != 0) {
3646 // Transform LHS lanemasks to new register class if necessary.
3647 for (LiveInterval::SubRange &R : LHS.subranges()) {
3648 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3649 R.LaneMask = Mask;
3650 }
3651 }
3652 LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3653 << '\n');
3654
3655 // Determine lanemasks of RHS in the coalesced register and merge subranges.
3656 unsigned SrcIdx = CP.getSrcIdx();
3657 if (!RHS.hasSubRanges()) {
3658 LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3659 : TRI->getSubRegIndexLaneMask(SrcIdx);
3660 mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
3661 } else {
3662 // Pair up subranges and merge.
3663 for (LiveInterval::SubRange &R : RHS.subranges()) {
3664 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3665 mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
3666 }
3667 }
3668 LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3669
3670 // Pruning implicit defs from subranges may result in the main range
3671 // having stale segments.
3672 LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3673
3674 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3675 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3676 }
3677
3678 // The merging algorithm in LiveInterval::join() can't handle conflicting
3679 // value mappings, so we need to remove any live ranges that overlap a
3680 // CR_Replace resolution. Collect a set of end points that can be used to
3681 // restore the live range after joining.
3682 SmallVector<SlotIndex, 8> EndPoints;
3683 LHSVals.pruneValues(RHSVals, EndPoints, true);
3684 RHSVals.pruneValues(LHSVals, EndPoints, true);
3685
3686 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3687 // registers to require trimming.
3688 SmallVector<Register, 8> ShrinkRegs;
3689 LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3690 RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3691 while (!ShrinkRegs.empty())
3692 shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3693
3694 // Scan and mark undef any DBG_VALUEs that would refer to a different value.
3695 checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
3696
3697 // If the RHS covers any PHI locations that were tracked for debug-info, we
3698 // must update tracking information to reflect the join.
3699 auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
3700 if (RegIt != RegToPHIIdx.end()) {
3701 // Iterate over all the debug instruction numbers assigned this register.
3702 for (unsigned InstID : RegIt->second) {
3703 auto PHIIt = PHIValToPos.find(InstID);
3704 assert(PHIIt != PHIValToPos.end());
3705 const SlotIndex &SI = PHIIt->second.SI;
3706
3707 // Does the RHS cover the position of this PHI?
3708 auto LII = RHS.find(SI);
3709 if (LII == RHS.end() || LII->start > SI)
3710 continue;
3711
3712 // Accept two kinds of subregister movement:
3713 // * When we merge from one register class into a larger register:
3714 // %1:gr16 = some-inst
3715 // ->
3716 // %2:gr32.sub_16bit = some-inst
3717 // * When the PHI is already in a subregister, and the larger class
3718 // is coalesced:
3719 // %2:gr32.sub_16bit = some-inst
3720 // %3:gr32 = COPY %2
3721 // ->
3722 // %3:gr32.sub_16bit = some-inst
3723 // Test for subregister move:
3724 if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
3725 // If we're moving between different subregisters, ignore this join.
3726 // The PHI will not get a location, dropping variable locations.
3727 if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
3728 continue;
3729
3730 // Update our tracking of where the PHI is.
3731 PHIIt->second.Reg = CP.getDstReg();
3732
3733 // If we merge into a sub-register of a larger class (test above),
3734 // update SubReg.
3735 if (CP.getSrcIdx() != 0)
3736 PHIIt->second.SubReg = CP.getSrcIdx();
3737 }
3738
3739 // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
3740 // different VRegs now. Copy old collection of debug instruction numbers and
3741 // erase the old one:
3742 auto InstrNums = RegIt->second;
3743 RegToPHIIdx.erase(RegIt);
3744
3745 // There might already be PHIs being tracked in the destination VReg. Insert
3746 // into an existing tracking collection, or insert a new one.
3747 RegIt = RegToPHIIdx.find(CP.getDstReg());
3748 if (RegIt != RegToPHIIdx.end())
3749 RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
3750 InstrNums.end());
3751 else
3752 RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
3753 }
3754
3755 // Join RHS into LHS.
3756 LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3757
3758 // Kill flags are going to be wrong if the live ranges were overlapping.
3759 // Eventually, we should simply clear all kill flags when computing live
3760 // ranges. They are reinserted after register allocation.
3761 MRI->clearKillFlags(LHS.reg());
3762 MRI->clearKillFlags(RHS.reg());
3763
3764 if (!EndPoints.empty()) {
3765 // Recompute the parts of the live range we had to remove because of
3766 // CR_Replace conflicts.
3767 LLVM_DEBUG({
3768 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3769 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3770 dbgs() << EndPoints[i];
3771 if (i != n-1)
3772 dbgs() << ',';
3773 }
3774 dbgs() << ": " << LHS << '\n';
3775 });
3776 LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3777 }
3778
3779 return true;
3780}
3781
3782bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3783 return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3784}
3785
3786void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
3787{
3788 const SlotIndexes &Slots = *LIS->getSlotIndexes();
3790
3791 // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3792 // vreg => DbgValueLoc map.
3793 auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3794 for (auto *X : ToInsert) {
3795 for (const auto &Op : X->debug_operands()) {
3796 if (Op.isReg() && Op.getReg().isVirtual())
3797 DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3798 }
3799 }
3800
3801 ToInsert.clear();
3802 };
3803
3804 // Iterate over all instructions, collecting them into the ToInsert vector.
3805 // Once a non-debug instruction is found, record the slot index of the
3806 // collected DBG_VALUEs.
3807 for (auto &MBB : MF) {
3808 SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3809
3810 for (auto &MI : MBB) {
3811 if (MI.isDebugValue()) {
3812 if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3813 return MO.isReg() && MO.getReg().isVirtual();
3814 }))
3815 ToInsert.push_back(&MI);
3816 } else if (!MI.isDebugOrPseudoInstr()) {
3817 CurrentSlot = Slots.getInstructionIndex(MI);
3818 CloseNewDVRange(CurrentSlot);
3819 }
3820 }
3821
3822 // Close range of DBG_VALUEs at the end of blocks.
3823 CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3824 }
3825
3826 // Sort all DBG_VALUEs we've seen by slot number.
3827 for (auto &Pair : DbgVRegToValues)
3828 llvm::sort(Pair.second);
3829}
3830
3831void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3832 LiveRange &LHS,
3833 JoinVals &LHSVals,
3834 LiveRange &RHS,
3835 JoinVals &RHSVals) {
3836 auto ScanForDstReg = [&](Register Reg) {
3837 checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3838 };
3839
3840 auto ScanForSrcReg = [&](Register Reg) {
3841 checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3842 };
3843
3844 // Scan for unsound updates of both the source and destination register.
3845 ScanForSrcReg(CP.getSrcReg());
3846 ScanForDstReg(CP.getDstReg());
3847}
3848
3849void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
3850 LiveRange &OtherLR,
3851 LiveRange &RegLR,
3852 JoinVals &RegVals) {
3853 // Are there any DBG_VALUEs to examine?
3854 auto VRegMapIt = DbgVRegToValues.find(Reg);
3855 if (VRegMapIt == DbgVRegToValues.end())
3856 return;
3857
3858 auto &DbgValueSet = VRegMapIt->second;
3859 auto DbgValueSetIt = DbgValueSet.begin();
3860 auto SegmentIt = OtherLR.begin();
3861
3862 bool LastUndefResult = false;
3863 SlotIndex LastUndefIdx;
3864
3865 // If the "Other" register is live at a slot Idx, test whether Reg can
3866 // safely be merged with it, or should be marked undef.
3867 auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
3868 &LastUndefIdx](SlotIndex Idx) -> bool {
3869 // Our worst-case performance typically happens with asan, causing very
3870 // many DBG_VALUEs of the same location. Cache a copy of the most recent
3871 // result for this edge-case.
3872 if (LastUndefIdx == Idx)
3873 return LastUndefResult;
3874
3875 // If the other range was live, and Reg's was not, the register coalescer
3876 // will not have tried to resolve any conflicts. We don't know whether
3877 // the DBG_VALUE will refer to the same value number, so it must be made
3878 // undef.
3879 auto OtherIt = RegLR.find(Idx);
3880 if (OtherIt == RegLR.end())
3881 return true;
3882
3883 // Both the registers were live: examine the conflict resolution record for
3884 // the value number Reg refers to. CR_Keep meant that this value number
3885 // "won" and the merged register definitely refers to that value. CR_Erase
3886 // means the value number was a redundant copy of the other value, which
3887 // was coalesced and Reg deleted. It's safe to refer to the other register
3888 // (which will be the source of the copy).
3889 auto Resolution = RegVals.getResolution(OtherIt->valno->id);
3890 LastUndefResult = Resolution != JoinVals::CR_Keep &&
3891 Resolution != JoinVals::CR_Erase;
3892 LastUndefIdx = Idx;
3893 return LastUndefResult;
3894 };
3895
3896 // Iterate over both the live-range of the "Other" register, and the set of
3897 // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
3898 // slot index. This relies on the DbgValueSet being ordered.
3899 while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
3900 if (DbgValueSetIt->first < SegmentIt->end) {
3901 // "Other" is live and there is a DBG_VALUE of Reg: test if we should
3902 // set it undef.
3903 if (DbgValueSetIt->first >= SegmentIt->start) {
3904 bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
3905 bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
3906 if (HasReg && ShouldUndefReg) {
3907 // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
3908 DbgValueSetIt->second->setDebugValueUndef();
3909 continue;
3910 }
3911 }
3912 ++DbgValueSetIt;
3913 } else {
3914 ++SegmentIt;
3915 }
3916 }
3917}
3918
3919namespace {
3920
3921/// Information concerning MBB coalescing priority.
3922struct MBBPriorityInfo {
3924 unsigned Depth;
3925 bool IsSplit;
3926
3927 MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3928 : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3929};
3930
3931} // end anonymous namespace
3932
3933/// C-style comparator that sorts first based on the loop depth of the basic
3934/// block (the unsigned), and then on the MBB number.
3935///
3936/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3937static int compareMBBPriority(const MBBPriorityInfo *LHS,
3938 const MBBPriorityInfo *RHS) {
3939 // Deeper loops first
3940 if (LHS->Depth != RHS->Depth)
3941 return LHS->Depth > RHS->Depth ? -1 : 1;
3942
3943 // Try to unsplit critical edges next.
3944 if (LHS->IsSplit != RHS->IsSplit)
3945 return LHS->IsSplit ? -1 : 1;
3946
3947 // Prefer blocks that are more connected in the CFG. This takes care of
3948 // the most difficult copies first while intervals are short.
3949 unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3950 unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3951 if (cl != cr)
3952 return cl > cr ? -1 : 1;
3953
3954 // As a last resort, sort by block number.
3955 return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3956}
3957
3958/// \returns true if the given copy uses or defines a local live range.
3959static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3960 if (!Copy->isCopy())
3961 return false;
3962
3963 if (Copy->getOperand(1).isUndef())
3964 return false;
3965
3966 Register SrcReg = Copy->getOperand(1).getReg();
3967 Register DstReg = Copy->getOperand(0).getReg();
3968 if (SrcReg.isPhysical() || DstReg.isPhysical())
3969 return false;
3970
3971 return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3972 || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3973}
3974
3975void RegisterCoalescer::lateLiveIntervalUpdate() {
3976 for (Register reg : ToBeUpdated) {
3977 if (!LIS->hasInterval(reg))
3978 continue;
3979 LiveInterval &LI = LIS->getInterval(reg);
3980 shrinkToUses(&LI, &DeadDefs);
3981 if (!DeadDefs.empty())
3982 eliminateDeadDefs();
3983 }
3984 ToBeUpdated.clear();
3985}
3986
3987bool RegisterCoalescer::
3988copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3989 bool Progress = false;
3990 SmallPtrSet<MachineInstr *, 4> CurrentErasedInstrs;
3991 for (MachineInstr *&MI : CurrList) {
3992 if (!MI)
3993 continue;
3994 // Skip instruction pointers that have already been erased, for example by
3995 // dead code elimination.
3996 if (ErasedInstrs.count(MI) || CurrentErasedInstrs.count(MI)) {
3997 MI = nullptr;
3998 continue;
3999 }
4000 bool Again = false;
4001 bool Success = joinCopy(MI, Again, CurrentErasedInstrs);
4002 Progress |= Success;
4003 if (Success || !Again)
4004 MI = nullptr;
4005 }
4006 // Clear instructions not recorded in `ErasedInstrs` but erased.
4007 if (!CurrentErasedInstrs.empty()) {
4008 for (MachineInstr *&MI : CurrList) {
4009 if (MI && CurrentErasedInstrs.count(MI))
4010 MI = nullptr;
4011 }
4012 for (MachineInstr *&MI : WorkList) {
4013 if (MI && CurrentErasedInstrs.count(MI))
4014 MI = nullptr;
4015 }
4016 }
4017 return Progress;
4018}
4019
4020/// Check if DstReg is a terminal node.
4021/// I.e., it does not have any affinity other than \p Copy.
4022static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
4023 const MachineRegisterInfo *MRI) {
4024 assert(Copy.isCopyLike());
4025 // Check if the destination of this copy as any other affinity.
4026 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
4027 if (&MI != &Copy && MI.isCopyLike())
4028 return false;
4029 return true;
4030}
4031
4032bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
4033 assert(Copy.isCopyLike());
4034 if (!UseTerminalRule)
4035 return false;
4036 Register SrcReg, DstReg;
4037 unsigned SrcSubReg = 0, DstSubReg = 0;
4038 if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
4039 return false;
4040 // Check if the destination of this copy has any other affinity.
4041 if (DstReg.isPhysical() ||
4042 // If SrcReg is a physical register, the copy won't be coalesced.
4043 // Ignoring it may have other side effect (like missing
4044 // rematerialization). So keep it.
4045 SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
4046 return false;
4047
4048 // DstReg is a terminal node. Check if it interferes with any other
4049 // copy involving SrcReg.
4050 const MachineBasicBlock *OrigBB = Copy.getParent();
4051 const LiveInterval &DstLI = LIS->getInterval(DstReg);
4052 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
4053 // Technically we should check if the weight of the new copy is
4054 // interesting compared to the other one and update the weight
4055 // of the copies accordingly. However, this would only work if
4056 // we would gather all the copies first then coalesce, whereas
4057 // right now we interleave both actions.
4058 // For now, just consider the copies that are in the same block.
4059 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
4060 continue;
4061 Register OtherSrcReg, OtherReg;
4062 unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
4063 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
4064 OtherSubReg))
4065 return false;
4066 if (OtherReg == SrcReg)
4067 OtherReg = OtherSrcReg;
4068 // Check if OtherReg is a non-terminal.
4069 if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
4070 continue;
4071 // Check that OtherReg interfere with DstReg.
4072 if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
4073 LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
4074 << '\n');
4075 return true;
4076 }
4077 }
4078 return false;
4079}
4080
4081void
4082RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
4083 LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
4084
4085 // Collect all copy-like instructions in MBB. Don't start coalescing anything
4086 // yet, it might invalidate the iterator.
4087 const unsigned PrevSize = WorkList.size();
4088 if (JoinGlobalCopies) {
4089 SmallVector<MachineInstr*, 2> LocalTerminals;
4090 SmallVector<MachineInstr*, 2> GlobalTerminals;
4091 // Coalesce copies bottom-up to coalesce local defs before local uses. They
4092 // are not inherently easier to resolve, but slightly preferable until we
4093 // have local live range splitting. In particular this is required by
4094 // cmp+jmp macro fusion.
4095 for (MachineInstr &MI : *MBB) {
4096 if (!MI.isCopyLike())
4097 continue;
4098 bool ApplyTerminalRule = applyTerminalRule(MI);
4099 if (isLocalCopy(&MI, LIS)) {
4100 if (ApplyTerminalRule)
4101 LocalTerminals.push_back(&MI);
4102 else
4103 LocalWorkList.push_back(&MI);
4104 } else {
4105 if (ApplyTerminalRule)
4106 GlobalTerminals.push_back(&MI);
4107 else
4108 WorkList.push_back(&MI);
4109 }
4110 }
4111 // Append the copies evicted by the terminal rule at the end of the list.
4112 LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
4113 WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
4114 }
4115 else {
4117 for (MachineInstr &MII : *MBB)
4118 if (MII.isCopyLike()) {
4119 if (applyTerminalRule(MII))
4120 Terminals.push_back(&MII);
4121 else
4122 WorkList.push_back(&MII);
4123 }
4124 // Append the copies evicted by the terminal rule at the end of the list.
4125 WorkList.append(Terminals.begin(), Terminals.end());
4126 }
4127 // Try coalescing the collected copies immediately, and remove the nulls.
4128 // This prevents the WorkList from getting too large since most copies are
4129 // joinable on the first attempt.
4131 CurrList(WorkList.begin() + PrevSize, WorkList.end());
4132 if (copyCoalesceWorkList(CurrList))
4133 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
4134 nullptr), WorkList.end());
4135}
4136
4137void RegisterCoalescer::coalesceLocals() {
4138 copyCoalesceWorkList(LocalWorkList);
4139 for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
4140 if (LocalWorkList[j])
4141 WorkList.push_back(LocalWorkList[j]);
4142 }
4143 LocalWorkList.clear();
4144}
4145
4146void RegisterCoalescer::joinAllIntervals() {
4147 LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4148 assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4149
4150 std::vector<MBBPriorityInfo> MBBs;
4151 MBBs.reserve(MF->size());
4152 for (MachineBasicBlock &MBB : *MF) {
4153 MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4154 JoinSplitEdges && isSplitEdge(&MBB)));
4155 }
4156 array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4157
4158 // Coalesce intervals in MBB priority order.
4159 unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4160 for (MBBPriorityInfo &MBB : MBBs) {
4161 // Try coalescing the collected local copies for deeper loops.
4162 if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4163 coalesceLocals();
4164 CurrDepth = MBB.Depth;
4165 }
4166 copyCoalesceInMBB(MBB.MBB);
4167 }
4168 lateLiveIntervalUpdate();
4169 coalesceLocals();
4170
4171 // Joining intervals can allow other intervals to be joined. Iteratively join
4172 // until we make no progress.
4173 while (copyCoalesceWorkList(WorkList))
4174 /* empty */ ;
4175 lateLiveIntervalUpdate();
4176}
4177
4178void RegisterCoalescer::releaseMemory() {
4179 ErasedInstrs.clear();
4180 WorkList.clear();
4181 DeadDefs.clear();
4182 InflateRegs.clear();
4183 LargeLIVisitCounter.clear();
4184}
4185
4186bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
4187 LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
4188 << "********** Function: " << fn.getName() << '\n');
4189
4190 // Variables changed between a setjmp and a longjump can have undefined value
4191 // after the longjmp. This behaviour can be observed if such a variable is
4192 // spilled, so longjmp won't restore the value in the spill slot.
4193 // RegisterCoalescer should not run in functions with a setjmp to avoid
4194 // merging such undefined variables with predictable ones.
4195 //
4196 // TODO: Could specifically disable coalescing registers live across setjmp
4197 // calls
4198 if (fn.exposesReturnsTwice()) {
4199 LLVM_DEBUG(
4200 dbgs() << "* Skipped as it exposes functions that returns twice.\n");
4201 return false;
4202 }
4203
4204 MF = &fn;
4205 MRI = &fn.getRegInfo();
4206 const TargetSubtargetInfo &STI = fn.getSubtarget();
4207 TRI = STI.getRegisterInfo();
4208 TII = STI.getInstrInfo();
4209 LIS = &getAnalysis<LiveIntervals>();
4210 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4211 Loops = &getAnalysis<MachineLoopInfo>();
4213 JoinGlobalCopies = STI.enableJoinGlobalCopies();
4214 else
4215 JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
4216
4217 // If there are PHIs tracked by debug-info, they will need updating during
4218 // coalescing. Build an index of those PHIs to ease updating.
4219 SlotIndexes *Slots = LIS->getSlotIndexes();
4220 for (const auto &DebugPHI : MF->DebugPHIPositions) {
4221 MachineBasicBlock *MBB = DebugPHI.second.MBB;
4222 Register Reg = DebugPHI.second.Reg;
4223 unsigned SubReg = DebugPHI.second.SubReg;
4224 SlotIndex SI = Slots->getMBBStartIdx(MBB);
4225 PHIValPos P = {SI, Reg, SubReg};
4226 PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
4227 RegToPHIIdx[Reg].push_back(DebugPHI.first);
4228 }
4229
4230 // The MachineScheduler does not currently require JoinSplitEdges. This will
4231 // either be enabled unconditionally or replaced by a more general live range
4232 // splitting optimization.
4233 JoinSplitEdges = EnableJoinSplits;
4234
4235 if (VerifyCoalescing)
4236 MF->verify(this, "Before register coalescing");
4237
4238 DbgVRegToValues.clear();
4240
4241 RegClassInfo.runOnMachineFunction(fn);
4242
4243 // Join (coalesce) intervals if requested.
4244 if (EnableJoining)
4245 joinAllIntervals();
4246
4247 // After deleting a lot of copies, register classes may be less constrained.
4248 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
4249 // DPR inflation.
4250 array_pod_sort(InflateRegs.begin(), InflateRegs.end());
4251 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
4252 InflateRegs.end());
4253 LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
4254 << " regs.\n");
4255 for (Register Reg : InflateRegs) {
4256 if (MRI->reg_nodbg_empty(Reg))
4257 continue;
4258 if (MRI->recomputeRegClass(Reg)) {
4259 LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
4260 << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
4261 ++NumInflated;
4262
4263 LiveInterval &LI = LIS->getInterval(Reg);
4264 if (LI.hasSubRanges()) {
4265 // If the inflated register class does not support subregisters anymore
4266 // remove the subranges.
4267 if (!MRI->shouldTrackSubRegLiveness(Reg)) {
4268 LI.clearSubRanges();
4269 } else {
4270#ifndef NDEBUG
4271 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
4272 // If subranges are still supported, then the same subregs
4273 // should still be supported.
4274 for (LiveInterval::SubRange &S : LI.subranges()) {
4275 assert((S.LaneMask & ~MaxMask).none());
4276 }
4277#endif
4278 }
4279 }
4280 }
4281 }
4282
4283 // After coalescing, update any PHIs that are being tracked by debug-info
4284 // with their new VReg locations.
4285 for (auto &p : MF->DebugPHIPositions) {
4286 auto it = PHIValToPos.find(p.first);
4287 assert(it != PHIValToPos.end());
4288 p.second.Reg = it->second.Reg;
4289 p.second.SubReg = it->second.SubReg;
4290 }
4291
4292 PHIValToPos.clear();
4293 RegToPHIIdx.clear();
4294
4295 LLVM_DEBUG(dump());
4296 if (VerifyCoalescing)
4297 MF->verify(this, "After register coalescing");
4298 return true;
4299}
4300
4301void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
4302 LIS->print(O, m);
4303}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
bool End
Definition: ELF_riscv.cpp:480
SmallVector< uint32_t, 0 > Writes
Definition: ELF_riscv.cpp:497
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
Basic Register Allocator
static cl::opt< cl::boolOrDefault > EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), cl::init(cl::BOU_UNSET), cl::Hidden)
Temporary flag to test global copy optimization.
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS)
static bool isSplitEdge(const MachineBasicBlock *MBB)
Return true if this block should be vacated by the coalescer to eliminate branches.
static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS)
C-style comparator that sorts first based on the loop depth of the basic block (the unsigned),...
register Register Coalescer
register coalescer
static cl::opt< unsigned > LargeIntervalSizeThreshold("large-interval-size-threshold", cl::Hidden, cl::desc("If the valnos size of an interval is larger than the threshold, " "it is regarded as a large interval. "), cl::init(100))
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def)
Check if any of the subranges of LI contain a definition at Def.
static cl::opt< unsigned > LargeIntervalFreqThreshold("large-interval-freq-threshold", cl::Hidden, cl::desc("For a large interval, if it is coalesed with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(256))
static std::pair< bool, bool > addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo)
Copy segments with value number SrcValNo from liverange Src to live range @Dst and use value number D...
static bool isLiveThrough(const LiveQueryResult Q)
static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI)
Check if DstReg is a terminal node.
static cl::opt< bool > VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden)
register Register static false bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, Register &Src, Register &Dst, unsigned &SrcSub, unsigned &DstSub)
static cl::opt< bool > EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden)
Temporary flag to test critical edge unsplitting.
static cl::opt< bool > EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden)
static bool definesFullReg(const MachineInstr &MI, Register Reg)
Returns true if MI defines the full vreg Reg, as opposed to just defining a subregister.
static cl::opt< unsigned > LateRematUpdateThreshold("late-remat-update-threshold", cl::Hidden, cl::desc("During rematerialization for a copy, if the def instruction has " "many other copy uses to be rematerialized, delay the multiple " "separate live interval update work and do them all at once after " "all those rematerialization are done. It will save a lot of " "repeated work. "), cl::init(100))
static cl::opt< bool > UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), cl::init(false), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static DenseMap< Register, std::vector< std::pair< SlotIndex, MachineInstr * > > > buildVRegToDbgValueMap(MachineFunction &MF, const LiveIntervals *Liveness)
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS)
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
bool test(unsigned Idx) const
Definition: BitVector.h:461
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
A helper class for register coalescers.
bool flip()
Swap SrcReg and DstReg.
bool isCoalescable(const MachineInstr *) const
Return true if MI is a copy instruction that will become an identity copy after coalescing.
bool setRegisters(const MachineInstr *)
Set registers to match the copy instruction MI.
This class represents an Operation in the Expression.
The location of a single variable, composed of an expression and 0 or more DbgValueLocEntries.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
bool isAsCheapAsAMove(const MachineInstr &MI) const override
A live range for subregisters.
Definition: LiveInterval.h:694
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
Register reg() const
Definition: LiveInterval.h:718
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
SubRange * createSubRangeFrom(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, const LiveRange &CopyFrom)
Like createSubRange() but the new range is filled with a copy of the liveness information in CopyFrom...
Definition: LiveInterval.h:801
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply, const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, unsigned ComposeSubRegIdx=0)
Refines the subranges to support LaneMask.
void computeSubRangeUndefs(SmallVectorImpl< SlotIndex > &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const
For a given lane mask LaneMask, compute indexes at which the lane is marked undefined by subregister ...
SubRange * createSubRange(BumpPtrAllocator &Allocator, LaneBitmask LaneMask)
Creates a new empty subregister live range.
Definition: LiveInterval.h:792
void clearSubRanges()
Removes all subregister liveness information.
bool hasInterval(Register Reg) const
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const
Returns true if VNI is killed by any PHI-def values in LI.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VNInfo::Allocator & getVNInfoAllocator()
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveRange * getCachedRegUnit(unsigned Unit)
Return the live range for register unit Unit if it has already been computed, or nullptr if it hasn't...
LiveInterval & getInterval(Register Reg)
void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl< SlotIndex > *EndPoints)
If LR has a live value at Kill, prune its live range by removing any liveness reachable from Kill.
void removeInterval(Register Reg)
Interval removal.
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos)
Remove value numbers and related live segments starting at position Pos that are part of any liverang...
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval * > &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
bool isLiveInToMBB(const LiveRange &LR, const MachineBasicBlock *mbb) const
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void print(raw_ostream &O, const Module *=nullptr) const override
Implement the dump method.
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueOutOrDead() const
Returns the value alive at the end of the instruction, if any.
Definition: LiveInterval.h:129
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
Definition: LiveInterval.h:123
VNInfo * valueDefined() const
Return the value defined by this instruction, if any.
Definition: LiveInterval.h:135
SlotIndex endPoint() const
Return the end point of the last live range segment to interact with the instruction,...
Definition: LiveInterval.h:147
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
Callback methods for LiveRangeEdit owners.
Definition: LiveRangeEdit.h:45
virtual void LRE_WillEraseInstruction(MachineInstr *MI)
Called immediately before erasing a dead machine instruction.
Definition: LiveRangeEdit.h:52
void eliminateDeadDefs(SmallVectorImpl< MachineInstr * > &Dead, ArrayRef< Register > RegsBeingSpilled=std::nullopt)
eliminateDeadDefs - Try to delete machine instructions that are now dead (allDefsAreDead returns true...
SlotIndex rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, const Remat &RM, const TargetRegisterInfo &, bool Late=false, unsigned SubIdx=0, MachineInstr *ReplaceIndexMI=nullptr)
rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an instruction into MBB before...
bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI)
checkRematerializable - Manually add VNI to the list of rematerializable values if DefMI may be remat...
bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, bool cheapAsAMove)
canRematerializeAt - Determine if ParentVNI can be rematerialized at UseIdx.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:317
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void join(LiveRange &Other, const int *ValNoAssignments, const int *RHSValNoAssignments, SmallVectorImpl< VNInfo * > &NewVNInfo)
join - Join two live ranges (this, and other) together.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
VNInfo * createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc)
createDeadDef - Make sure the range has a value defined at Def.
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
void verify() const
Walk the range and assert if any invariants fail to hold.
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:542
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
VNInfo * MergeValueNumberInto(VNInfo *V1, VNInfo *V2)
MergeValueNumberInto - This method is called when two value numbers are found to be equivalent.
unsigned getNumValNums() const
Definition: LiveInterval.h:313
iterator begin()
Definition: LiveInterval.h:215
Segments segments
Definition: LiveInterval.h:203
VNInfoList valnos
Definition: LiveInterval.h:204
bool containsOneValue() const
Definition: LiveInterval.h:311
size_t size() const
Definition: LiveInterval.h:305
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none.
Definition: LiveInterval.h:436
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
MCRegUnitRootIterator enumerates the root registers of a register unit.
bool isValid() const
Check if the iterator is at the end of the list.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
void setRegisterDefReadUndef(Register Reg, bool IsUndef=true)
Mark all subregister defs of register Reg with the undef flag.
bool isSafeToMove(AAResults *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool isImplicitDef() const
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:329
bool isCopyLike() const
Return true if the instruction behaves like a copy.
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
std::pair< bool, bool > readsWritesVirtualRegister(Register Reg, SmallVectorImpl< unsigned > *Ops=nullptr) const
Return a pair of bools (reads, writes) indicating if this instruction reads or writes Reg.
bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx=nullptr) const
Return true if the use operand of the specified index is tied to a def operand.
bool isDebugInstr() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:549
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:543
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:662
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:475
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
Definition: MachineInstr.h:733
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
bool allDefsAreDead() const
Return true if all the defs of this instruction are dead.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
void setIsUndef(bool Val=true)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class repr