LLVM 17.0.0git
RegisterCoalescer.cpp
Go to the documentation of this file.
1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the generic RegisterCoalescer interface which
10// is used as the common interface used by all clients and
11// implementations of register coalescing.
12//
13//===----------------------------------------------------------------------===//
14
15#include "RegisterCoalescer.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/DenseSet.h"
19#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/CodeGen/Passes.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/LaneBitmask.h"
45#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
53#include <algorithm>
54#include <cassert>
55#include <iterator>
56#include <limits>
57#include <tuple>
58#include <utility>
59#include <vector>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "regalloc"
64
65STATISTIC(numJoins , "Number of interval joins performed");
66STATISTIC(numCrossRCs , "Number of cross class joins performed");
67STATISTIC(numCommutes , "Number of instruction commuting performed");
68STATISTIC(numExtends , "Number of copies extended");
69STATISTIC(NumReMats , "Number of instructions re-materialized");
70STATISTIC(NumInflated , "Number of register classes inflated");
71STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
73STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");
74
75static cl::opt<bool> EnableJoining("join-liveintervals",
76 cl::desc("Coalesce copies (default=true)"),
77 cl::init(true), cl::Hidden);
78
79static cl::opt<bool> UseTerminalRule("terminal-rule",
80 cl::desc("Apply the terminal rule"),
81 cl::init(false), cl::Hidden);
82
83/// Temporary flag to test critical edge unsplitting.
84static cl::opt<bool>
85EnableJoinSplits("join-splitedges",
86 cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87
88/// Temporary flag to test global copy optimization.
90EnableGlobalCopies("join-globalcopies",
91 cl::desc("Coalesce copies that span blocks (default=subtarget)"),
93
94static cl::opt<bool>
95VerifyCoalescing("verify-coalescing",
96 cl::desc("Verify machine instrs before and after register coalescing"),
98
100 "late-remat-update-threshold", cl::Hidden,
101 cl::desc("During rematerialization for a copy, if the def instruction has "
102 "many other copy uses to be rematerialized, delay the multiple "
103 "separate live interval update work and do them all at once after "
104 "all those rematerialization are done. It will save a lot of "
105 "repeated work. "),
106 cl::init(100));
107
109 "large-interval-size-threshold", cl::Hidden,
110 cl::desc("If the valnos size of an interval is larger than the threshold, "
111 "it is regarded as a large interval. "),
112 cl::init(100));
113
115 "large-interval-freq-threshold", cl::Hidden,
116 cl::desc("For a large interval, if it is coalesed with other live "
117 "intervals many times more than the threshold, stop its "
118 "coalescing to control the compile time. "),
119 cl::init(100));
120
121namespace {
122
123 class JoinVals;
124
125 class RegisterCoalescer : public MachineFunctionPass,
127 MachineFunction* MF = nullptr;
128 MachineRegisterInfo* MRI = nullptr;
129 const TargetRegisterInfo* TRI = nullptr;
130 const TargetInstrInfo* TII = nullptr;
131 LiveIntervals *LIS = nullptr;
132 const MachineLoopInfo* Loops = nullptr;
133 AliasAnalysis *AA = nullptr;
134 RegisterClassInfo RegClassInfo;
135
136 /// Position and VReg of a PHI instruction during coalescing.
137 struct PHIValPos {
138 SlotIndex SI; ///< Slot where this PHI occurs.
139 Register Reg; ///< VReg the PHI occurs in.
140 unsigned SubReg; ///< Qualifying subregister for Reg.
141 };
142
143 /// Map from debug instruction number to PHI position during coalescing.
145 /// Index of, for each VReg, which debug instruction numbers and
146 /// corresponding PHIs are sensitive to coalescing. Each VReg may have
147 /// multiple PHI defs, at different positions.
149
150 /// Debug variable location tracking -- for each VReg, maintain an
151 /// ordered-by-slot-index set of DBG_VALUEs, to help quick
152 /// identification of whether coalescing may change location validity.
153 using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
155
156 /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
157 /// To avoid repeatedly merging sets of DbgValueLocs, instead record
158 /// which vregs have been coalesced, and where to. This map is from
159 /// vreg => {set of vregs merged in}.
161
162 /// A LaneMask to remember on which subregister live ranges we need to call
163 /// shrinkToUses() later.
164 LaneBitmask ShrinkMask;
165
166 /// True if the main range of the currently coalesced intervals should be
167 /// checked for smaller live intervals.
168 bool ShrinkMainRange = false;
169
170 /// True if the coalescer should aggressively coalesce global copies
171 /// in favor of keeping local copies.
172 bool JoinGlobalCopies = false;
173
174 /// True if the coalescer should aggressively coalesce fall-thru
175 /// blocks exclusively containing copies.
176 bool JoinSplitEdges = false;
177
178 /// Copy instructions yet to be coalesced.
180 SmallVector<MachineInstr*, 8> LocalWorkList;
181
182 /// Set of instruction pointers that have been erased, and
183 /// that may be present in WorkList.
185
186 /// Dead instructions that are about to be deleted.
188
189 /// Virtual registers to be considered for register class inflation.
190 SmallVector<Register, 8> InflateRegs;
191
192 /// The collection of live intervals which should have been updated
193 /// immediately after rematerialiation but delayed until
194 /// lateLiveIntervalUpdate is called.
195 DenseSet<Register> ToBeUpdated;
196
197 /// Record how many times the large live interval with many valnos
198 /// has been tried to join with other live interval.
199 DenseMap<Register, unsigned long> LargeLIVisitCounter;
200
201 /// Recursively eliminate dead defs in DeadDefs.
202 void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
203
204 /// LiveRangeEdit callback for eliminateDeadDefs().
206
207 /// Coalesce the LocalWorkList.
208 void coalesceLocals();
209
210 /// Join compatible live intervals
211 void joinAllIntervals();
212
213 /// Coalesce copies in the specified MBB, putting
214 /// copies that cannot yet be coalesced into WorkList.
215 void copyCoalesceInMBB(MachineBasicBlock *MBB);
216
217 /// Tries to coalesce all copies in CurrList. Returns true if any progress
218 /// was made.
219 bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
220
221 /// If one def has many copy like uses, and those copy uses are all
222 /// rematerialized, the live interval update needed for those
223 /// rematerializations will be delayed and done all at once instead
224 /// of being done multiple times. This is to save compile cost because
225 /// live interval update is costly.
226 void lateLiveIntervalUpdate();
227
228 /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
229 /// has no value defined in the predecessors. If the incoming value is the
230 /// same as defined by the copy itself, the value is considered undefined.
231 bool copyValueUndefInPredecessors(LiveRange &S,
232 const MachineBasicBlock *MBB,
233 LiveQueryResult SLRQ);
234
235 /// Set necessary undef flags on subregister uses after pruning out undef
236 /// lane segments from the subrange.
237 void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
238 LaneBitmask PrunedLanes);
239
240 /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
241 /// src/dst of the copy instruction CopyMI. This returns true if the copy
242 /// was successfully coalesced away. If it is not currently possible to
243 /// coalesce this interval, but it may be possible if other things get
244 /// coalesced, then it returns true by reference in 'Again'.
245 bool joinCopy(MachineInstr *CopyMI, bool &Again);
246
247 /// Attempt to join these two intervals. On failure, this
248 /// returns false. The output "SrcInt" will not have been modified, so we
249 /// can use this information below to update aliases.
250 bool joinIntervals(CoalescerPair &CP);
251
252 /// Attempt joining two virtual registers. Return true on success.
253 bool joinVirtRegs(CoalescerPair &CP);
254
255 /// If a live interval has many valnos and is coalesced with other
256 /// live intervals many times, we regard such live interval as having
257 /// high compile time cost.
258 bool isHighCostLiveInterval(LiveInterval &LI);
259
260 /// Attempt joining with a reserved physreg.
261 bool joinReservedPhysReg(CoalescerPair &CP);
262
263 /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
264 /// Subranges in @p LI which only partially interfere with the desired
265 /// LaneMask are split as necessary. @p LaneMask are the lanes that
266 /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
267 /// lanemasks already adjusted to the coalesced register.
268 void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
269 LaneBitmask LaneMask, CoalescerPair &CP,
270 unsigned DstIdx);
271
272 /// Join the liveranges of two subregisters. Joins @p RRange into
273 /// @p LRange, @p RRange may be invalid afterwards.
274 void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
275 LaneBitmask LaneMask, const CoalescerPair &CP);
276
277 /// We found a non-trivially-coalescable copy. If the source value number is
278 /// defined by a copy from the destination reg see if we can merge these two
279 /// destination reg valno# into a single value number, eliminating a copy.
280 /// This returns true if an interval was modified.
281 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
282
283 /// Return true if there are definitions of IntB
284 /// other than BValNo val# that can reach uses of AValno val# of IntA.
285 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
286 VNInfo *AValNo, VNInfo *BValNo);
287
288 /// We found a non-trivially-coalescable copy.
289 /// If the source value number is defined by a commutable instruction and
290 /// its other operand is coalesced to the copy dest register, see if we
291 /// can transform the copy into a noop by commuting the definition.
292 /// This returns a pair of two flags:
293 /// - the first element is true if an interval was modified,
294 /// - the second element is true if the destination interval needs
295 /// to be shrunk after deleting the copy.
296 std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
297 MachineInstr *CopyMI);
298
299 /// We found a copy which can be moved to its less frequent predecessor.
300 bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
301
302 /// If the source of a copy is defined by a
303 /// trivial computation, replace the copy by rematerialize the definition.
304 bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
305 bool &IsDefCopy);
306
307 /// Return true if a copy involving a physreg should be joined.
308 bool canJoinPhys(const CoalescerPair &CP);
309
310 /// Replace all defs and uses of SrcReg to DstReg and update the subregister
311 /// number if it is not zero. If DstReg is a physical register and the
312 /// existing subregister number of the def / use being updated is not zero,
313 /// make sure to set it to the correct physical subregister.
314 void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
315
316 /// If the given machine operand reads only undefined lanes add an undef
317 /// flag.
318 /// This can happen when undef uses were previously concealed by a copy
319 /// which we coalesced. Example:
320 /// %0:sub0<def,read-undef> = ...
321 /// %1 = COPY %0 <-- Coalescing COPY reveals undef
322 /// = use %1:sub1 <-- hidden undef use
323 void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
324 MachineOperand &MO, unsigned SubRegIdx);
325
326 /// Handle copies of undef values. If the undef value is an incoming
327 /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
328 /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
329 /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
330 MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
331
332 /// Check whether or not we should apply the terminal rule on the
333 /// destination (Dst) of \p Copy.
334 /// When the terminal rule applies, Copy is not profitable to
335 /// coalesce.
336 /// Dst is terminal if it has exactly one affinity (Dst, Src) and
337 /// at least one interference (Dst, Dst2). If Dst is terminal, the
338 /// terminal rule consists in checking that at least one of
339 /// interfering node, say Dst2, has an affinity of equal or greater
340 /// weight with Src.
341 /// In that case, Dst2 and Dst will not be able to be both coalesced
342 /// with Src. Since Dst2 exposes more coalescing opportunities than
343 /// Dst, we can drop \p Copy.
344 bool applyTerminalRule(const MachineInstr &Copy) const;
345
346 /// Wrapper method for \see LiveIntervals::shrinkToUses.
347 /// This method does the proper fixing of the live-ranges when the afore
348 /// mentioned method returns true.
349 void shrinkToUses(LiveInterval *LI,
350 SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
351 NumShrinkToUses++;
352 if (LIS->shrinkToUses(LI, Dead)) {
353 /// Check whether or not \p LI is composed by multiple connected
354 /// components and if that is the case, fix that.
356 LIS->splitSeparateComponents(*LI, SplitLIs);
357 }
358 }
359
360 /// Wrapper Method to do all the necessary work when an Instruction is
361 /// deleted.
362 /// Optimizations should use this to make sure that deleted instructions
363 /// are always accounted for.
364 void deleteInstr(MachineInstr* MI) {
365 ErasedInstrs.insert(MI);
367 MI->eraseFromParent();
368 }
369
370 /// Walk over function and initialize the DbgVRegToValues map.
371 void buildVRegToDbgValueMap(MachineFunction &MF);
372
373 /// Test whether, after merging, any DBG_VALUEs would refer to a
374 /// different value number than before merging, and whether this can
375 /// be resolved. If not, mark the DBG_VALUE as being undef.
376 void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
377 JoinVals &LHSVals, LiveRange &RHS,
378 JoinVals &RHSVals);
379
380 void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
381 LiveRange &RegRange, JoinVals &Vals2);
382
383 public:
384 static char ID; ///< Class identification, replacement for typeinfo
385
386 RegisterCoalescer() : MachineFunctionPass(ID) {
388 }
389
390 void getAnalysisUsage(AnalysisUsage &AU) const override;
391
392 void releaseMemory() override;
393
394 /// This is the pass entry point.
396
397 /// Implement the dump method.
398 void print(raw_ostream &O, const Module* = nullptr) const override;
399 };
400
401} // end anonymous namespace
402
403char RegisterCoalescer::ID = 0;
404
405char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
406
407INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
408 "Simple Register Coalescing", false, false)
413INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
415
416[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
418 Register &Dst, unsigned &SrcSub,
419 unsigned &DstSub) {
420 if (MI->isCopy()) {
421 Dst = MI->getOperand(0).getReg();
422 DstSub = MI->getOperand(0).getSubReg();
423 Src = MI->getOperand(1).getReg();
424 SrcSub = MI->getOperand(1).getSubReg();
425 } else if (MI->isSubregToReg()) {
426 Dst = MI->getOperand(0).getReg();
427 DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
428 MI->getOperand(3).getImm());
429 Src = MI->getOperand(2).getReg();
430 SrcSub = MI->getOperand(2).getSubReg();
431 } else
432 return false;
433 return true;
434}
435
436/// Return true if this block should be vacated by the coalescer to eliminate
437/// branches. The important cases to handle in the coalescer are critical edges
438/// split during phi elimination which contain only copies. Simple blocks that
439/// contain non-branches should also be vacated, but this can be handled by an
440/// earlier pass similar to early if-conversion.
441static bool isSplitEdge(const MachineBasicBlock *MBB) {
442 if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
443 return false;
444
445 for (const auto &MI : *MBB) {
446 if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
447 return false;
448 }
449 return true;
450}
451
453 SrcReg = DstReg = Register();
454 SrcIdx = DstIdx = 0;
455 NewRC = nullptr;
456 Flipped = CrossClass = false;
457
458 Register Src, Dst;
459 unsigned SrcSub = 0, DstSub = 0;
460 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
461 return false;
462 Partial = SrcSub || DstSub;
463
464 // If one register is a physreg, it must be Dst.
465 if (Src.isPhysical()) {
466 if (Dst.isPhysical())
467 return false;
468 std::swap(Src, Dst);
469 std::swap(SrcSub, DstSub);
470 Flipped = true;
471 }
472
473 const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
474
475 if (Dst.isPhysical()) {
476 // Eliminate DstSub on a physreg.
477 if (DstSub) {
478 Dst = TRI.getSubReg(Dst, DstSub);
479 if (!Dst) return false;
480 DstSub = 0;
481 }
482
483 // Eliminate SrcSub by picking a corresponding Dst superregister.
484 if (SrcSub) {
485 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
486 if (!Dst) return false;
487 } else if (!MRI.getRegClass(Src)->contains(Dst)) {
488 return false;
489 }
490 } else {
491 // Both registers are virtual.
492 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
493 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
494
495 // Both registers have subreg indices.
496 if (SrcSub && DstSub) {
497 // Copies between different sub-registers are never coalescable.
498 if (Src == Dst && SrcSub != DstSub)
499 return false;
500
501 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
502 SrcIdx, DstIdx);
503 if (!NewRC)
504 return false;
505 } else if (DstSub) {
506 // SrcReg will be merged with a sub-register of DstReg.
507 SrcIdx = DstSub;
508 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
509 } else if (SrcSub) {
510 // DstReg will be merged with a sub-register of SrcReg.
511 DstIdx = SrcSub;
512 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
513 } else {
514 // This is a straight copy without sub-registers.
515 NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
516 }
517
518 // The combined constraint may be impossible to satisfy.
519 if (!NewRC)
520 return false;
521
522 // Prefer SrcReg to be a sub-register of DstReg.
523 // FIXME: Coalescer should support subregs symmetrically.
524 if (DstIdx && !SrcIdx) {
525 std::swap(Src, Dst);
526 std::swap(SrcIdx, DstIdx);
527 Flipped = !Flipped;
528 }
529
530 CrossClass = NewRC != DstRC || NewRC != SrcRC;
531 }
532 // Check our invariants
533 assert(Src.isVirtual() && "Src must be virtual");
534 assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
535 SrcReg = Src;
536 DstReg = Dst;
537 return true;
538}
539
541 if (DstReg.isPhysical())
542 return false;
543 std::swap(SrcReg, DstReg);
544 std::swap(SrcIdx, DstIdx);
545 Flipped = !Flipped;
546 return true;
547}
548
550 if (!MI)
551 return false;
552 Register Src, Dst;
553 unsigned SrcSub = 0, DstSub = 0;
554 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
555 return false;
556
557 // Find the virtual register that is SrcReg.
558 if (Dst == SrcReg) {
559 std::swap(Src, Dst);
560 std::swap(SrcSub, DstSub);
561 } else if (Src != SrcReg) {
562 return false;
563 }
564
565 // Now check that Dst matches DstReg.
566 if (DstReg.isPhysical()) {
567 if (!Dst.isPhysical())
568 return false;
569 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
570 // DstSub could be set for a physreg from INSERT_SUBREG.
571 if (DstSub)
572 Dst = TRI.getSubReg(Dst, DstSub);
573 // Full copy of Src.
574 if (!SrcSub)
575 return DstReg == Dst;
576 // This is a partial register copy. Check that the parts match.
577 return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
578 } else {
579 // DstReg is virtual.
580 if (DstReg != Dst)
581 return false;
582 // Registers match, do the subregisters line up?
583 return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
584 TRI.composeSubRegIndices(DstIdx, DstSub);
585 }
586}
587
588void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
589 AU.setPreservesCFG();
598}
599
600void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
601 if (Edit) {
602 Edit->eliminateDeadDefs(DeadDefs);
603 return;
604 }
606 LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
607 nullptr, this).eliminateDeadDefs(DeadDefs);
608}
609
610void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
611 // MI may be in WorkList. Make sure we don't visit it.
612 ErasedInstrs.insert(MI);
613}
614
615bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
616 MachineInstr *CopyMI) {
617 assert(!CP.isPartial() && "This doesn't work for partial copies.");
618 assert(!CP.isPhys() && "This doesn't work for physreg copies.");
619
620 LiveInterval &IntA =
621 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
622 LiveInterval &IntB =
623 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
624 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
625
626 // We have a non-trivially-coalescable copy with IntA being the source and
627 // IntB being the dest, thus this defines a value number in IntB. If the
628 // source value number (in IntA) is defined by a copy from B, see if we can
629 // merge these two pieces of B into a single value number, eliminating a copy.
630 // For example:
631 //
632 // A3 = B0
633 // ...
634 // B1 = A3 <- this copy
635 //
636 // In this case, B0 can be extended to where the B1 copy lives, allowing the
637 // B1 value number to be replaced with B0 (which simplifies the B
638 // liveinterval).
639
640 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
641 // the example above.
643 if (BS == IntB.end()) return false;
644 VNInfo *BValNo = BS->valno;
645
646 // Get the location that B is defined at. Two options: either this value has
647 // an unknown definition point or it is defined at CopyIdx. If unknown, we
648 // can't process it.
649 if (BValNo->def != CopyIdx) return false;
650
651 // AValNo is the value number in A that defines the copy, A3 in the example.
652 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
653 LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
654 // The live segment might not exist after fun with physreg coalescing.
655 if (AS == IntA.end()) return false;
656 VNInfo *AValNo = AS->valno;
657
658 // If AValNo is defined as a copy from IntB, we can potentially process this.
659 // Get the instruction that defines this value number.
660 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
661 // Don't allow any partial copies, even if isCoalescable() allows them.
662 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
663 return false;
664
665 // Get the Segment in IntB that this value number starts with.
667 IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
668 if (ValS == IntB.end())
669 return false;
670
671 // Make sure that the end of the live segment is inside the same block as
672 // CopyMI.
673 MachineInstr *ValSEndInst =
674 LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
675 if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
676 return false;
677
678 // Okay, we now know that ValS ends in the same block that the CopyMI
679 // live-range starts. If there are no intervening live segments between them
680 // in IntB, we can merge them.
681 if (ValS+1 != BS) return false;
682
683 LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
684
685 SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
686 // We are about to delete CopyMI, so need to remove it as the 'instruction
687 // that defines this value #'. Update the valnum with the new defining
688 // instruction #.
689 BValNo->def = FillerStart;
690
691 // Okay, we can merge them. We need to insert a new liverange:
692 // [ValS.end, BS.begin) of either value number, then we merge the
693 // two value numbers.
694 IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
695
696 // Okay, merge "B1" into the same value number as "B0".
697 if (BValNo != ValS->valno)
698 IntB.MergeValueNumberInto(BValNo, ValS->valno);
699
700 // Do the same for the subregister segments.
701 for (LiveInterval::SubRange &S : IntB.subranges()) {
702 // Check for SubRange Segments of the form [1234r,1234d:0) which can be
703 // removed to prevent creating bogus SubRange Segments.
704 LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
705 if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
706 S.removeSegment(*SS, true);
707 continue;
708 }
709 // The subrange may have ended before FillerStart. If so, extend it.
710 if (!S.getVNInfoAt(FillerStart)) {
711 SlotIndex BBStart =
712 LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
713 S.extendInBlock(BBStart, FillerStart);
714 }
715 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
716 S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
717 VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
718 if (SubBValNo != SubValSNo)
719 S.MergeValueNumberInto(SubBValNo, SubValSNo);
720 }
721
722 LLVM_DEBUG(dbgs() << " result = " << IntB << '\n');
723
724 // If the source instruction was killing the source register before the
725 // merge, unset the isKill marker given the live range has been extended.
726 int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
727 if (UIdx != -1) {
728 ValSEndInst->getOperand(UIdx).setIsKill(false);
729 }
730
731 // Rewrite the copy.
732 CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
733 // If the copy instruction was killing the destination register or any
734 // subrange before the merge trim the live range.
735 bool RecomputeLiveRange = AS->end == CopyIdx;
736 if (!RecomputeLiveRange) {
737 for (LiveInterval::SubRange &S : IntA.subranges()) {
738 LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
739 if (SS != S.end() && SS->end == CopyIdx) {
740 RecomputeLiveRange = true;
741 break;
742 }
743 }
744 }
745 if (RecomputeLiveRange)
746 shrinkToUses(&IntA);
747
748 ++numExtends;
749 return true;
750}
751
752bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
753 LiveInterval &IntB,
754 VNInfo *AValNo,
755 VNInfo *BValNo) {
756 // If AValNo has PHI kills, conservatively assume that IntB defs can reach
757 // the PHI values.
758 if (LIS->hasPHIKill(IntA, AValNo))
759 return true;
760
761 for (LiveRange::Segment &ASeg : IntA.segments) {
762 if (ASeg.valno != AValNo) continue;
764 if (BI != IntB.begin())
765 --BI;
766 for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
767 if (BI->valno == BValNo)
768 continue;
769 if (BI->start <= ASeg.start && BI->end > ASeg.start)
770 return true;
771 if (BI->start > ASeg.start && BI->start < ASeg.end)
772 return true;
773 }
774 }
775 return false;
776}
777
778/// Copy segments with value number @p SrcValNo from liverange @p Src to live
779/// range @Dst and use value number @p DstValNo there.
780static std::pair<bool,bool>
781addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
782 const VNInfo *SrcValNo) {
783 bool Changed = false;
784 bool MergedWithDead = false;
785 for (const LiveRange::Segment &S : Src.segments) {
786 if (S.valno != SrcValNo)
787 continue;
788 // This is adding a segment from Src that ends in a copy that is about
789 // to be removed. This segment is going to be merged with a pre-existing
790 // segment in Dst. This works, except in cases when the corresponding
791 // segment in Dst is dead. For example: adding [192r,208r:1) from Src
792 // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
793 // Recognized such cases, so that the segments can be shrunk.
794 LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
795 LiveRange::Segment &Merged = *Dst.addSegment(Added);
796 if (Merged.end.isDead())
797 MergedWithDead = true;
798 Changed = true;
799 }
800 return std::make_pair(Changed, MergedWithDead);
801}
802
803std::pair<bool,bool>
804RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
805 MachineInstr *CopyMI) {
806 assert(!CP.isPhys());
807
808 LiveInterval &IntA =
809 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
810 LiveInterval &IntB =
811 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
812
813 // We found a non-trivially-coalescable copy with IntA being the source and
814 // IntB being the dest, thus this defines a value number in IntB. If the
815 // source value number (in IntA) is defined by a commutable instruction and
816 // its other operand is coalesced to the copy dest register, see if we can
817 // transform the copy into a noop by commuting the definition. For example,
818 //
819 // A3 = op A2 killed B0
820 // ...
821 // B1 = A3 <- this copy
822 // ...
823 // = op A3 <- more uses
824 //
825 // ==>
826 //
827 // B2 = op B0 killed A2
828 // ...
829 // B1 = B2 <- now an identity copy
830 // ...
831 // = op B2 <- more uses
832
833 // BValNo is a value number in B that is defined by a copy from A. 'B1' in
834 // the example above.
835 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
836 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
837 assert(BValNo != nullptr && BValNo->def == CopyIdx);
838
839 // AValNo is the value number in A that defines the copy, A3 in the example.
840 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
841 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
842 if (AValNo->isPHIDef())
843 return { false, false };
845 if (!DefMI)
846 return { false, false };
847 if (!DefMI->isCommutable())
848 return { false, false };
849 // If DefMI is a two-address instruction then commuting it will change the
850 // destination register.
851 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
852 assert(DefIdx != -1);
853 unsigned UseOpIdx;
854 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
855 return { false, false };
856
857 // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
858 // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
859 // passed to the method. That _other_ operand is chosen by
860 // the findCommutedOpIndices() method.
861 //
862 // That is obviously an area for improvement in case of instructions having
863 // more than 2 operands. For example, if some instruction has 3 commutable
864 // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
865 // op#2<->op#3) of commute transformation should be considered/tried here.
866 unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
867 if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
868 return { false, false };
869
870 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
871 Register NewReg = NewDstMO.getReg();
872 if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
873 return { false, false };
874
875 // Make sure there are no other definitions of IntB that would reach the
876 // uses which the new definition can reach.
877 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
878 return { false, false };
879
880 // If some of the uses of IntA.reg is already coalesced away, return false.
881 // It's not possible to determine whether it's safe to perform the coalescing.
882 for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
883 MachineInstr *UseMI = MO.getParent();
884 unsigned OpNo = &MO - &UseMI->getOperand(0);
885 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
887 if (US == IntA.end() || US->valno != AValNo)
888 continue;
889 // If this use is tied to a def, we can't rewrite the register.
890 if (UseMI->isRegTiedToDefOperand(OpNo))
891 return { false, false };
892 }
893
894 LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
895 << *DefMI);
896
897 // At this point we have decided that it is legal to do this
898 // transformation. Start by commuting the instruction.
900 MachineInstr *NewMI =
901 TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
902 if (!NewMI)
903 return { false, false };
904 if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
905 !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
906 return { false, false };
907 if (NewMI != DefMI) {
908 LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
910 MBB->insert(Pos, NewMI);
911 MBB->erase(DefMI);
912 }
913
914 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
915 // A = or A, B
916 // ...
917 // B = A
918 // ...
919 // C = killed A
920 // ...
921 // = B
922
923 // Update uses of IntA of the specific Val# with IntB.
924 for (MachineOperand &UseMO :
925 llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
926 if (UseMO.isUndef())
927 continue;
928 MachineInstr *UseMI = UseMO.getParent();
929 if (UseMI->isDebugInstr()) {
930 // FIXME These don't have an instruction index. Not clear we have enough
931 // info to decide whether to do this replacement or not. For now do it.
932 UseMO.setReg(NewReg);
933 continue;
934 }
935 SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
937 assert(US != IntA.end() && "Use must be live");
938 if (US->valno != AValNo)
939 continue;
940 // Kill flags are no longer accurate. They are recomputed after RA.
941 UseMO.setIsKill(false);
942 if (NewReg.isPhysical())
943 UseMO.substPhysReg(NewReg, *TRI);
944 else
945 UseMO.setReg(NewReg);
946 if (UseMI == CopyMI)
947 continue;
948 if (!UseMI->isCopy())
949 continue;
950 if (UseMI->getOperand(0).getReg() != IntB.reg() ||
952 continue;
953
954 // This copy will become a noop. If it's defining a new val#, merge it into
955 // BValNo.
956 SlotIndex DefIdx = UseIdx.getRegSlot();
957 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
958 if (!DVNI)
959 continue;
960 LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
961 assert(DVNI->def == DefIdx);
962 BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
963 for (LiveInterval::SubRange &S : IntB.subranges()) {
964 VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
965 if (!SubDVNI)
966 continue;
967 VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
968 assert(SubBValNo->def == CopyIdx);
969 S.MergeValueNumberInto(SubDVNI, SubBValNo);
970 }
971
972 deleteInstr(UseMI);
973 }
974
975 // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
976 // is updated.
977 bool ShrinkB = false;
979 if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
980 if (!IntA.hasSubRanges()) {
981 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
982 IntA.createSubRangeFrom(Allocator, Mask, IntA);
983 } else if (!IntB.hasSubRanges()) {
984 LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
985 IntB.createSubRangeFrom(Allocator, Mask, IntB);
986 }
987 SlotIndex AIdx = CopyIdx.getRegSlot(true);
988 LaneBitmask MaskA;
989 const SlotIndexes &Indexes = *LIS->getSlotIndexes();
990 for (LiveInterval::SubRange &SA : IntA.subranges()) {
991 VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
992 // Even if we are dealing with a full copy, some lanes can
993 // still be undefined.
994 // E.g.,
995 // undef A.subLow = ...
996 // B = COPY A <== A.subHigh is undefined here and does
997 // not have a value number.
998 if (!ASubValNo)
999 continue;
1000 MaskA |= SA.LaneMask;
1001
1002 IntB.refineSubRanges(
1003 Allocator, SA.LaneMask,
1004 [&Allocator, &SA, CopyIdx, ASubValNo,
1005 &ShrinkB](LiveInterval::SubRange &SR) {
1006 VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
1007 : SR.getVNInfoAt(CopyIdx);
1008 assert(BSubValNo != nullptr);
1009 auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
1010 ShrinkB |= P.second;
1011 if (P.first)
1012 BSubValNo->def = ASubValNo->def;
1013 },
1014 Indexes, *TRI);
1015 }
1016 // Go over all subranges of IntB that have not been covered by IntA,
1017 // and delete the segments starting at CopyIdx. This can happen if
1018 // IntA has undef lanes that are defined in IntB.
1019 for (LiveInterval::SubRange &SB : IntB.subranges()) {
1020 if ((SB.LaneMask & MaskA).any())
1021 continue;
1022 if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
1023 if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
1024 SB.removeSegment(*S, true);
1025 }
1026 }
1027
1028 BValNo->def = AValNo->def;
1029 auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
1030 ShrinkB |= P.second;
1031 LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
1032
1033 LIS->removeVRegDefAt(IntA, AValNo->def);
1034
1035 LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
1036 ++numCommutes;
1037 return { true, ShrinkB };
1038}
1039
1040/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1041/// predecessor of BB2, and if B is not redefined on the way from A = B
1042/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1043/// execution goes through the path from BB0 to BB2. We may move B = A
1044/// to the predecessor without such reversed copy.
1045/// So we will transform the program from:
1046/// BB0:
1047/// A = B; BB1:
1048/// ... ...
1049/// / \ /
1050/// BB2:
1051/// ...
1052/// B = A;
1053///
1054/// to:
1055///
1056/// BB0: BB1:
1057/// A = B; ...
1058/// ... B = A;
1059/// / \ /
1060/// BB2:
1061/// ...
1062///
1063/// A special case is when BB0 and BB2 are the same BB which is the only
1064/// BB in a loop:
1065/// BB1:
1066/// ...
1067/// BB0/BB2: ----
1068/// B = A; |
1069/// ... |
1070/// A = B; |
1071/// |-------
1072/// |
1073/// We may hoist B = A from BB0/BB2 to BB1.
1074///
1075/// The major preconditions for correctness to remove such partial
1076/// redundancy include:
1077/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1078/// the PHI is defined by the reversed copy A = B in BB0.
1079/// 2. No B is referenced from the start of BB2 to B = A.
1080/// 3. No B is defined from A = B to the end of BB0.
1081/// 4. BB1 has only one successor.
1082///
1083/// 2 and 4 implicitly ensure B is not live at the end of BB1.
1084/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1085/// colder place, which not only prevent endless loop, but also make sure
1086/// the movement of copy is beneficial.
1087bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1088 MachineInstr &CopyMI) {
1089 assert(!CP.isPhys());
1090 if (!CopyMI.isFullCopy())
1091 return false;
1092
1093 MachineBasicBlock &MBB = *CopyMI.getParent();
1094 // If this block is the target of an invoke/inlineasm_br, moving the copy into
1095 // the predecessor is tricker, and we don't handle it.
1097 return false;
1098
1099 if (MBB.pred_size() != 2)
1100 return false;
1101
1102 LiveInterval &IntA =
1103 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
1104 LiveInterval &IntB =
1105 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
1106
1107 // A is defined by PHI at the entry of MBB.
1108 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1109 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1110 assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1111 if (!AValNo->isPHIDef())
1112 return false;
1113
1114 // No B is referenced before CopyMI in MBB.
1115 if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1116 return false;
1117
1118 // MBB has two predecessors: one contains A = B so no copy will be inserted
1119 // for it. The other one will have a copy moved from MBB.
1120 bool FoundReverseCopy = false;
1121 MachineBasicBlock *CopyLeftBB = nullptr;
1122 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1123 VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1125 if (!DefMI || !DefMI->isFullCopy()) {
1126 CopyLeftBB = Pred;
1127 continue;
1128 }
1129 // Check DefMI is a reverse copy and it is in BB Pred.
1130 if (DefMI->getOperand(0).getReg() != IntA.reg() ||
1131 DefMI->getOperand(1).getReg() != IntB.reg() ||
1132 DefMI->getParent() != Pred) {
1133 CopyLeftBB = Pred;
1134 continue;
1135 }
1136 // If there is any other def of B after DefMI and before the end of Pred,
1137 // we need to keep the copy of B = A at the end of Pred if we remove
1138 // B = A from MBB.
1139 bool ValB_Changed = false;
1140 for (auto *VNI : IntB.valnos) {
1141 if (VNI->isUnused())
1142 continue;
1143 if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1144 ValB_Changed = true;
1145 break;
1146 }
1147 }
1148 if (ValB_Changed) {
1149 CopyLeftBB = Pred;
1150 continue;
1151 }
1152 FoundReverseCopy = true;
1153 }
1154
1155 // If no reverse copy is found in predecessors, nothing to do.
1156 if (!FoundReverseCopy)
1157 return false;
1158
1159 // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1160 // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1161 // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1162 // update IntA/IntB.
1163 //
1164 // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1165 // MBB is hotter than CopyLeftBB.
1166 if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1167 return false;
1168
1169 // Now (almost sure it's) ok to move copy.
1170 if (CopyLeftBB) {
1171 // Position in CopyLeftBB where we should insert new copy.
1172 auto InsPos = CopyLeftBB->getFirstTerminator();
1173
1174 // Make sure that B isn't referenced in the terminators (if any) at the end
1175 // of the predecessor since we're about to insert a new definition of B
1176 // before them.
1177 if (InsPos != CopyLeftBB->end()) {
1178 SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1179 if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1180 return false;
1181 }
1182
1183 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1184 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1185
1186 // Insert new copy to CopyLeftBB.
1187 MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1188 TII->get(TargetOpcode::COPY), IntB.reg())
1189 .addReg(IntA.reg());
1190 SlotIndex NewCopyIdx =
1191 LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1192 IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1193 for (LiveInterval::SubRange &SR : IntB.subranges())
1194 SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1195
1196 // If the newly created Instruction has an address of an instruction that was
1197 // deleted before (object recycled by the allocator) it needs to be removed from
1198 // the deleted list.
1199 ErasedInstrs.erase(NewCopyMI);
1200 } else {
1201 LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1202 << printMBBReference(MBB) << '\t' << CopyMI);
1203 }
1204
1205 // Remove CopyMI.
1206 // Note: This is fine to remove the copy before updating the live-ranges.
1207 // While updating the live-ranges, we only look at slot indices and
1208 // never go back to the instruction.
1209 // Mark instructions as deleted.
1210 deleteInstr(&CopyMI);
1211
1212 // Update the liveness.
1213 SmallVector<SlotIndex, 8> EndPoints;
1214 VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1215 LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1216 &EndPoints);
1217 BValNo->markUnused();
1218 // Extend IntB to the EndPoints of its original live interval.
1219 LIS->extendToIndices(IntB, EndPoints);
1220
1221 // Now, do the same for its subranges.
1222 for (LiveInterval::SubRange &SR : IntB.subranges()) {
1223 EndPoints.clear();
1224 VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1225 assert(BValNo && "All sublanes should be live");
1226 LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1227 BValNo->markUnused();
1228 // We can have a situation where the result of the original copy is live,
1229 // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1230 // the copy appear as an endpoint from pruneValue(), but we don't want it
1231 // to because the copy has been removed. We can go ahead and remove that
1232 // endpoint; there is no other situation here that there could be a use at
1233 // the same place as we know that the copy is a full copy.
1234 for (unsigned I = 0; I != EndPoints.size(); ) {
1235 if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1236 EndPoints[I] = EndPoints.back();
1237 EndPoints.pop_back();
1238 continue;
1239 }
1240 ++I;
1241 }
1243 IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
1244 *LIS->getSlotIndexes());
1245 LIS->extendToIndices(SR, EndPoints, Undefs);
1246 }
1247 // If any dead defs were extended, truncate them.
1248 shrinkToUses(&IntB);
1249
1250 // Finally, update the live-range of IntA.
1251 shrinkToUses(&IntA);
1252 return true;
1253}
1254
1255/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1256/// defining a subregister.
1257static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1258 assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1259
1260 for (const MachineOperand &Op : MI.operands()) {
1261 if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
1262 continue;
1263 // Return true if we define the full register or don't care about the value
1264 // inside other subregisters.
1265 if (Op.getSubReg() == 0 || Op.isUndef())
1266 return true;
1267 }
1268 return false;
1269}
1270
1271bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1272 MachineInstr *CopyMI,
1273 bool &IsDefCopy) {
1274 IsDefCopy = false;
1275 Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1276 unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1277 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1278 unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1279 if (SrcReg.isPhysical())
1280 return false;
1281
1282 LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1283 SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1284 VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1285 if (!ValNo)
1286 return false;
1287 if (ValNo->isPHIDef() || ValNo->isUnused())
1288 return false;
1290 if (!DefMI)
1291 return false;
1292 if (DefMI->isCopyLike()) {
1293 IsDefCopy = true;
1294 return false;
1295 }
1296 if (!TII->isAsCheapAsAMove(*DefMI))
1297 return false;
1298
1300 LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1301 if (!Edit.checkRematerializable(ValNo, DefMI))
1302 return false;
1303
1304 if (!definesFullReg(*DefMI, SrcReg))
1305 return false;
1306 bool SawStore = false;
1307 if (!DefMI->isSafeToMove(AA, SawStore))
1308 return false;
1309 const MCInstrDesc &MCID = DefMI->getDesc();
1310 if (MCID.getNumDefs() != 1)
1311 return false;
1312 // Only support subregister destinations when the def is read-undef.
1313 MachineOperand &DstOperand = CopyMI->getOperand(0);
1314 Register CopyDstReg = DstOperand.getReg();
1315 if (DstOperand.getSubReg() && !DstOperand.isUndef())
1316 return false;
1317
1318 // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1319 // the register substantially (beyond both source and dest size). This is bad
1320 // for performance since it can cascade through a function, introducing many
1321 // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1322 // around after a few subreg copies).
1323 if (SrcIdx && DstIdx)
1324 return false;
1325
1326 const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1327 if (!DefMI->isImplicitDef()) {
1328 if (DstReg.isPhysical()) {
1329 Register NewDstReg = DstReg;
1330
1331 unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1332 DefMI->getOperand(0).getSubReg());
1333 if (NewDstIdx)
1334 NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1335
1336 // Finally, make sure that the physical subregister that will be
1337 // constructed later is permitted for the instruction.
1338 if (!DefRC->contains(NewDstReg))
1339 return false;
1340 } else {
1341 // Theoretically, some stack frame reference could exist. Just make sure
1342 // it hasn't actually happened.
1343 assert(DstReg.isVirtual() &&
1344 "Only expect to deal with virtual or physical registers");
1345 }
1346 }
1347
1348 LiveRangeEdit::Remat RM(ValNo);
1349 RM.OrigMI = DefMI;
1350 if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1351 return false;
1352
1353 DebugLoc DL = CopyMI->getDebugLoc();
1354 MachineBasicBlock *MBB = CopyMI->getParent();
1356 std::next(MachineBasicBlock::iterator(CopyMI));
1357 Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1358 MachineInstr &NewMI = *std::prev(MII);
1359 NewMI.setDebugLoc(DL);
1360
1361 // In a situation like the following:
1362 // %0:subreg = instr ; DefMI, subreg = DstIdx
1363 // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0
1364 // instead of widening %1 to the register class of %0 simply do:
1365 // %1 = instr
1366 const TargetRegisterClass *NewRC = CP.getNewRC();
1367 if (DstIdx != 0) {
1368 MachineOperand &DefMO = NewMI.getOperand(0);
1369 if (DefMO.getSubReg() == DstIdx) {
1370 assert(SrcIdx == 0 && CP.isFlipped()
1371 && "Shouldn't have SrcIdx+DstIdx at this point");
1372 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1373 const TargetRegisterClass *CommonRC =
1374 TRI->getCommonSubClass(DefRC, DstRC);
1375 if (CommonRC != nullptr) {
1376 NewRC = CommonRC;
1377
1378 // Instruction might contain "undef %0:subreg" as use operand:
1379 // %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1380 //
1381 // Need to check all operands.
1382 for (MachineOperand &MO : NewMI.operands()) {
1383 if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1384 MO.setSubReg(0);
1385 }
1386 }
1387
1388 DstIdx = 0;
1389 DefMO.setIsUndef(false); // Only subregs can have def+undef.
1390 }
1391 }
1392 }
1393
1394 // CopyMI may have implicit operands, save them so that we can transfer them
1395 // over to the newly materialized instruction after CopyMI is removed.
1397 ImplicitOps.reserve(CopyMI->getNumOperands() -
1398 CopyMI->getDesc().getNumOperands());
1399 for (unsigned I = CopyMI->getDesc().getNumOperands(),
1400 E = CopyMI->getNumOperands();
1401 I != E; ++I) {
1402 MachineOperand &MO = CopyMI->getOperand(I);
1403 if (MO.isReg()) {
1404 assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1405 // Discard VReg implicit defs.
1406 if (MO.getReg().isPhysical())
1407 ImplicitOps.push_back(MO);
1408 }
1409 }
1410
1411 CopyMI->eraseFromParent();
1412 ErasedInstrs.insert(CopyMI);
1413
1414 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1415 // We need to remember these so we can add intervals once we insert
1416 // NewMI into SlotIndexes.
1417 SmallVector<MCRegister, 4> NewMIImplDefs;
1418 for (unsigned i = NewMI.getDesc().getNumOperands(),
1419 e = NewMI.getNumOperands();
1420 i != e; ++i) {
1421 MachineOperand &MO = NewMI.getOperand(i);
1422 if (MO.isReg() && MO.isDef()) {
1423 assert(MO.isImplicit() && MO.isDead() && MO.getReg().isPhysical());
1424 NewMIImplDefs.push_back(MO.getReg().asMCReg());
1425 }
1426 }
1427
1428 if (DstReg.isVirtual()) {
1429 unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1430
1431 if (DefRC != nullptr) {
1432 if (NewIdx)
1433 NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1434 else
1435 NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1436 assert(NewRC && "subreg chosen for remat incompatible with instruction");
1437 }
1438 // Remap subranges to new lanemask and change register class.
1439 LiveInterval &DstInt = LIS->getInterval(DstReg);
1440 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1441 SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1442 }
1443 MRI->setRegClass(DstReg, NewRC);
1444
1445 // Update machine operands and add flags.
1446 updateRegDefsUses(DstReg, DstReg, DstIdx);
1447 NewMI.getOperand(0).setSubReg(NewIdx);
1448 // updateRegDefUses can add an "undef" flag to the definition, since
1449 // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1450 // sure that "undef" is not set.
1451 if (NewIdx == 0)
1452 NewMI.getOperand(0).setIsUndef(false);
1453 // Add dead subregister definitions if we are defining the whole register
1454 // but only part of it is live.
1455 // This could happen if the rematerialization instruction is rematerializing
1456 // more than actually is used in the register.
1457 // An example would be:
1458 // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1459 // ; Copying only part of the register here, but the rest is undef.
1460 // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1461 // ==>
1462 // ; Materialize all the constants but only using one
1463 // %2 = LOAD_CONSTANTS 5, 8
1464 //
1465 // at this point for the part that wasn't defined before we could have
1466 // subranges missing the definition.
1467 if (NewIdx == 0 && DstInt.hasSubRanges()) {
1468 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1469 SlotIndex DefIndex =
1470 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1471 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1473 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1474 if (!SR.liveAt(DefIndex))
1475 SR.createDeadDef(DefIndex, Alloc);
1476 MaxMask &= ~SR.LaneMask;
1477 }
1478 if (MaxMask.any()) {
1479 LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1480 SR->createDeadDef(DefIndex, Alloc);
1481 }
1482 }
1483
1484 // Make sure that the subrange for resultant undef is removed
1485 // For example:
1486 // %1:sub1<def,read-undef> = LOAD CONSTANT 1
1487 // %2 = COPY %1
1488 // ==>
1489 // %2:sub1<def, read-undef> = LOAD CONSTANT 1
1490 // ; Correct but need to remove the subrange for %2:sub0
1491 // ; as it is now undef
1492 if (NewIdx != 0 && DstInt.hasSubRanges()) {
1493 // The affected subregister segments can be removed.
1494 SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1495 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1496 bool UpdatedSubRanges = false;
1497 SlotIndex DefIndex =
1498 CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1500 for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1501 if ((SR.LaneMask & DstMask).none()) {
1503 << "Removing undefined SubRange "
1504 << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1505 // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1506 if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1507 SR.removeValNo(RmValNo);
1508 UpdatedSubRanges = true;
1509 }
1510 } else {
1511 // We know that this lane is defined by this instruction,
1512 // but at this point it may be empty because it is not used by
1513 // anything. This happens when updateRegDefUses adds the missing
1514 // lanes. Assign that lane a dead def so that the interferences
1515 // are properly modeled.
1516 if (SR.empty())
1517 SR.createDeadDef(DefIndex, Alloc);
1518 }
1519 }
1520 if (UpdatedSubRanges)
1521 DstInt.removeEmptySubRanges();
1522 }
1523 } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1524 // The New instruction may be defining a sub-register of what's actually
1525 // been asked for. If so it must implicitly define the whole thing.
1526 assert(DstReg.isPhysical() &&
1527 "Only expect virtual or physical registers in remat");
1528 NewMI.getOperand(0).setIsDead(true);
1530 CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1531 // Record small dead def live-ranges for all the subregisters
1532 // of the destination register.
1533 // Otherwise, variables that live through may miss some
1534 // interferences, thus creating invalid allocation.
1535 // E.g., i386 code:
1536 // %1 = somedef ; %1 GR8
1537 // %2 = remat ; %2 GR32
1538 // CL = COPY %2.sub_8bit
1539 // = somedef %1 ; %1 GR8
1540 // =>
1541 // %1 = somedef ; %1 GR8
1542 // dead ECX = remat ; implicit-def CL
1543 // = somedef %1 ; %1 GR8
1544 // %1 will see the interferences with CL but not with CH since
1545 // no live-ranges would have been created for ECX.
1546 // Fix that!
1547 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1548 for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1549 Units.isValid(); ++Units)
1550 if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1551 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1552 }
1553
1554 if (NewMI.getOperand(0).getSubReg())
1555 NewMI.getOperand(0).setIsUndef();
1556
1557 // Transfer over implicit operands to the rematerialized instruction.
1558 for (MachineOperand &MO : ImplicitOps)
1559 NewMI.addOperand(MO);
1560
1561 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1562 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1563 MCRegister Reg = NewMIImplDefs[i];
1564 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1565 if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1566 LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1567 }
1568
1569 LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1570 ++NumReMats;
1571
1572 // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1573 // to describe DstReg instead.
1574 if (MRI->use_nodbg_empty(SrcReg)) {
1575 for (MachineOperand &UseMO :
1576 llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1577 MachineInstr *UseMI = UseMO.getParent();
1578 if (UseMI->isDebugInstr()) {
1579 if (DstReg.isPhysical())
1580 UseMO.substPhysReg(DstReg, *TRI);
1581 else
1582 UseMO.setReg(DstReg);
1583 // Move the debug value directly after the def of the rematerialized
1584 // value in DstReg.
1585 MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1586 LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1587 }
1588 }
1589 }
1590
1591 if (ToBeUpdated.count(SrcReg))
1592 return true;
1593
1594 unsigned NumCopyUses = 0;
1595 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1596 if (UseMO.getParent()->isCopyLike())
1597 NumCopyUses++;
1598 }
1599 if (NumCopyUses < LateRematUpdateThreshold) {
1600 // The source interval can become smaller because we removed a use.
1601 shrinkToUses(&SrcInt, &DeadDefs);
1602 if (!DeadDefs.empty())
1603 eliminateDeadDefs(&Edit);
1604 } else {
1605 ToBeUpdated.insert(SrcReg);
1606 }
1607 return true;
1608}
1609
1610MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1611 // ProcessImplicitDefs may leave some copies of <undef> values, it only
1612 // removes local variables. When we have a copy like:
1613 //
1614 // %1 = COPY undef %2
1615 //
1616 // We delete the copy and remove the corresponding value number from %1.
1617 // Any uses of that value number are marked as <undef>.
1618
1619 // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1620 // CoalescerPair may have a new register class with adjusted subreg indices
1621 // at this point.
1622 Register SrcReg, DstReg;
1623 unsigned SrcSubIdx = 0, DstSubIdx = 0;
1624 if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
1625 return nullptr;
1626
1627 SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1628 const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1629 // CopyMI is undef iff SrcReg is not live before the instruction.
1630 if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
1631 LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1632 for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1633 if ((SR.LaneMask & SrcMask).none())
1634 continue;
1635 if (SR.liveAt(Idx))
1636 return nullptr;
1637 }
1638 } else if (SrcLI.liveAt(Idx))
1639 return nullptr;
1640
1641 // If the undef copy defines a live-out value (i.e. an input to a PHI def),
1642 // then replace it with an IMPLICIT_DEF.
1643 LiveInterval &DstLI = LIS->getInterval(DstReg);
1644 SlotIndex RegIndex = Idx.getRegSlot();
1645 LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
1646 assert(Seg != nullptr && "No segment for defining instruction");
1647 VNInfo *V = DstLI.getVNInfoAt(Seg->end);
1648
1649 // The source interval may also have been on an undef use, in which case the
1650 // copy introduced a live value.
1651 if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
1652 CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
1653 for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
1654 MachineOperand &MO = CopyMI->getOperand(i-1);
1655 if (MO.isReg() && MO.isUse())
1656 CopyMI->removeOperand(i-1);
1657 }
1658 LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
1659 "implicit def\n");
1660 return CopyMI;
1661 }
1662
1663 // Remove any DstReg segments starting at the instruction.
1664 LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1665
1666 // Remove value or merge with previous one in case of a subregister def.
1667 if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1668 VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1669 DstLI.MergeValueNumberInto(VNI, PrevVNI);
1670
1671 // The affected subregister segments can be removed.
1672 LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1673 for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1674 if ((SR.LaneMask & DstMask).none())
1675 continue;
1676
1677 VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1678 assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1679 SR.removeValNo(SVNI);
1680 }
1681 DstLI.removeEmptySubRanges();
1682 } else
1683 LIS->removeVRegDefAt(DstLI, RegIndex);
1684
1685 // Mark uses as undef.
1686 for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1687 if (MO.isDef() /*|| MO.isUndef()*/)
1688 continue;
1689 const MachineInstr &MI = *MO.getParent();
1690 SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1691 LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1692 bool isLive;
1693 if (!UseMask.all() && DstLI.hasSubRanges()) {
1694 isLive = false;
1695 for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1696 if ((SR.LaneMask & UseMask).none())
1697 continue;
1698 if (SR.liveAt(UseIdx)) {
1699 isLive = true;
1700 break;
1701 }
1702 }
1703 } else
1704 isLive = DstLI.liveAt(UseIdx);
1705 if (isLive)
1706 continue;
1707 MO.setIsUndef(true);
1708 LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1709 }
1710
1711 // A def of a subregister may be a use of the other subregisters, so
1712 // deleting a def of a subregister may also remove uses. Since CopyMI
1713 // is still part of the function (but about to be erased), mark all
1714 // defs of DstReg in it as <undef>, so that shrinkToUses would
1715 // ignore them.
1716 for (MachineOperand &MO : CopyMI->operands())
1717 if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
1718 MO.setIsUndef(true);
1719 LIS->shrinkToUses(&DstLI);
1720
1721 return CopyMI;
1722}
1723
1724void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1725 MachineOperand &MO, unsigned SubRegIdx) {
1726 LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1727 if (MO.isDef())
1728 Mask = ~Mask;
1729 bool IsUndef = true;
1730 for (const LiveInterval::SubRange &S : Int.subranges()) {
1731 if ((S.LaneMask & Mask).none())
1732 continue;
1733 if (S.liveAt(UseIdx)) {
1734 IsUndef = false;
1735 break;
1736 }
1737 }
1738 if (IsUndef) {
1739 MO.setIsUndef(true);
1740 // We found out some subregister use is actually reading an undefined
1741 // value. In some cases the whole vreg has become undefined at this
1742 // point so we have to potentially shrink the main range if the
1743 // use was ending a live segment there.
1744 LiveQueryResult Q = Int.Query(UseIdx);
1745 if (Q.valueOut() == nullptr)
1746 ShrinkMainRange = true;
1747 }
1748}
1749
1750void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1751 unsigned SubIdx) {
1752 bool DstIsPhys = DstReg.isPhysical();
1753 LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1754
1755 if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1756 for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1757 unsigned SubReg = MO.getSubReg();
1758 if (SubReg == 0 || MO.isUndef())
1759 continue;
1760 MachineInstr &MI = *MO.getParent();
1761 if (MI.isDebugInstr())
1762 continue;
1763 SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1764 addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1765 }
1766 }
1767
1770 I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1771 I != E; ) {
1772 MachineInstr *UseMI = &*(I++);
1773
1774 // Each instruction can only be rewritten once because sub-register
1775 // composition is not always idempotent. When SrcReg != DstReg, rewriting
1776 // the UseMI operands removes them from the SrcReg use-def chain, but when
1777 // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1778 // operands mentioning the virtual register.
1779 if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1780 continue;
1781
1783 bool Reads, Writes;
1784 std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1785
1786 // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1787 // because SrcReg is a sub-register.
1788 if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
1789 Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1790
1791 // Replace SrcReg with DstReg in all UseMI operands.
1792 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1793 MachineOperand &MO = UseMI->getOperand(Ops[i]);
1794
1795 // Adjust <undef> flags in case of sub-register joins. We don't want to
1796 // turn a full def into a read-modify-write sub-register def and vice
1797 // versa.
1798 if (SubIdx && MO.isDef())
1799 MO.setIsUndef(!Reads);
1800
1801 // A subreg use of a partially undef (super) register may be a complete
1802 // undef use now and then has to be marked that way.
1803 if (MO.isUse() && !DstIsPhys) {
1804 unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
1805 if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
1806 if (!DstInt->hasSubRanges()) {
1808 LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1809 LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1810 LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1811 DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
1812 // The unused lanes are just empty live-ranges at this point.
1813 // It is the caller responsibility to set the proper
1814 // dead segments if there is an actual dead def of the
1815 // unused lanes. This may happen with rematerialization.
1816 DstInt->createSubRange(Allocator, UnusedLanes);
1817 }
1818 SlotIndex MIIdx = UseMI->isDebugInstr()
1820 : LIS->getInstructionIndex(*UseMI);
1821 SlotIndex UseIdx = MIIdx.getRegSlot(true);
1822 addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
1823 }
1824 }
1825
1826 if (DstIsPhys)
1827 MO.substPhysReg(DstReg, *TRI);
1828 else
1829 MO.substVirtReg(DstReg, SubIdx, *TRI);
1830 }
1831
1832 LLVM_DEBUG({
1833 dbgs() << "\t\tupdated: ";
1834 if (!UseMI->isDebugInstr())
1835 dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1836 dbgs() << *UseMI;
1837 });
1838 }
1839}
1840
1841bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1842 // Always join simple intervals that are defined by a single copy from a
1843 // reserved register. This doesn't increase register pressure, so it is
1844 // always beneficial.
1845 if (!MRI->isReserved(CP.getDstReg())) {
1846 LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1847 return false;
1848 }
1849
1850 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1851 if (JoinVInt.containsOneValue())
1852 return true;
1853
1854 LLVM_DEBUG(
1855 dbgs() << "\tCannot join complex intervals into reserved register.\n");
1856 return false;
1857}
1858
1859bool RegisterCoalescer::copyValueUndefInPredecessors(
1861 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1862 SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1863 if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1864 // If this is a self loop, we may be reading the same value.
1865 if (V->id != SLRQ.valueOutOrDead()->id)
1866 return false;
1867 }
1868 }
1869
1870 return true;
1871}
1872
1873void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1874 Register Reg,
1875 LaneBitmask PrunedLanes) {
1876 // If we had other instructions in the segment reading the undef sublane
1877 // value, we need to mark them with undef.
1878 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1879 unsigned SubRegIdx = MO.getSubReg();
1880 if (SubRegIdx == 0 || MO.isUndef())
1881 continue;
1882
1883 LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1884 SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
1885 for (LiveInterval::SubRange &S : LI.subranges()) {
1886 if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
1887 MO.setIsUndef();
1888 break;
1889 }
1890 }
1891 }
1892
1894
1895 // A def of a subregister may be a use of other register lanes. Replacing
1896 // such a def with a def of a different register will eliminate the use,
1897 // and may cause the recorded live range to be larger than the actual
1898 // liveness in the program IR.
1899 LIS->shrinkToUses(&LI);
1900}
1901
1902bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1903 Again = false;
1904 LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1905
1907 if (!CP.setRegisters(CopyMI)) {
1908 LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1909 return false;
1910 }
1911
1912 if (CP.getNewRC()) {
1913 auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1914 auto DstRC = MRI->getRegClass(CP.getDstReg());
1915 unsigned SrcIdx = CP.getSrcIdx();
1916 unsigned DstIdx = CP.getDstIdx();
1917 if (CP.isFlipped()) {
1918 std::swap(SrcIdx, DstIdx);
1919 std::swap(SrcRC, DstRC);
1920 }
1921 if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1922 CP.getNewRC(), *LIS)) {
1923 LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1924 return false;
1925 }
1926 }
1927
1928 // Dead code elimination. This really should be handled by MachineDCE, but
1929 // sometimes dead copies slip through, and we can't generate invalid live
1930 // ranges.
1931 if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1932 LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1933 DeadDefs.push_back(CopyMI);
1934 eliminateDeadDefs();
1935 return true;
1936 }
1937
1938 // Eliminate undefs.
1939 if (!CP.isPhys()) {
1940 // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
1941 if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
1942 if (UndefMI->isImplicitDef())
1943 return false;
1944 deleteInstr(CopyMI);
1945 return false; // Not coalescable.
1946 }
1947 }
1948
1949 // Coalesced copies are normally removed immediately, but transformations
1950 // like removeCopyByCommutingDef() can inadvertently create identity copies.
1951 // When that happens, just join the values and remove the copy.
1952 if (CP.getSrcReg() == CP.getDstReg()) {
1953 LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1954 LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1955 const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1956 LiveQueryResult LRQ = LI.Query(CopyIdx);
1957 if (VNInfo *DefVNI = LRQ.valueDefined()) {
1958 VNInfo *ReadVNI = LRQ.valueIn();
1959 assert(ReadVNI && "No value before copy and no <undef> flag.");
1960 assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1961
1962 // Track incoming undef lanes we need to eliminate from the subrange.
1963 LaneBitmask PrunedLanes;
1964 MachineBasicBlock *MBB = CopyMI->getParent();
1965
1966 // Process subregister liveranges.
1967 for (LiveInterval::SubRange &S : LI.subranges()) {
1968 LiveQueryResult SLRQ = S.Query(CopyIdx);
1969 if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1970 if (VNInfo *SReadVNI = SLRQ.valueIn())
1971 SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
1972
1973 // If this copy introduced an undef subrange from an incoming value,
1974 // we need to eliminate the undef live in values from the subrange.
1975 if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
1976 LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
1977 PrunedLanes |= S.LaneMask;
1978 S.removeValNo(SDefVNI);
1979 }
1980 }
1981 }
1982
1983 LI.MergeValueNumberInto(DefVNI, ReadVNI);
1984 if (PrunedLanes.any()) {
1985 LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
1986 << PrunedLanes << '\n');
1987 setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
1988 }
1989
1990 LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
1991 }
1992 deleteInstr(CopyMI);
1993 return true;
1994 }
1995
1996 // Enforce policies.
1997 if (CP.isPhys()) {
1998 LLVM_DEBUG(dbgs() << "\tConsidering merging "
1999 << printReg(CP.getSrcReg(), TRI) << " with "
2000 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
2001 if (!canJoinPhys(CP)) {
2002 // Before giving up coalescing, if definition of source is defined by
2003 // trivial computation, try rematerializing it.
2004 bool IsDefCopy = false;
2005 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2006 return true;
2007 if (IsDefCopy)
2008 Again = true; // May be possible to coalesce later.
2009 return false;
2010 }
2011 } else {
2012 // When possible, let DstReg be the larger interval.
2013 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
2014 LIS->getInterval(CP.getDstReg()).size())
2015 CP.flip();
2016
2017 LLVM_DEBUG({
2018 dbgs() << "\tConsidering merging to "
2019 << TRI->getRegClassName(CP.getNewRC()) << " with ";
2020 if (CP.getDstIdx() && CP.getSrcIdx())
2021 dbgs() << printReg(CP.getDstReg()) << " in "
2022 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
2023 << printReg(CP.getSrcReg()) << " in "
2024 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
2025 else
2026 dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
2027 << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
2028 });
2029 }
2030
2031 ShrinkMask = LaneBitmask::getNone();
2032 ShrinkMainRange = false;
2033
2034 // Okay, attempt to join these two intervals. On failure, this returns false.
2035 // Otherwise, if one of the intervals being joined is a physreg, this method
2036 // always canonicalizes DstInt to be it. The output "SrcInt" will not have
2037 // been modified, so we can use this information below to update aliases.
2038 if (!joinIntervals(CP)) {
2039 // Coalescing failed.
2040
2041 // If definition of source is defined by trivial computation, try
2042 // rematerializing it.
2043 bool IsDefCopy = false;
2044 if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2045 return true;
2046
2047 // If we can eliminate the copy without merging the live segments, do so
2048 // now.
2049 if (!CP.isPartial() && !CP.isPhys()) {
2050 bool Changed = adjustCopiesBackFrom(CP, CopyMI);
2051 bool Shrink = false;
2052 if (!Changed)
2053 std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
2054 if (Changed) {
2055 deleteInstr(CopyMI);
2056 if (Shrink) {
2057 Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
2058 LiveInterval &DstLI = LIS->getInterval(DstReg);
2059 shrinkToUses(&DstLI);
2060 LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
2061 }
2062 LLVM_DEBUG(dbgs() << "\tTrivial!\n");
2063 return true;
2064 }
2065 }
2066
2067 // Try and see if we can partially eliminate the copy by moving the copy to
2068 // its predecessor.
2069 if (!CP.isPartial() && !CP.isPhys())
2070 if (removePartialRedundancy(CP, *CopyMI))
2071 return true;
2072
2073 // Otherwise, we are unable to join the intervals.
2074 LLVM_DEBUG(dbgs() << "\tInterference!\n");
2075 Again = true; // May be possible to coalesce later.
2076 return false;
2077 }
2078
2079 // Coalescing to a virtual register that is of a sub-register class of the
2080 // other. Make sure the resulting register is set to the right register class.
2081 if (CP.isCrossClass()) {
2082 ++numCrossRCs;
2083 MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
2084 }
2085
2086 // Removing sub-register copies can ease the register class constraints.
2087 // Make sure we attempt to inflate the register class of DstReg.
2088 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
2089 InflateRegs.push_back(CP.getDstReg());
2090
2091 // CopyMI has been erased by joinIntervals at this point. Remove it from
2092 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
2093 // to the work list. This keeps ErasedInstrs from growing needlessly.
2094 ErasedInstrs.erase(CopyMI);
2095
2096 // Rewrite all SrcReg operands to DstReg.
2097 // Also update DstReg operands to include DstIdx if it is set.
2098 if (CP.getDstIdx())
2099 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2100 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2101
2102 // Shrink subregister ranges if necessary.
2103 if (ShrinkMask.any()) {
2104 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2105 for (LiveInterval::SubRange &S : LI.subranges()) {
2106 if ((S.LaneMask & ShrinkMask).none())
2107 continue;
2108 LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
2109 << ")\n");
2110 LIS->shrinkToUses(S, LI.reg());
2111 ShrinkMainRange = true;
2112 }
2114 }
2115
2116 // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
2117 // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
2118 // is not up-to-date, need to update the merged live interval here.
2119 if (ToBeUpdated.count(CP.getSrcReg()))
2120 ShrinkMainRange = true;
2121
2122 if (ShrinkMainRange) {
2123 LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2124 shrinkToUses(&LI);
2125 }
2126
2127 // SrcReg is guaranteed to be the register whose live interval that is
2128 // being merged.
2129 LIS->removeInterval(CP.getSrcReg());
2130
2131 // Update regalloc hint.
2132 TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
2133
2134 LLVM_DEBUG({
2135 dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
2136 << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
2137 dbgs() << "\tResult = ";
2138 if (CP.isPhys())
2139 dbgs() << printReg(CP.getDstReg(), TRI);
2140 else
2141 dbgs() << LIS->getInterval(CP.getDstReg());
2142 dbgs() << '\n';
2143 });
2144
2145 ++numJoins;
2146 return true;
2147}
2148
2149bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
2150 Register DstReg = CP.getDstReg();
2151 Register SrcReg = CP.getSrcReg();
2152 assert(CP.isPhys() && "Must be a physreg copy");
2153 assert(MRI->isReserved(DstReg) && "Not a reserved register");
2154 LiveInterval &RHS = LIS->getInterval(SrcReg);
2155 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
2156
2157 assert(RHS.containsOneValue() && "Invalid join with reserved register");
2158
2159 // Optimization for reserved registers like ESP. We can only merge with a
2160 // reserved physreg if RHS has a single value that is a copy of DstReg.
2161 // The live range of the reserved register will look like a set of dead defs
2162 // - we don't properly track the live range of reserved registers.
2163
2164 // Deny any overlapping intervals. This depends on all the reserved
2165 // register live ranges to look like dead defs.
2166 if (!MRI->isConstantPhysReg(DstReg)) {
2167 for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
2168 // Abort if not all the regunits are reserved.
2169 for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
2170 if (!MRI->isReserved(*RI))
2171 return false;
2172 }
2173 if (RHS.overlaps(LIS->getRegUnit(*UI))) {
2174 LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
2175 << '\n');
2176 return false;
2177 }
2178 }
2179
2180 // We must also check for overlaps with regmask clobbers.
2181 BitVector RegMaskUsable;
2182 if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2183 !RegMaskUsable.test(DstReg)) {
2184 LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2185 return false;
2186 }
2187 }
2188
2189 // Skip any value computations, we are not adding new values to the
2190 // reserved register. Also skip merging the live ranges, the reserved
2191 // register live range doesn't need to be accurate as long as all the
2192 // defs are there.
2193
2194 // Delete the identity copy.
2195 MachineInstr *CopyMI;
2196 if (CP.isFlipped()) {
2197 // Physreg is copied into vreg
2198 // %y = COPY %physreg_x
2199 // ... //< no other def of %physreg_x here
2200 // use %y
2201 // =>
2202 // ...
2203 // use %physreg_x
2204 CopyMI = MRI->getVRegDef(SrcReg);
2205 } else {
2206 // VReg is copied into physreg:
2207 // %y = def
2208 // ... //< no other def or use of %physreg_x here
2209 // %physreg_x = COPY %y
2210 // =>
2211 // %physreg_x = def
2212 // ...
2213 if (!MRI->hasOneNonDBGUse(SrcReg)) {
2214 LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2215 return false;
2216 }
2217
2218 if (!LIS->intervalIsInOneMBB(RHS)) {
2219 LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2220 return false;
2221 }
2222
2223 MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2224 CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2225 SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2226 SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2227
2228 if (!MRI->isConstantPhysReg(DstReg)) {
2229 // We checked above that there are no interfering defs of the physical
2230 // register. However, for this case, where we intend to move up the def of
2231 // the physical register, we also need to check for interfering uses.
2232 SlotIndexes *Indexes = LIS->getSlotIndexes();
2233 for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2234 SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
2236 if (MI->readsRegister(DstReg, TRI)) {
2237 LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2238 return false;
2239 }
2240 }
2241 }
2242
2243 // We're going to remove the copy which defines a physical reserved
2244 // register, so remove its valno, etc.
2245 LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
2246 << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2247
2248 LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
2249 // Create a new dead def at the new def location.
2250 for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
2251 LiveRange &LR = LIS->getRegUnit(*UI);
2252 LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2253 }
2254 }
2255
2256 deleteInstr(CopyMI);
2257
2258 // We don't track kills for reserved registers.
2259 MRI->clearKillFlags(CP.getSrcReg());
2260
2261 return true;
2262}
2263
2264//===----------------------------------------------------------------------===//
2265// Interference checking and interval joining
2266//===----------------------------------------------------------------------===//
2267//
2268// In the easiest case, the two live ranges being joined are disjoint, and
2269// there is no interference to consider. It is quite common, though, to have
2270// overlapping live ranges, and we need to check if the interference can be
2271// resolved.
2272//
2273// The live range of a single SSA value forms a sub-tree of the dominator tree.
2274// This means that two SSA values overlap if and only if the def of one value
2275// is contained in the live range of the other value. As a special case, the
2276// overlapping values can be defined at the same index.
2277//
2278// The interference from an overlapping def can be resolved in these cases:
2279//
2280// 1. Coalescable copies. The value is defined by a copy that would become an
2281// identity copy after joining SrcReg and DstReg. The copy instruction will
2282// be removed, and the value will be merged with the source value.
2283//
2284// There can be several copies back and forth, causing many values to be
2285// merged into one. We compute a list of ultimate values in the joined live
2286// range as well as a mappings from the old value numbers.
2287//
2288// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2289// predecessors have a live out value. It doesn't cause real interference,
2290// and can be merged into the value it overlaps. Like a coalescable copy, it
2291// can be erased after joining.
2292//
2293// 3. Copy of external value. The overlapping def may be a copy of a value that
2294// is already in the other register. This is like a coalescable copy, but
2295// the live range of the source register must be trimmed after erasing the
2296// copy instruction:
2297//
2298// %src = COPY %ext
2299// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
2300//
2301// 4. Clobbering undefined lanes. Vector registers are sometimes built by
2302// defining one lane at a time:
2303//
2304// %dst:ssub0<def,read-undef> = FOO
2305// %src = BAR
2306// %dst:ssub1 = COPY %src
2307//
2308// The live range of %src overlaps the %dst value defined by FOO, but
2309// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2310// which was undef anyway.
2311//
2312// The value mapping is more complicated in this case. The final live range
2313// will have different value numbers for both FOO and BAR, but there is no
2314// simple mapping from old to new values. It may even be necessary to add
2315// new PHI values.
2316//
2317// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2318// is live, but never read. This can happen because we don't compute
2319// individual live ranges per lane.
2320//
2321// %dst = FOO
2322// %src = BAR
2323// %dst:ssub1 = COPY %src
2324//
2325// This kind of interference is only resolved locally. If the clobbered
2326// lane value escapes the block, the join is aborted.
2327
2328namespace {
2329
2330/// Track information about values in a single virtual register about to be
2331/// joined. Objects of this class are always created in pairs - one for each
2332/// side of the CoalescerPair (or one for each lane of a side of the coalescer
2333/// pair)
2334class JoinVals {
2335 /// Live range we work on.
2336 LiveRange &LR;
2337
2338 /// (Main) register we work on.
2339 const Register Reg;
2340
2341 /// Reg (and therefore the values in this liverange) will end up as
2342 /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2343 /// CP.SrcIdx.
2344 const unsigned SubIdx;
2345
2346 /// The LaneMask that this liverange will occupy the coalesced register. May
2347 /// be smaller than the lanemask produced by SubIdx when merging subranges.
2348 const LaneBitmask LaneMask;
2349
2350 /// This is true when joining sub register ranges, false when joining main
2351 /// ranges.
2352 const bool SubRangeJoin;
2353
2354 /// Whether the current LiveInterval tracks subregister liveness.
2355 const bool TrackSubRegLiveness;
2356
2357 /// Values that will be present in the final live range.
2358 SmallVectorImpl<VNInfo*> &NewVNInfo;
2359
2360 const CoalescerPair &CP;
2361 LiveIntervals *LIS;
2362 SlotIndexes *Indexes;
2363 const TargetRegisterInfo *TRI;
2364
2365 /// Value number assignments. Maps value numbers in LI to entries in
2366 /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2367 SmallVector<int, 8> Assignments;
2368
2369 public:
2370 /// Conflict resolution for overlapping values.
2371 enum ConflictResolution {
2372 /// No overlap, simply keep this value.
2373 CR_Keep,
2374
2375 /// Merge this value into OtherVNI and erase the defining instruction.
2376 /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2377 /// values.
2378 CR_Erase,
2379
2380 /// Merge this value into OtherVNI but keep the defining instruction.
2381 /// This is for the special case where OtherVNI is defined by the same
2382 /// instruction.
2383 CR_Merge,
2384
2385 /// Keep this value, and have it replace OtherVNI where possible. This
2386 /// complicates value mapping since OtherVNI maps to two different values
2387 /// before and after this def.
2388 /// Used when clobbering undefined or dead lanes.
2389 CR_Replace,
2390
2391 /// Unresolved conflict. Visit later when all values have been mapped.
2392 CR_Unresolved,
2393
2394 /// Unresolvable conflict. Abort the join.
2395 CR_Impossible
2396 };
2397
2398 private:
2399 /// Per-value info for LI. The lane bit masks are all relative to the final
2400 /// joined register, so they can be compared directly between SrcReg and
2401 /// DstReg.
2402 struct Val {
2403 ConflictResolution Resolution = CR_Keep;
2404
2405 /// Lanes written by this def, 0 for unanalyzed values.
2406 LaneBitmask WriteLanes;
2407
2408 /// Lanes with defined values in this register. Other lanes are undef and
2409 /// safe to clobber.
2410 LaneBitmask ValidLanes;
2411
2412 /// Value in LI being redefined by this def.
2413 VNInfo *RedefVNI = nullptr;
2414
2415 /// Value in the other live range that overlaps this def, if any.
2416 VNInfo *OtherVNI = nullptr;
2417
2418 /// Is this value an IMPLICIT_DEF that can be erased?
2419 ///
2420 /// IMPLICIT_DEF values should only exist at the end of a basic block that
2421 /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2422 /// safely erased if they are overlapping a live value in the other live
2423 /// interval.
2424 ///
2425 /// Weird control flow graphs and incomplete PHI handling in
2426 /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2427 /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2428 /// normal values.
2429 bool ErasableImplicitDef = false;
2430
2431 /// True when the live range of this value will be pruned because of an
2432 /// overlapping CR_Replace value in the other live range.
2433 bool Pruned = false;
2434
2435 /// True once Pruned above has been computed.
2436 bool PrunedComputed = false;
2437
2438 /// True if this value is determined to be identical to OtherVNI
2439 /// (in valuesIdentical). This is used with CR_Erase where the erased
2440 /// copy is redundant, i.e. the source value is already the same as
2441 /// the destination. In such cases the subranges need to be updated
2442 /// properly. See comment at pruneSubRegValues for more info.
2443 bool Identical = false;
2444
2445 Val() = default;
2446
2447 bool isAnalyzed() const { return WriteLanes.any(); }
2448 };
2449
2450 /// One entry per value number in LI.
2452
2453 /// Compute the bitmask of lanes actually written by DefMI.
2454 /// Set Redef if there are any partial register definitions that depend on the
2455 /// previous value of the register.
2456 LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2457
2458 /// Find the ultimate value that VNI was copied from.
2459 std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
2460
2461 bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2462
2463 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2464 /// Return a conflict resolution when possible, but leave the hard cases as
2465 /// CR_Unresolved.
2466 /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2467 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2468 /// The recursion always goes upwards in the dominator tree, making loops
2469 /// impossible.
2470 ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2471
2472 /// Compute the value assignment for ValNo in RI.
2473 /// This may be called recursively by analyzeValue(), but never for a ValNo on
2474 /// the stack.
2475 void computeAssignment(unsigned ValNo, JoinVals &Other);
2476
2477 /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2478 /// the extent of the tainted lanes in the block.
2479 ///
2480 /// Multiple values in Other.LR can be affected since partial redefinitions
2481 /// can preserve previously tainted lanes.
2482 ///
2483 /// 1 %dst = VLOAD <-- Define all lanes in %dst
2484 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
2485 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
2486 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2487 ///
2488 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2489 /// entry to TaintedVals.
2490 ///
2491 /// Returns false if the tainted lanes extend beyond the basic block.
2492 bool
2493 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2494 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2495
2496 /// Return true if MI uses any of the given Lanes from Reg.
2497 /// This does not include partial redefinitions of Reg.
2498 bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
2499
2500 /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2501 /// be pruned:
2502 ///
2503 /// %dst = COPY %src
2504 /// %src = COPY %dst <-- This value to be pruned.
2505 /// %dst = COPY %src <-- This value is a copy of a pruned value.
2506 bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2507
2508public:
2509 JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
2510 SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
2511 LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2512 bool TrackSubRegLiveness)
2513 : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2514 SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2515 NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2516 TRI(TRI), Assignments(LR.getNumValNums(), -1),
2517 Vals(LR.getNumValNums()) {}
2518
2519 /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2520 /// Returns false if any conflicts were impossible to resolve.
2521 bool mapValues(JoinVals &Other);
2522
2523 /// Try to resolve conflicts that require all values to be mapped.
2524 /// Returns false if any conflicts were impossible to resolve.
2525 bool resolveConflicts(JoinVals &Other);
2526
2527 /// Prune the live range of values in Other.LR where they would conflict with
2528 /// CR_Replace values in LR. Collect end points for restoring the live range
2529 /// after joining.
2530 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2531 bool changeInstrs);
2532
2533 /// Removes subranges starting at copies that get removed. This sometimes
2534 /// happens when undefined subranges are copied around. These ranges contain
2535 /// no useful information and can be removed.
2536 void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2537
2538 /// Pruning values in subranges can lead to removing segments in these
2539 /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2540 /// the main range also need to be removed. This function will mark
2541 /// the corresponding values in the main range as pruned, so that
2542 /// eraseInstrs can do the final cleanup.
2543 /// The parameter @p LI must be the interval whose main range is the
2544 /// live range LR.
2545 void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2546
2547 /// Erase any machine instructions that have been coalesced away.
2548 /// Add erased instructions to ErasedInstrs.
2549 /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2550 /// the erased instrs.
2552 SmallVectorImpl<Register> &ShrinkRegs,
2553 LiveInterval *LI = nullptr);
2554
2555 /// Remove liverange defs at places where implicit defs will be removed.
2556 void removeImplicitDefs();
2557
2558 /// Get the value assignments suitable for passing to LiveInterval::join.
2559 const int *getAssignments() const { return Assignments.data(); }
2560
2561 /// Get the conflict resolution for a value number.
2562 ConflictResolution getResolution(unsigned Num) const {
2563 return Vals[Num].Resolution;
2564 }
2565};
2566
2567} // end anonymous namespace
2568
2569LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2570 const {
2571 LaneBitmask L;
2572 for (const MachineOperand &MO : DefMI->operands()) {
2573 if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
2574 continue;
2575 L |= TRI->getSubRegIndexLaneMask(
2576 TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2577 if (MO.readsReg())
2578 Redef = true;
2579 }
2580 return L;
2581}
2582
2583std::pair<const VNInfo *, Register>
2584JoinVals::followCopyChain(const VNInfo *VNI) const {
2585 Register TrackReg = Reg;
2586
2587 while (!VNI->isPHIDef()) {
2588 SlotIndex Def = VNI->def;
2589 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2590 assert(MI && "No defining instruction");
2591 if (!MI->isFullCopy())
2592 return std::make_pair(VNI, TrackReg);
2593 Register SrcReg = MI->getOperand(1).getReg();
2594 if (!SrcReg.isVirtual())
2595 return std::make_pair(VNI, TrackReg);
2596
2597 const LiveInterval &LI = LIS->getInterval(SrcReg);
2598 const VNInfo *ValueIn;
2599 // No subrange involved.
2600 if (!SubRangeJoin || !LI.hasSubRanges()) {
2601 LiveQueryResult LRQ = LI.Query(Def);
2602 ValueIn = LRQ.valueIn();
2603 } else {
2604 // Query subranges. Ensure that all matching ones take us to the same def
2605 // (allowing some of them to be undef).
2606 ValueIn = nullptr;
2607 for (const LiveInterval::SubRange &S : LI.subranges()) {
2608 // Transform lanemask to a mask in the joined live interval.
2609 LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2610 if ((SMask & LaneMask).none())
2611 continue;
2612 LiveQueryResult LRQ = S.Query(Def);
2613 if (!ValueIn) {
2614 ValueIn = LRQ.valueIn();
2615 continue;
2616 }
2617 if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2618 return std::make_pair(VNI, TrackReg);
2619 }
2620 }
2621 if (ValueIn == nullptr) {
2622 // Reaching an undefined value is legitimate, for example:
2623 //
2624 // 1 undef %0.sub1 = ... ;; %0.sub0 == undef
2625 // 2 %1 = COPY %0 ;; %1 is defined here.
2626 // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition,
2627 // ;; but it's equivalent to "undef".
2628 return std::make_pair(nullptr, SrcReg);
2629 }
2630 VNI = ValueIn;
2631 TrackReg = SrcReg;
2632 }
2633 return std::make_pair(VNI, TrackReg);
2634}
2635
2636bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2637 const JoinVals &Other) const {
2638 const VNInfo *Orig0;
2639 Register Reg0;
2640 std::tie(Orig0, Reg0) = followCopyChain(Value0);
2641 if (Orig0 == Value1 && Reg0 == Other.Reg)
2642 return true;
2643
2644 const VNInfo *Orig1;
2645 Register Reg1;
2646 std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2647 // If both values are undefined, and the source registers are the same
2648 // register, the values are identical. Filter out cases where only one
2649 // value is defined.
2650 if (Orig0 == nullptr || Orig1 == nullptr)
2651 return Orig0 == Orig1 && Reg0 == Reg1;
2652
2653 // The values are equal if they are defined at the same place and use the
2654 // same register. Note that we cannot compare VNInfos directly as some of
2655 // them might be from a copy created in mergeSubRangeInto() while the other
2656 // is from the original LiveInterval.
2657 return Orig0->def == Orig1->def && Reg0 == Reg1;
2658}
2659
2660JoinVals::ConflictResolution
2661JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2662 Val &V = Vals[ValNo];
2663 assert(!V.isAnalyzed() && "Value has already been analyzed!");
2664 VNInfo *VNI = LR.getValNumInfo(ValNo);
2665 if (VNI->isUnused()) {
2666 V.WriteLanes = LaneBitmask::getAll();
2667 return CR_Keep;
2668 }
2669
2670 // Get the instruction defining this value, compute the lanes written.
2671 const MachineInstr *DefMI = nullptr;
2672 if (VNI->isPHIDef()) {
2673 // Conservatively assume that all lanes in a PHI are valid.
2674 LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2675 : TRI->getSubRegIndexLaneMask(SubIdx);
2676 V.ValidLanes = V.WriteLanes = Lanes;
2677 } else {
2678 DefMI = Indexes->getInstructionFromIndex(VNI->def);
2679 assert(DefMI != nullptr);
2680 if (SubRangeJoin) {
2681 // We don't care about the lanes when joining subregister ranges.
2682 V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2683 if (DefMI->isImplicitDef()) {
2684 V.ValidLanes = LaneBitmask::getNone();
2685 V.ErasableImplicitDef = true;
2686 }
2687 } else {
2688 bool Redef = false;
2689 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2690
2691 // If this is a read-modify-write instruction, there may be more valid
2692 // lanes than the ones written by this instruction.
2693 // This only covers partial redef operands. DefMI may have normal use
2694 // operands reading the register. They don't contribute valid lanes.
2695 //
2696 // This adds ssub1 to the set of valid lanes in %src:
2697 //
2698 // %src:ssub1 = FOO
2699 //
2700 // This leaves only ssub1 valid, making any other lanes undef:
2701 //
2702 // %src:ssub1<def,read-undef> = FOO %src:ssub2
2703 //
2704 // The <read-undef> flag on the def operand means that old lane values are
2705 // not important.
2706 if (Redef) {
2707 V.RedefVNI = LR.Query(VNI->def).valueIn();
2708 assert((TrackSubRegLiveness || V.RedefVNI) &&
2709 "Instruction is reading nonexistent value");
2710 if (V.RedefVNI != nullptr) {
2711 computeAssignment(V.RedefVNI->id, Other);
2712 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2713 }
2714 }
2715
2716 // An IMPLICIT_DEF writes undef values.
2717 if (DefMI->isImplicitDef()) {
2718 // We normally expect IMPLICIT_DEF values to be live only until the end
2719 // of their block. If the value is really live longer and gets pruned in
2720 // another block, this flag is cleared again.
2721 //
2722 // Clearing the valid lanes is deferred until it is sure this can be
2723 // erased.
2724 V.ErasableImplicitDef = true;
2725 }
2726 }
2727 }
2728
2729 // Find the value in Other that overlaps VNI->def, if any.
2730 LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2731
2732 // It is possible that both values are defined by the same instruction, or
2733 // the values are PHIs defined in the same block. When that happens, the two
2734 // values should be merged into one, but not into any preceding value.
2735 // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2736 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2737 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2738
2739 // One value stays, the other is merged. Keep the earlier one, or the first
2740 // one we see.
2741 if (OtherVNI->def < VNI->def)
2742 Other.computeAssignment(OtherVNI->id, *this);
2743 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2744 // This is an early-clobber def overlapping a live-in value in the other
2745 // register. Not mergeable.
2746 V.OtherVNI = OtherLRQ.valueIn();
2747 return CR_Impossible;
2748 }
2749 V.OtherVNI = OtherVNI;
2750 Val &OtherV = Other.Vals[OtherVNI->id];
2751 // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2752 // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2753 // is assigned.
2754 if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2755 return CR_Keep;
2756 // Both sides have been analyzed now.
2757 // Allow overlapping PHI values. Any real interference would show up in a
2758 // predecessor, the PHI itself can't introduce any conflicts.
2759 if (VNI->isPHIDef())
2760 return CR_Merge;
2761 if ((V.ValidLanes & OtherV.ValidLanes).any())
2762 // Overlapping lanes can't be resolved.
2763 return CR_Impossible;
2764 else
2765 return CR_Merge;
2766 }
2767
2768 // No simultaneous def. Is Other live at the def?
2769 V.OtherVNI = OtherLRQ.valueIn();
2770 if (!V.OtherVNI)
2771 // No overlap, no conflict.
2772 return CR_Keep;
2773
2774 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2775
2776 // We have overlapping values, or possibly a kill of Other.
2777 // Recursively compute assignments up the dominator tree.
2778 Other.computeAssignment(V.OtherVNI->id, *this);
2779 Val &OtherV = Other.Vals[V.OtherVNI->id];
2780
2781 if (OtherV.ErasableImplicitDef) {
2782 // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2783 // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2784 // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2785 // technically.
2786 //
2787 // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2788 // to erase the IMPLICIT_DEF instruction.
2789 if (DefMI &&
2790 DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
2791 LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2792 << " extends into "
2794 << ", keeping it.\n");
2795 OtherV.ErasableImplicitDef = false;
2796 } else {
2797 // We deferred clearing these lanes in case we needed to save them
2798 OtherV.ValidLanes &= ~OtherV.WriteLanes;
2799 }
2800 }
2801
2802 // Allow overlapping PHI values. Any real interference would show up in a
2803 // predecessor, the PHI itself can't introduce any conflicts.
2804 if (VNI->isPHIDef())
2805 return CR_Replace;
2806
2807 // Check for simple erasable conflicts.
2808 if (DefMI->isImplicitDef())
2809 return CR_Erase;
2810
2811 // Include the non-conflict where DefMI is a coalescable copy that kills
2812 // OtherVNI. We still want the copy erased and value numbers merged.
2813 if (CP.isCoalescable(DefMI)) {
2814 // Some of the lanes copied from OtherVNI may be undef, making them undef
2815 // here too.
2816 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2817 return CR_Erase;
2818 }
2819
2820 // This may not be a real conflict if DefMI simply kills Other and defines
2821 // VNI.
2822 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2823 return CR_Keep;
2824
2825 // Handle the case where VNI and OtherVNI can be proven to be identical:
2826 //
2827 // %other = COPY %ext
2828 // %this = COPY %ext <-- Erase this copy
2829 //
2830 if (DefMI->isFullCopy() && !CP.isPartial() &&
2831 valuesIdentical(VNI, V.OtherVNI, Other)) {
2832 V.Identical = true;
2833 return CR_Erase;
2834 }
2835
2836 // The remaining checks apply to the lanes, which aren't tracked here. This
2837 // was already decided to be OK via the following CR_Replace condition.
2838 // CR_Replace.
2839 if (SubRangeJoin)
2840 return CR_Replace;
2841
2842 // If the lanes written by this instruction were all undef in OtherVNI, it is
2843 // still safe to join the live ranges. This can't be done with a simple value
2844 // mapping, though - OtherVNI will map to multiple values:
2845 //
2846 // 1 %dst:ssub0 = FOO <-- OtherVNI
2847 // 2 %src = BAR <-- VNI
2848 // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2849 // 4 BAZ killed %dst
2850 // 5 QUUX killed %src
2851 //
2852 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2853 // handles this complex value mapping.
2854 if ((V.WriteLanes & OtherV.ValidLanes).none())
2855 return CR_Replace;
2856
2857 // If the other live range is killed by DefMI and the live ranges are still
2858 // overlapping, it must be because we're looking at an early clobber def:
2859 //
2860 // %dst<def,early-clobber> = ASM killed %src
2861 //
2862 // In this case, it is illegal to merge the two live ranges since the early
2863 // clobber def would clobber %src before it was read.
2864 if (OtherLRQ.isKill()) {
2865 // This case where the def doesn't overlap the kill is handled above.
2866 assert(VNI->def.isEarlyClobber() &&
2867 "Only early clobber defs can overlap a kill");
2868 return CR_Impossible;
2869 }
2870
2871 // VNI is clobbering live lanes in OtherVNI, but there is still the
2872 // possibility that no instructions actually read the clobbered lanes.
2873 // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2874 // Otherwise Other.RI wouldn't be live here.
2875 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2876 return CR_Impossible;
2877
2878 if (TrackSubRegLiveness) {
2879 auto &OtherLI = LIS->getInterval(Other.Reg);
2880 // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
2881 // share the same live range, so we just need to check whether they have
2882 // any conflict bit in their LaneMask.
2883 if (!OtherLI.hasSubRanges()) {
2884 LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
2885 return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
2886 }
2887
2888 // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
2889 // impossible to resolve the conflict. Otherwise, we can just replace
2890 // OtherVNI because of no real conflict.
2891 for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
2892 LaneBitmask OtherMask =
2893 TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
2894 if ((OtherMask & V.WriteLanes).none())
2895 continue;
2896
2897 auto OtherSRQ = OtherSR.Query(VNI->def);
2898 if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
2899 // VNI is clobbering some lanes of OtherVNI, they have real conflict.
2900 return CR_Impossible;
2901 }
2902 }
2903
2904 // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
2905 return CR_Replace;
2906 }
2907
2908 // We need to verify that no instructions are reading the clobbered lanes.
2909 // To save compile time, we'll only check that locally. Don't allow the
2910 // tainted value to escape the basic block.
2911 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2912 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2913 return CR_Impossible;
2914
2915 // There are still some things that could go wrong besides clobbered lanes
2916 // being read, for example OtherVNI may be only partially redefined in MBB,
2917 // and some clobbered lanes could escape the block. Save this analysis for
2918 // resolveConflicts() when all values have been mapped. We need to know
2919 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2920 // that now - the recursive analyzeValue() calls must go upwards in the
2921 // dominator tree.
2922 return CR_Unresolved;
2923}
2924
2925void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2926 Val &V = Vals[ValNo];
2927 if (V.isAnalyzed()) {
2928 // Recursion should always move up the dominator tree, so ValNo is not
2929 // supposed to reappear before it has been assigned.
2930 assert(Assignments[ValNo] != -1 && "Bad recursion?");
2931 return;
2932 }
2933 switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2934 case CR_Erase:
2935 case CR_Merge:
2936 // Merge this ValNo into OtherVNI.
2937 assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2938 assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2939 Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2940 LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
2941 << LR.getValNumInfo(ValNo)->def << " into "
2942 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2943 << V.OtherVNI->def << " --> @"
2944 << NewVNInfo[Assignments[ValNo]]->def << '\n');
2945 break;
2946 case CR_Replace:
2947 case CR_Unresolved: {
2948 // The other value is going to be pruned if this join is successful.
2949 assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2950 Val &OtherV = Other.Vals[V.OtherVNI->id];
2951 // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2952 // its lanes.
2953 if (OtherV.ErasableImplicitDef &&
2954 TrackSubRegLiveness &&
2955 (OtherV.WriteLanes & ~V.ValidLanes).any()) {
2956 LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
2957
2958 OtherV.ErasableImplicitDef = false;
2959 // The valid lanes written by the implicit_def were speculatively cleared
2960 // before, so make this more conservative. It may be better to track this,
2961 // I haven't found a testcase where it matters.
2962 OtherV.ValidLanes = LaneBitmask::getAll();
2963 }
2964
2965 OtherV.Pruned = true;
2966 [[fallthrough]];
2967 }
2968 default:
2969 // This value number needs to go in the final joined live range.
2970 Assignments[ValNo] = NewVNInfo.size();
2971 NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2972 break;
2973 }
2974}
2975
2976bool JoinVals::mapValues(JoinVals &Other) {
2977 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2978 computeAssignment(i, Other);
2979 if (Vals[i].Resolution == CR_Impossible) {
2980 LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2981 << '@' << LR.getValNumInfo(i)->def << '\n');
2982 return false;
2983 }
2984 }
2985 return true;
2986}
2987
2988bool JoinVals::
2989taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2990 SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
2991 VNInfo *VNI = LR.getValNumInfo(ValNo);
2992 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2993 SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
2994
2995 // Scan Other.LR from VNI.def to MBBEnd.
2996 LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
2997 assert(OtherI != Other.LR.end() && "No conflict?");
2998 do {
2999 // OtherI is pointing to a tainted value. Abort the join if the tainted
3000 // lanes escape the block.
3001 SlotIndex End = OtherI->end;
3002 if (End >= MBBEnd) {
3003 LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
3004 << OtherI->valno->id << '@' << OtherI->start << '\n');
3005 return false;
3006 }
3007 LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
3008 << OtherI->valno->id << '@' << OtherI->start << " to "
3009 << End << '\n');
3010 // A dead def is not a problem.
3011 if (End.isDead())
3012 break;
3013 TaintExtent.push_back(std::make_pair(End, TaintedLanes));
3014
3015 // Check for another def in the MBB.
3016 if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
3017 break;
3018
3019 // Lanes written by the new def are no longer tainted.
3020 const Val &OV = Other.Vals[OtherI->valno->id];
3021 TaintedLanes &= ~OV.WriteLanes;
3022 if (!OV.RedefVNI)
3023 break;
3024 } while (TaintedLanes.any());
3025 return true;
3026}
3027
3028bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3029 LaneBitmask Lanes) const {
3030 if (MI.isDebugOrPseudoInstr())
3031 return false;
3032 for (const MachineOperand &MO : MI.operands()) {
3033 if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
3034 continue;
3035 if (!MO.readsReg())
3036 continue;
3037 unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3038 if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3039 return true;
3040 }
3041 return false;
3042}
3043
3044bool JoinVals::resolveConflicts(JoinVals &Other) {
3045 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3046 Val &V = Vals[i];
3047 assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
3048 if (V.Resolution != CR_Unresolved)
3049 continue;
3050 LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
3051 << LR.getValNumInfo(i)->def
3052 << ' ' << PrintLaneMask(LaneMask) << '\n');
3053 if (SubRangeJoin)
3054 return false;
3055
3056 ++NumLaneConflicts;
3057 assert(V.OtherVNI && "Inconsistent conflict resolution.");
3058 VNInfo *VNI = LR.getValNumInfo(i);
3059 const Val &OtherV = Other.Vals[V.OtherVNI->id];
3060
3061 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
3062 // join, those lanes will be tainted with a wrong value. Get the extent of
3063 // the tainted lanes.
3064 LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
3066 if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
3067 // Tainted lanes would extend beyond the basic block.
3068 return false;
3069
3070 assert(!TaintExtent.empty() && "There should be at least one conflict.");
3071
3072 // Now look at the instructions from VNI->def to TaintExtent (inclusive).
3073 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3075 if (!VNI->isPHIDef()) {
3076 MI = Indexes->getInstructionFromIndex(VNI->def);
3077 if (!VNI->def.isEarlyClobber()) {
3078 // No need to check the instruction defining VNI for reads.
3079 ++MI;
3080 }
3081 }
3082 assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
3083 "Interference ends on VNI->def. Should have been handled earlier");
3084 MachineInstr *LastMI =
3085 Indexes->getInstructionFromIndex(TaintExtent.front().first);
3086 assert(LastMI && "Range must end at a proper instruction");
3087 unsigned TaintNum = 0;
3088 while (true) {
3089 assert(MI != MBB->end() && "Bad LastMI");
3090 if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
3091 LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
3092 return false;
3093 }
3094 // LastMI is the last instruction to use the current value.
3095 if (&*MI == LastMI) {
3096 if (++TaintNum == TaintExtent.size())
3097 break;
3098 LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
3099 assert(LastMI && "Range must end at a proper instruction");
3100 TaintedLanes = TaintExtent[TaintNum].second;
3101 }
3102 ++MI;
3103 }
3104
3105 // The tainted lanes are unused.
3106 V.Resolution = CR_Replace;
3107 ++NumLaneResolves;
3108 }
3109 return true;
3110}
3111
3112bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3113 Val &V = Vals[ValNo];
3114 if (V.Pruned || V.PrunedComputed)
3115 return V.Pruned;
3116
3117 if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3118 return V.Pruned;
3119
3120 // Follow copies up the dominator tree and check if any intermediate value
3121 // has been pruned.
3122 V.PrunedComputed = true;
3123 V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3124 return V.Pruned;
3125}
3126
3127void JoinVals::pruneValues(JoinVals &Other,
3128 SmallVectorImpl<SlotIndex> &EndPoints,
3129 bool changeInstrs) {
3130 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3131 SlotIndex Def = LR.getValNumInfo(i)->def;
3132 switch (Vals[i].Resolution) {
3133 case CR_Keep:
3134 break;
3135 case CR_Replace: {
3136 // This value takes precedence over the value in Other.LR.
3137 LIS->pruneValue(Other.LR, Def, &EndPoints);
3138 // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
3139 // instructions are only inserted to provide a live-out value for PHI
3140 // predecessors, so the instruction should simply go away once its value
3141 // has been replaced.
3142 Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
3143 bool EraseImpDef = OtherV.ErasableImplicitDef &&
3144 OtherV.Resolution == CR_Keep;
3145 if (!Def.isBlock()) {
3146 if (changeInstrs) {
3147 // Remove <def,read-undef> flags. This def is now a partial redef.
3148 // Also remove dead flags since the joined live range will
3149 // continue past this instruction.
3150 for (MachineOperand &MO :
3151 Indexes->getInstructionFromIndex(Def)->operands()) {
3152 if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
3153 if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
3154 MO.setIsUndef(false);
3155 MO.setIsDead(false);
3156 }
3157 }
3158 }
3159 // This value will reach instructions below, but we need to make sure
3160 // the live range also reaches the instruction at Def.
3161 if (!EraseImpDef)
3162 EndPoints.push_back(Def);
3163 }
3164 LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
3165 << ": " << Other.LR << '\n');
3166 break;
3167 }
3168 case CR_Erase:
3169 case CR_Merge:
3170 if (isPrunedValue(i, Other)) {
3171 // This value is ultimately a copy of a pruned value in LR or Other.LR.
3172 // We can no longer trust the value mapping computed by
3173 // computeAssignment(), the value that was originally copied could have
3174 // been replaced.
3175 LIS->pruneValue(LR, Def, &EndPoints);
3176 LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
3177 << Def << ": " << LR << '\n');
3178 }
3179 break;
3180 case CR_Unresolved:
3181 case CR_Impossible:
3182 llvm_unreachable("Unresolved conflicts");
3183 }
3184 }
3185}
3186
3187// Check if the segment consists of a copied live-through value (i.e. the copy
3188// in the block only extended the liveness, of an undef value which we may need
3189// to handle).
3190static bool isLiveThrough(const LiveQueryResult Q) {
3191 return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3192}
3193
3194/// Consider the following situation when coalescing the copy between
3195/// %31 and %45 at 800. (The vertical lines represent live range segments.)
3196///
3197/// Main range Subrange 0004 (sub2)
3198/// %31 %45 %31 %45
3199/// 544 %45 = COPY %28 + +
3200/// | v1 | v1
3201/// 560B bb.1: + +
3202/// 624 = %45.sub2 | v2 | v2
3203/// 800 %31 = COPY %45 + + + +
3204/// | v0 | v0
3205/// 816 %31.sub1 = ... + |
3206/// 880 %30 = COPY %31 | v1 +
3207/// 928 %45 = COPY %30 | + +
3208/// | | v0 | v0 <--+
3209/// 992B ; backedge -> bb.1 | + + |
3210/// 1040 = %31.sub0 + |
3211/// This value must remain
3212/// live-out!
3213///
3214/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3215/// redundant, since it copies the value from %45 back into it. The
3216/// conflict resolution for the main range determines that %45.v0 is
3217/// to be erased, which is ok since %31.v1 is identical to it.
3218/// The problem happens with the subrange for sub2: it has to be live
3219/// on exit from the block, but since 928 was actually a point of
3220/// definition of %45.sub2, %45.sub2 was not live immediately prior
3221/// to that definition. As a result, when 928 was erased, the value v0
3222/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3223/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3224/// providing an incorrect value to the use at 624.
3225///
3226/// Since the main-range values %31.v1 and %45.v0 were proved to be
3227/// identical, the corresponding values in subranges must also be the
3228/// same. A redundant copy is removed because it's not needed, and not
3229/// because it copied an undefined value, so any liveness that originated
3230/// from that copy cannot disappear. When pruning a value that started
3231/// at the removed copy, the corresponding identical value must be
3232/// extended to replace it.
3233void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
3234 // Look for values being erased.
3235 bool DidPrune = false;
3236 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3237 Val &V = Vals[i];
3238 // We should trigger in all cases in which eraseInstrs() does something.
3239 // match what eraseInstrs() is doing, print a message so
3240 if (V.Resolution != CR_Erase &&
3241 (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
3242 continue;
3243
3244 // Check subranges at the point where the copy will be removed.
3245 SlotIndex Def = LR.getValNumInfo(i)->def;
3246 SlotIndex OtherDef;
3247 if (V.Identical)
3248 OtherDef = V.OtherVNI->def;
3249
3250 // Print message so mismatches with eraseInstrs() can be diagnosed.
3251 LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
3252 << '\n');
3253 for (LiveInterval::SubRange &S : LI.subranges()) {
3254 LiveQueryResult Q = S.Query(Def);
3255
3256 // If a subrange starts at the copy then an undefined value has been
3257 // copied and we must remove that subrange value as well.
3258 VNInfo *ValueOut = Q.valueOutOrDead();
3259 if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
3260 (V.Identical && V.Resolution == CR_Erase &&
3261 ValueOut->def == Def))) {
3262 LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
3263 << " at " << Def << "\n");
3264 SmallVector<SlotIndex,8> EndPoints;
3265 LIS->pruneValue(S, Def, &EndPoints);
3266 DidPrune = true;
3267 // Mark value number as unused.
3268 ValueOut->markUnused();
3269
3270 if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
3271 // If V is identical to V.OtherVNI (and S was live at OtherDef),
3272 // then we can't simply prune V from S. V needs to be replaced
3273 // with V.OtherVNI.
3274 LIS->extendToIndices(S, EndPoints);
3275 }
3276
3277 // We may need to eliminate the subrange if the copy introduced a live
3278 // out undef value.
3279 if (ValueOut->isPHIDef())
3280 ShrinkMask |= S.LaneMask;
3281 continue;
3282 }
3283
3284 // If a subrange ends at the copy, then a value was copied but only
3285 // partially used later. Shrink the subregister range appropriately.
3286 //
3287 // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
3288 // conservatively correct.
3289 if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
3290 (V.Resolution == CR_Erase && isLiveThrough(Q))) {
3291 LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
3292 << PrintLaneMask(S.LaneMask) << " at " << Def
3293 << "\n");
3294 ShrinkMask |= S.LaneMask;
3295 }
3296 }
3297 }
3298 if (DidPrune)
3300}
3301
3302/// Check if any of the subranges of @p LI contain a definition at @p Def.
3304 for (LiveInterval::SubRange &SR : LI.subranges()) {
3305 if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3306 if (VNI->def == Def)
3307 return true;
3308 }
3309 return false;
3310}
3311
3312void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3313 assert(&static_cast<LiveRange&>(LI) == &LR);
3314
3315 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3316 if (Vals[i].Resolution != CR_Keep)
3317 continue;
3318 VNInfo *VNI = LR.getValNumInfo(i);
3319 if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3320 continue;
3321 Vals[i].Pruned = true;
3322 ShrinkMainRange = true;
3323 }
3324}
3325
3326void JoinVals::removeImplicitDefs() {
3327 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3328 Val &V = Vals[i];
3329 if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3330 continue;
3331
3332 VNInfo *VNI = LR.getValNumInfo(i);
3333 VNI->markUnused();
3334 LR.removeValNo(VNI);
3335 }
3336}
3337
3338void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3339 SmallVectorImpl<Register> &ShrinkRegs,
3340 LiveInterval *LI) {
3341 for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3342 // Get the def location before markUnused() below invalidates it.
3343 VNInfo *VNI = LR.getValNumInfo(i);
3344 SlotIndex Def = VNI->def;
3345 switch (Vals[i].Resolution) {
3346 case CR_Keep: {
3347 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3348 // longer. The IMPLICIT_DEF instructions are only inserted by
3349 // PHIElimination to guarantee that all PHI predecessors have a value.
3350 if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
3351 break;
3352 // Remove value number i from LR.
3353 // For intervals with subranges, removing a segment from the main range
3354 // may require extending the previous segment: for each definition of
3355 // a subregister, there will be a corresponding def in the main range.
3356 // That def may fall in the middle of a segment from another subrange.
3357 // In such cases, removing this def from the main range must be
3358 // complemented by extending the main range to account for the liveness
3359 // of the other subrange.
3360 // The new end point of the main range segment to be extended.
3361 SlotIndex NewEnd;
3362 if (LI != nullptr) {
3364 assert(I != LR.end());
3365 // Do not extend beyond the end of the segment being removed.
3366 // The segment may have been pruned in preparation for joining
3367 // live ranges.
3368 NewEnd = I->end;
3369 }
3370
3371 LR.removeValNo(VNI);
3372 // Note that this VNInfo is reused and still referenced in NewVNInfo,
3373 // make it appear like an unused value number.
3374 VNI->markUnused();
3375
3376 if (LI != nullptr && LI->hasSubRanges()) {
3377 assert(static_cast<LiveRange*>(LI) == &LR);
3378 // Determine the end point based on the subrange information:
3379 // minimum of (earliest def of next segment,
3380 // latest end point of containing segment)
3381 SlotIndex ED, LE;
3382 for (LiveInterval::SubRange &SR : LI->subranges()) {
3383 LiveRange::iterator I = SR.find(Def);
3384 if (I == SR.end())
3385 continue;
3386 if (I->start > Def)
3387 ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3388 else
3389 LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3390 }
3391 if (LE.isValid())
3392 NewEnd = std::min(NewEnd, LE);
3393 if (ED.isValid())
3394 NewEnd = std::min(NewEnd, ED);
3395
3396 // We only want to do the extension if there was a subrange that
3397 // was live across Def.
3398 if (LE.isValid()) {
3399 LiveRange::iterator S = LR.find(Def);
3400 if (S != LR.begin())
3401 std::prev(S)->end = NewEnd;
3402 }
3403 }
3404 LLVM_DEBUG({
3405 dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3406 if (LI != nullptr)
3407 dbgs() << "\t\t LHS = " << *LI << '\n';
3408 });
3409 [[fallthrough]];
3410 }
3411
3412 case CR_Erase: {
3413 MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3414 assert(MI && "No instruction to erase");
3415 if (MI->isCopy()) {
3416 Register Reg = MI->getOperand(1).getReg();
3417 if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3418 ShrinkRegs.push_back(Reg);
3419 }
3420 ErasedInstrs.insert(MI);
3421 LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3423 MI->eraseFromParent();
3424 break;
3425 }
3426 default:
3427 break;
3428 }
3429 }
3430}
3431
3432void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3433 LaneBitmask LaneMask,
3434 const CoalescerPair &CP) {
3435 SmallVector<VNInfo*, 16> NewVNInfo;
3436 JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3437 NewVNInfo, CP, LIS, TRI, true, true);
3438 JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3439 NewVNInfo, CP, LIS, TRI, true, true);
3440
3441 // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3442 // We should be able to resolve all conflicts here as we could successfully do
3443 // it on the mainrange already. There is however a problem when multiple
3444 // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3445 // interferences.
3446 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3447 // We already determined that it is legal to merge the intervals, so this
3448 // should never fail.
3449 llvm_unreachable("*** Couldn't join subrange!\n");
3450 }
3451 if (!LHSVals.resolveConflicts(RHSVals) ||
3452 !RHSVals.resolveConflicts(LHSVals)) {
3453 // We already determined that it is legal to merge the intervals, so this
3454 // should never fail.
3455 llvm_unreachable("*** Couldn't join subrange!\n");
3456 }
3457
3458 // The merging algorithm in LiveInterval::join() can't handle conflicting
3459 // value mappings, so we need to remove any live ranges that overlap a
3460 // CR_Replace resolution. Collect a set of end points that can be used to
3461 // restore the live range after joining.
3462 SmallVector<SlotIndex, 8> EndPoints;
3463 LHSVals.pruneValues(RHSVals, EndPoints, false);
3464 RHSVals.pruneValues(LHSVals, EndPoints, false);
3465
3466 LHSVals.removeImplicitDefs();
3467 RHSVals.removeImplicitDefs();
3468
3469 LRange.verify();
3470 RRange.verify();
3471
3472 // Join RRange into LHS.
3473 LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3474 NewVNInfo);
3475
3476 LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3477 << ' ' << LRange << "\n");
3478 if (EndPoints.empty())
3479 return;
3480
3481 // Recompute the parts of the live range we had to remove because of
3482 // CR_Replace conflicts.
3483 LLVM_DEBUG({
3484 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3485 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3486 dbgs() << EndPoints[i];
3487 if (i != n-1)
3488 dbgs() << ',';
3489 }
3490 dbgs() << ": " << LRange << '\n';
3491 });
3492 LIS->extendToIndices(LRange, EndPoints);
3493}
3494
3495void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3496 const LiveRange &ToMerge,
3497 LaneBitmask LaneMask,
3498 CoalescerPair &CP,
3499 unsigned ComposeSubRegIdx) {
3501 LI.refineSubRanges(
3502 Allocator, LaneMask,
3503 [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3504 if (SR.empty()) {
3505 SR.assign(ToMerge, Allocator);
3506 } else {
3507 // joinSubRegRange() destroys the merged range, so we need a copy.
3508 LiveRange RangeCopy(ToMerge, Allocator);
3509 joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3510 }
3511 },
3512 *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3513}
3514
3515bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3517 return false;
3518 auto &Counter = LargeLIVisitCounter[LI.reg()];
3519 if (Counter < LargeIntervalFreqThreshold) {
3520 Counter++;
3521 return false;
3522 }
3523 return true;
3524}
3525
3526bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3527 SmallVector<VNInfo*, 16> NewVNInfo;
3528 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3529 LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3530 bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3531 JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3532 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3533 JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3534 NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3535
3536 LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3537
3538 if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
3539 return false;
3540
3541 // First compute NewVNInfo and the simple value mappings.
3542 // Detect impossible conflicts early.
3543 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3544 return false;
3545
3546 // Some conflicts can only be resolved after all values have been mapped.
3547 if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3548 return false;
3549
3550 // All clear, the live ranges can be merged.
3551 if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3553
3554 // Transform lanemasks from the LHS to masks in the coalesced register and
3555 // create initial subranges if necessary.
3556 unsigned DstIdx = CP.getDstIdx();
3557 if (!LHS.hasSubRanges()) {
3558 LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3559 : TRI->getSubRegIndexLaneMask(DstIdx);
3560 // LHS must support subregs or we wouldn't be in this codepath.
3561 assert(Mask.any());
3562 LHS.createSubRangeFrom(Allocator, Mask, LHS);
3563 } else if (DstIdx != 0) {
3564 // Transform LHS lanemasks to new register class if necessary.
3565 for (LiveInterval::SubRange &R : LHS.subranges()) {
3566 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3567 R.LaneMask = Mask;
3568 }
3569 }
3570 LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3571 << '\n');
3572
3573 // Determine lanemasks of RHS in the coalesced register and merge subranges.
3574 unsigned SrcIdx = CP.getSrcIdx();
3575 if (!RHS.hasSubRanges()) {
3576 LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3577 : TRI->getSubRegIndexLaneMask(SrcIdx);
3578 mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
3579 } else {
3580 // Pair up subranges and merge.
3581 for (LiveInterval::SubRange &R : RHS.subranges()) {
3582 LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3583 mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
3584 }
3585 }
3586 LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3587
3588 // Pruning implicit defs from subranges may result in the main range
3589 // having stale segments.
3590 LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3591
3592 LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3593 RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3594 }
3595
3596 // The merging algorithm in LiveInterval::join() can't handle conflicting
3597 // value mappings, so we need to remove any live ranges that overlap a
3598 // CR_Replace resolution. Collect a set of end points that can be used to
3599 // restore the live range after joining.
3600 SmallVector<SlotIndex, 8> EndPoints;
3601 LHSVals.pruneValues(RHSVals, EndPoints, true);
3602 RHSVals.pruneValues(LHSVals, EndPoints, true);
3603
3604 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3605 // registers to require trimming.
3606 SmallVector<Register, 8> ShrinkRegs;
3607 LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3608 RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3609 while (!ShrinkRegs.empty())
3610 shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3611
3612 // Scan and mark undef any DBG_VALUEs that would refer to a different value.
3613 checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
3614
3615 // If the RHS covers any PHI locations that were tracked for debug-info, we
3616 // must update tracking information to reflect the join.
3617 auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
3618 if (RegIt != RegToPHIIdx.end()) {
3619 // Iterate over all the debug instruction numbers assigned this register.
3620 for (unsigned InstID : RegIt->second) {
3621 auto PHIIt = PHIValToPos.find(InstID);
3622 assert(PHIIt != PHIValToPos.end());
3623 const SlotIndex &SI = PHIIt->second.SI;
3624
3625 // Does the RHS cover the position of this PHI?
3626 auto LII = RHS.find(SI);
3627 if (LII == RHS.end() || LII->start > SI)
3628 continue;
3629
3630 // Accept two kinds of subregister movement:
3631 // * When we merge from one register class into a larger register:
3632 // %1:gr16 = some-inst
3633 // ->
3634 // %2:gr32.sub_16bit = some-inst
3635 // * When the PHI is already in a subregister, and the larger class
3636 // is coalesced:
3637 // %2:gr32.sub_16bit = some-inst
3638 // %3:gr32 = COPY %2
3639 // ->
3640 // %3:gr32.sub_16bit = some-inst
3641 // Test for subregister move:
3642 if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
3643 // If we're moving between different subregisters, ignore this join.
3644 // The PHI will not get a location, dropping variable locations.
3645 if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
3646 continue;
3647
3648 // Update our tracking of where the PHI is.
3649 PHIIt->second.Reg = CP.getDstReg();
3650
3651 // If we merge into a sub-register of a larger class (test above),
3652 // update SubReg.
3653 if (CP.getSrcIdx() != 0)
3654 PHIIt->second.SubReg = CP.getSrcIdx();
3655 }
3656
3657 // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
3658 // different VRegs now. Copy old collection of debug instruction numbers and
3659 // erase the old one:
3660 auto InstrNums = RegIt->second;
3661 RegToPHIIdx.erase(RegIt);
3662
3663 // There might already be PHIs being tracked in the destination VReg. Insert
3664 // into an existing tracking collection, or insert a new one.
3665 RegIt = RegToPHIIdx.find(CP.getDstReg());
3666 if (RegIt != RegToPHIIdx.end())
3667 RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
3668 InstrNums.end());
3669 else
3670 RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
3671 }
3672
3673 // Join RHS into LHS.
3674 LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3675
3676 // Kill flags are going to be wrong if the live ranges were overlapping.
3677 // Eventually, we should simply clear all kill flags when computing live
3678 // ranges. They are reinserted after register allocation.
3679 MRI->clearKillFlags(LHS.reg());
3680 MRI->clearKillFlags(RHS.reg());
3681
3682 if (!EndPoints.empty()) {
3683 // Recompute the parts of the live range we had to remove because of
3684 // CR_Replace conflicts.
3685 LLVM_DEBUG({
3686 dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3687 for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3688 dbgs() << EndPoints[i];
3689 if (i != n-1)
3690 dbgs() << ',';
3691 }
3692 dbgs() << ": " << LHS << '\n';
3693 });
3694 LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3695 }
3696
3697 return true;
3698}
3699
3700bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3701 return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3702}
3703
3704void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
3705{
3706 const SlotIndexes &Slots = *LIS->getSlotIndexes();
3708
3709 // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3710 // vreg => DbgValueLoc map.
3711 auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3712 for (auto *X : ToInsert) {
3713 for (const auto &Op : X->debug_operands()) {
3714 if (Op.isReg() && Op.getReg().isVirtual())
3715 DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3716 }
3717 }
3718
3719 ToInsert.clear();
3720 };
3721
3722 // Iterate over all instructions, collecting them into the ToInsert vector.
3723 // Once a non-debug instruction is found, record the slot index of the
3724 // collected DBG_VALUEs.
3725 for (auto &MBB : MF) {
3726 SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3727
3728 for (auto &MI : MBB) {
3729 if (MI.isDebugValue()) {
3730 if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3731 return MO.isReg() && MO.getReg().isVirtual();
3732 }))
3733 ToInsert.push_back(&MI);
3734 } else if (!MI.isDebugOrPseudoInstr()) {
3735 CurrentSlot = Slots.getInstructionIndex(MI);
3736 CloseNewDVRange(CurrentSlot);
3737 }
3738 }
3739
3740 // Close range of DBG_VALUEs at the end of blocks.
3741 CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3742 }
3743
3744 // Sort all DBG_VALUEs we've seen by slot number.
3745 for (auto &Pair : DbgVRegToValues)
3746 llvm::sort(Pair.second);
3747}
3748
3749void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3750 LiveRange &LHS,
3751 JoinVals &LHSVals,
3752 LiveRange &RHS,
3753 JoinVals &RHSVals) {
3754 auto ScanForDstReg = [&](Register Reg) {
3755 checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3756 };
3757
3758 auto ScanForSrcReg = [&](Register Reg) {
3759 checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3760 };
3761
3762 // Scan for potentially unsound DBG_VALUEs: examine first the register number
3763 // Reg, and then any other vregs that may have been merged into it.
3764 auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
3765 Func(Reg);
3766 if (DbgMergedVRegNums.count(Reg))
3767 for (Register X : DbgMergedVRegNums[Reg])
3768 Func(X);
3769 };
3770
3771 // Scan for unsound updates of both the source and destination register.
3772 PerformScan(CP.getSrcReg(), ScanForSrcReg);
3773 PerformScan(CP.getDstReg(), ScanForDstReg);
3774}
3775
3776void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
3777 LiveRange &OtherLR,
3778 LiveRange &RegLR,
3779 JoinVals &RegVals) {
3780 // Are there any DBG_VALUEs to examine?
3781 auto VRegMapIt = DbgVRegToValues.find(Reg);
3782 if (VRegMapIt == DbgVRegToValues.end())
3783 return;
3784
3785 auto &DbgValueSet = VRegMapIt->second;
3786 auto DbgValueSetIt = DbgValueSet.begin();
3787 auto SegmentIt = OtherLR.begin();
3788
3789 bool LastUndefResult = false;
3790 SlotIndex LastUndefIdx;
3791
3792 // If the "Other" register is live at a slot Idx, test whether Reg can
3793 // safely be merged with it, or should be marked undef.
3794 auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
3795 &LastUndefIdx](SlotIndex Idx) -> bool {
3796 // Our worst-case performance typically happens with asan, causing very
3797 // many DBG_VALUEs of the same location. Cache a copy of the most recent
3798 // result for this edge-case.
3799 if (LastUndefIdx == Idx)
3800 return LastUndefResult;
3801
3802 // If the other range was live, and Reg's was not, the register coalescer
3803 // will not have tried to resolve any conflicts. We don't know whether
3804 // the DBG_VALUE will refer to the same value number, so it must be made
3805 // undef.
3806 auto OtherIt = RegLR.find(Idx);
3807 if (OtherIt == RegLR.end())
3808 return true;
3809
3810 // Both the registers were live: examine the conflict resolution record for
3811 // the value number Reg refers to. CR_Keep meant that this value number
3812 // "won" and the merged register definitely refers to that value. CR_Erase
3813 // means the value number was a redundant copy of the other value, which
3814 // was coalesced and Reg deleted. It's safe to refer to the other register
3815 // (which will be the source of the copy).
3816 auto Resolution = RegVals.getResolution(OtherIt->valno->id);
3817 LastUndefResult = Resolution != JoinVals::CR_Keep &&
3818 Resolution != JoinVals::CR_Erase;
3819 LastUndefIdx = Idx;
3820 return LastUndefResult;
3821 };
3822
3823 // Iterate over both the live-range of the "Other" register, and the set of
3824 // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
3825 // slot index. This relies on the DbgValueSet being ordered.
3826 while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
3827 if (DbgValueSetIt->first < SegmentIt->end) {
3828 // "Other" is live and there is a DBG_VALUE of Reg: test if we should
3829 // set it undef.
3830 if (DbgValueSetIt->first >= SegmentIt->start) {
3831 bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
3832 bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
3833 if (HasReg && ShouldUndefReg) {
3834 // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
3835 DbgValueSetIt->second->setDebugValueUndef();
3836 continue;
3837 }
3838 }
3839 ++DbgValueSetIt;
3840 } else {
3841 ++SegmentIt;
3842 }
3843 }
3844}
3845
3846namespace {
3847
3848/// Information concerning MBB coalescing priority.
3849struct MBBPriorityInfo {
3851 unsigned Depth;
3852 bool IsSplit;
3853
3854 MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3855 : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3856};
3857
3858} // end anonymous namespace
3859
3860/// C-style comparator that sorts first based on the loop depth of the basic
3861/// block (the unsigned), and then on the MBB number.
3862///
3863/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3864static int compareMBBPriority(const MBBPriorityInfo *LHS,
3865 const MBBPriorityInfo *RHS) {
3866 // Deeper loops first
3867 if (LHS->Depth != RHS->Depth)
3868 return LHS->Depth > RHS->Depth ? -1 : 1;
3869
3870 // Try to unsplit critical edges next.
3871 if (LHS->IsSplit != RHS->IsSplit)
3872 return LHS->IsSplit ? -1 : 1;
3873
3874 // Prefer blocks that are more connected in the CFG. This takes care of
3875 // the most difficult copies first while intervals are short.
3876 unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3877 unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3878 if (cl != cr)
3879 return cl > cr ? -1 : 1;
3880
3881 // As a last resort, sort by block number.
3882 return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3883}
3884
3885/// \returns true if the given copy uses or defines a local live range.
3886static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3887 if (!Copy->isCopy())
3888 return false;
3889
3890 if (Copy->getOperand(1).isUndef())
3891 return false;
3892
3893 Register SrcReg = Copy->getOperand(1).getReg();
3894 Register DstReg = Copy->getOperand(0).getReg();
3895 if (SrcReg.isPhysical() || DstReg.isPhysical())
3896 return false;
3897
3898 return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3899 || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3900}
3901
3902void RegisterCoalescer::lateLiveIntervalUpdate() {
3903 for (Register reg : ToBeUpdated) {
3904 if (!LIS->hasInterval(reg))
3905 continue;
3906 LiveInterval &LI = LIS->getInterval(reg);
3907 shrinkToUses(&LI, &DeadDefs);
3908 if (!DeadDefs.empty())
3909 eliminateDeadDefs();
3910 }
3911 ToBeUpdated.clear();
3912}
3913
3914bool RegisterCoalescer::
3915copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3916 bool Progress = false;
3917 for (MachineInstr *&MI : CurrList) {
3918 if (!MI)
3919 continue;
3920 // Skip instruction pointers that have already been erased, for example by
3921 // dead code elimination.
3922 if (ErasedInstrs.count(MI)) {
3923 MI = nullptr;
3924 continue;
3925 }
3926 bool Again = false;
3927 bool Success = joinCopy(MI, Again);
3928 Progress |= Success;
3929 if (Success || !Again)
3930 MI = nullptr;
3931 }
3932 return Progress;
3933}
3934
3935/// Check if DstReg is a terminal node.
3936/// I.e., it does not have any affinity other than \p Copy.
3937static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
3938 const MachineRegisterInfo *MRI) {
3939 assert(Copy.isCopyLike());
3940 // Check if the destination of this copy as any other affinity.
3941 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3942 if (&MI != &Copy && MI.isCopyLike())
3943 return false;
3944 return true;
3945}
3946
3947bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3948 assert(Copy.isCopyLike());
3949 if (!UseTerminalRule)
3950 return false;
3951 Register SrcReg, DstReg;
3952 unsigned SrcSubReg = 0, DstSubReg = 0;
3953 if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
3954 return false;
3955 // Check if the destination of this copy has any other affinity.
3956 if (DstReg.isPhysical() ||
3957 // If SrcReg is a physical register, the copy won't be coalesced.
3958 // Ignoring it may have other side effect (like missing
3959 // rematerialization). So keep it.
3960 SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
3961 return false;
3962
3963 // DstReg is a terminal node. Check if it interferes with any other
3964 // copy involving SrcReg.
3965 const MachineBasicBlock *OrigBB = Copy.getParent();
3966 const LiveInterval &DstLI = LIS->getInterval(DstReg);
3967 for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3968 // Technically we should check if the weight of the new copy is
3969 // interesting compared to the other one and update the weight
3970 // of the copies accordingly. However, this would only work if
3971 // we would gather all the copies first then coalesce, whereas
3972 // right now we interleave both actions.
3973 // For now, just consider the copies that are in the same block.
3974 if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
3975 continue;
3976 Register OtherSrcReg, OtherReg;
3977 unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
3978 if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3979 OtherSubReg))
3980 return false;
3981 if (OtherReg == SrcReg)
3982 OtherReg = OtherSrcReg;
3983 // Check if OtherReg is a non-terminal.
3984 if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
3985 continue;
3986 // Check that OtherReg interfere with DstReg.
3987 if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3988 LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
3989 << '\n');
3990 return true;
3991 }
3992 }
3993 return false;
3994}
3995
3996void
3997RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
3998 LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
3999
4000 // Collect all copy-like instructions in MBB. Don't start coalescing anything
4001 // yet, it might invalidate the iterator.
4002 const unsigned PrevSize = WorkList.size();
4003 if (JoinGlobalCopies) {
4004 SmallVector<MachineInstr*, 2> LocalTerminals;
4005 SmallVector<MachineInstr*, 2> GlobalTerminals;
4006 // Coalesce copies bottom-up to coalesce local defs before local uses. They
4007 // are not inherently easier to resolve, but slightly preferable until we
4008 // have local live range splitting. In particular this is required by
4009 // cmp+jmp macro fusion.
4010 for (MachineInstr &MI : *MBB) {
4011 if (!MI.isCopyLike())
4012 continue;
4013 bool ApplyTerminalRule = applyTerminalRule(MI);
4014 if (isLocalCopy(&MI, LIS)) {
4015 if (ApplyTerminalRule)
4016 LocalTerminals.push_back(&MI);
4017 else
4018 LocalWorkList.push_back(&MI);
4019 } else {
4020 if (ApplyTerminalRule)
4021 GlobalTerminals.push_back(&MI);
4022 else
4023 WorkList.push_back(&MI);
4024 }
4025 }
4026 // Append the copies evicted by the terminal rule at the end of the list.
4027 LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
4028 WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
4029 }
4030 else {
4032 for (MachineInstr &MII : *MBB)
4033 if (MII.isCopyLike()) {
4034 if (applyTerminalRule(MII))
4035 Terminals.push_back(&MII);
4036 else
4037 WorkList.push_back(&MII);
4038 }
4039 // Append the copies evicted by the terminal rule at the end of the list.
4040 WorkList.append(Terminals.begin(), Terminals.end());
4041 }
4042 // Try coalescing the collected copies immediately, and remove the nulls.
4043 // This prevents the WorkList from getting too large since most copies are
4044 // joinable on the first attempt.
4046 CurrList(WorkList.begin() + PrevSize, WorkList.end());
4047 if (copyCoalesceWorkList(CurrList))
4048 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
4049 nullptr), WorkList.end());
4050}
4051
4052void RegisterCoalescer::coalesceLocals() {
4053 copyCoalesceWorkList(LocalWorkList);
4054 for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
4055 if (LocalWorkList[j])
4056 WorkList.push_back(LocalWorkList[j]);
4057 }
4058 LocalWorkList.clear();
4059}
4060
4061void RegisterCoalescer::joinAllIntervals() {
4062 LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4063 assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4064
4065 std::vector<MBBPriorityInfo> MBBs;
4066 MBBs.reserve(MF->size());
4067 for (MachineBasicBlock &MBB : *MF) {
4068 MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4069 JoinSplitEdges && isSplitEdge(&MBB)));
4070 }
4071 array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4072
4073 // Coalesce intervals in MBB priority order.
4074 unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4075 for (MBBPriorityInfo &MBB : MBBs) {
4076 // Try coalescing the collected local copies for deeper loops.
4077 if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4078 coalesceLocals();
4079 CurrDepth = MBB.Depth;
4080 }
4081 copyCoalesceInMBB(MBB.MBB);
4082 }
4083 lateLiveIntervalUpdate();
4084 coalesceLocals();
4085
4086 // Joining intervals can allow other intervals to be joined. Iteratively join
4087 // until we make no progress.
4088 while (copyCoalesceWorkList(WorkList))
4089 /* empty */ ;
4090 lateLiveIntervalUpdate();
4091}
4092
4093void RegisterCoalescer::releaseMemory() {
4094 ErasedInstrs.clear();
4095 WorkList.clear();
4096 DeadDefs.clear();
4097 InflateRegs.clear();
4098 LargeLIVisitCounter.clear();
4099}
4100
4101bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
4102 LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
4103 << "********** Function: " << fn.getName() << '\n');
4104
4105 // Variables changed between a setjmp and a longjump can have undefined value
4106 // after the longjmp. This behaviour can be observed if such a variable is
4107 // spilled, so longjmp won't restore the value in the spill slot.
4108 // RegisterCoalescer should not run in functions with a setjmp to avoid
4109 // merging such undefined variables with predictable ones.
4110 //
4111 // TODO: Could specifically disable coalescing registers live across setjmp
4112 // calls
4113 if (fn.exposesReturnsTwice()) {
4114 LLVM_DEBUG(
4115 dbgs() << "* Skipped as it exposes functions that returns twice.\n");
4116 return false;
4117 }
4118
4119 MF = &fn;
4120 MRI = &fn.getRegInfo();
4121 const TargetSubtargetInfo &STI = fn.getSubtarget();
4122 TRI = STI.getRegisterInfo();
4123 TII = STI.getInstrInfo();
4124 LIS = &getAnalysis<LiveIntervals>();
4125 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4126 Loops = &getAnalysis<MachineLoopInfo>();
4128 JoinGlobalCopies = STI.enableJoinGlobalCopies();
4129 else
4130 JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
4131
4132 // If there are PHIs tracked by debug-info, they will need updating during
4133 // coalescing. Build an index of those PHIs to ease updating.
4134 SlotIndexes *Slots = LIS->getSlotIndexes();
4135 for (const auto &DebugPHI : MF->DebugPHIPositions) {
4136 MachineBasicBlock *MBB = DebugPHI.second.MBB;
4137 Register Reg = DebugPHI.second.Reg;
4138 unsigned SubReg = DebugPHI.second.SubReg;
4139 SlotIndex SI = Slots->getMBBStartIdx(MBB);
4140 PHIValPos P = {SI, Reg, SubReg};
4141 PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
4142 RegToPHIIdx[Reg].push_back(DebugPHI.first);
4143 }
4144
4145 // The MachineScheduler does not currently require JoinSplitEdges. This will
4146 // either be enabled unconditionally or replaced by a more general live range
4147 // splitting optimization.
4148 JoinSplitEdges = EnableJoinSplits;
4149
4150 if (VerifyCoalescing)
4151 MF->verify(this, "Before register coalescing");
4152
4153 DbgVRegToValues.clear();
4154 DbgMergedVRegNums.clear();
4155 buildVRegToDbgValueMap(fn);
4156
4157 RegClassInfo.runOnMachineFunction(fn);
4158
4159 // Join (coalesce) intervals if requested.
4160 if (EnableJoining)
4161 joinAllIntervals();
4162
4163 // After deleting a lot of copies, register classes may be less constrained.
4164 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
4165 // DPR inflation.
4166 array_pod_sort(InflateRegs.begin(), InflateRegs.end());
4167 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
4168 InflateRegs.end());
4169 LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
4170 << " regs.\n");
4171 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
4172 Register Reg = InflateRegs[i];
4173 if (MRI->reg_nodbg_empty(Reg))
4174 continue;
4175 if (MRI->recomputeRegClass(Reg)) {
4176 LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
4177 << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
4178 ++NumInflated;
4179
4180 LiveInterval &LI = LIS->getInterval(Reg);
4181 if (LI.hasSubRanges()) {
4182 // If the inflated register class does not support subregisters anymore
4183 // remove the subranges.
4184 if (!MRI->shouldTrackSubRegLiveness(Reg)) {
4185 LI.clearSubRanges();
4186 } else {
4187#ifndef NDEBUG
4188 LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
4189 // If subranges are still supported, then the same subregs
4190 // should still be supported.
4191 for (LiveInterval::SubRange &S : LI.subranges()) {
4192 assert((S.LaneMask & ~MaxMask).none());
4193 }
4194#endif
4195 }
4196 }
4197 }
4198 }
4199
4200 // After coalescing, update any PHIs that are being tracked by debug-info
4201 // with their new VReg locations.
4202 for (auto &p : MF->DebugPHIPositions) {
4203 auto it = PHIValToPos.find(p.first);
4204 assert(it != PHIValToPos.end());
4205 p.second.Reg = it->second.Reg;
4206 p.second.SubReg = it->second.SubReg;
4207 }
4208
4209 PHIValToPos.clear();
4210 RegToPHIIdx.clear();
4211
4212 LLVM_DEBUG(dump());
4213 if (VerifyCoalescing)
4214 MF->verify(this, "After register coalescing");
4215 return true;
4216}
4217
4218void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
4219 LIS->print(O, m);
4220}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
aarch64 promote const
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
unsigned UseOpIdx
Basic Register Allocator
simple register coalescing
static cl::opt< cl::boolOrDefault > EnableGlobalCopies("join-globalcopies", cl::desc("Coalesce copies that span blocks (default=subtarget)"), cl::init(cl::BOU_UNSET), cl::Hidden)
Temporary flag to test global copy optimization.
static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS)
simple register Simple Register Coalescing
static bool isSplitEdge(const MachineBasicBlock *MBB)
Return true if this block should be vacated by the coalescer to eliminate branches.
static int compareMBBPriority(const MBBPriorityInfo *LHS, const MBBPriorityInfo *RHS)
C-style comparator that sorts first based on the loop depth of the basic block (the unsigned),...
static cl::opt< unsigned > LargeIntervalSizeThreshold("large-interval-size-threshold", cl::Hidden, cl::desc("If the valnos size of an interval is larger than the threshold, " "it is regarded as a large interval. "), cl::init(100))
simple register Simple Register static false bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, Register &Src, Register &Dst, unsigned &SrcSub, unsigned &DstSub)
static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def)
Check if any of the subranges of LI contain a definition at Def.
static std::pair< bool, bool > addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, const VNInfo *SrcValNo)
Copy segments with value number SrcValNo from liverange Src to live range @Dst and use value number D...
static bool isLiveThrough(const LiveQueryResult Q)
static bool isTerminalReg(Register DstReg, const MachineInstr &Copy, const MachineRegisterInfo *MRI)
Check if DstReg is a terminal node.
static cl::opt< bool > VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden)
static cl::opt< unsigned > LargeIntervalFreqThreshold("large-interval-freq-threshold", cl::Hidden, cl::desc("For a large interval, if it is coalesed with other live " "intervals many times more than the threshold, stop its " "coalescing to control the compile time. "), cl::init(100))
static cl::opt< bool > EnableJoinSplits("join-splitedges", cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden)
Temporary flag to test critical edge unsplitting.
static cl::opt< bool > EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true), cl::Hidden)
static bool definesFullReg(const MachineInstr &MI, Register Reg)
Returns true if MI defines the full vreg Reg, as opposed to just defining a subregister.
static cl::opt< unsigned > LateRematUpdateThreshold("late-remat-update-threshold", cl::Hidden, cl::desc("During rematerialization for a copy, if the def instruction has " "many other copy uses to be rematerialized, delay the multiple " "separate live interval update work and do them all at once after " "all those rematerialization are done. It will save a lot of " "repeated work. "), cl::init(100))
static cl::opt< bool > UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), cl::init(false), cl::Hidden)
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS)
Value * RHS
Value * LHS
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
bool test(unsigned Idx) const
Definition: BitVector.h:454
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
A helper class for register coalescers.
bool flip()
Swap SrcReg and DstReg.
bool isCoalescable(const MachineInstr *) const
Return true if MI is a copy instruction that will become an identity copy after coalescing.
bool setRegisters(const MachineInstr *)
Set registers to match the copy instruction MI.
The location of a single variable, composed of an expression and 0 or more DbgValueLocEntries.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
bool isAsCheapAsAMove(const MachineInstr &MI) const override
A live range for subregisters.
Definition: LiveInterval.h:693
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
void removeEmptySubRanges()
Removes all subranges without any segments (subranges without segments are not considered valid and s...
Register reg() const
Definition: LiveInterval.h:717
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
SubRange * createSubRangeFrom(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, const LiveRange &CopyFrom)
Like createSubRange() but the new range is filled with a copy of the liveness information in CopyFrom...
Definition: LiveInterval.h:794
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function< void(LiveInterval::SubRange &)> Apply, const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, unsigned ComposeSubRegIdx=0)
Refines the subranges to support LaneMask.
void computeSubRangeUndefs(SmallVectorImpl< SlotIndex > &Undefs, LaneBitmask LaneMask, const MachineRegisterInfo &MRI, const SlotIndexes &Indexes) const
For a given lane mask LaneMask, compute indexes at which the lane is marked undefined by subregister ...
SubRange * createSubRange(BumpPtrAllocator &Allocator, LaneBitmask LaneMask)
Creates a new empty subregister live range.
Definition: LiveInterval.h:785
void clearSubRanges()
Removes all subregister liveness information.
bool hasInterval(Register Reg) const
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const
Return the first index in the given basic block.
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const
Returns true if VNI is killed by any PHI-def values in LI.
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
SlotIndexes * getSlotIndexes() const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
VNInfo::Allocator & getVNInfoAllocator()
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveRange * getCachedRegUnit(unsigned Unit)
Return the live range for register unit Unit if it has already been computed, or nullptr if it hasn't...
LiveInterval & getInterval(Register Reg)
void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl< SlotIndex > *EndPoints)
If LR has a live value at Kill, prune its live range by removing any liveness reachable from Kill.
void removeInterval(Register Reg)
Interval removal.
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos)
Remove value number and related live segments of LI and its subranges that start at position Pos.
bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
void extendToIndices(LiveRange &LR, ArrayRef< SlotIndex > Indices, ArrayRef< SlotIndex > Undefs)
Extend the live range LR to reach all points in Indices.
void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos)
Remove value numbers and related live segments starting at position Pos that are part of any liverang...
void splitSeparateComponents(LiveInterval &LI, SmallVectorImpl< LiveInterval * > &SplitLIs)
Split separate components in LiveInterval LI into separate intervals.
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
void print(raw_ostream &O, const Module *=nullptr) const override
Implement the dump method.
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueOutOrDead() const
Returns the value alive at the end of the instruction, if any.
Definition: LiveInterval.h:129
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
VNInfo * valueOut() const
Return the value leaving the instruction, if any.
Definition: LiveInterval.h:123
VNInfo * valueDefined() const
Return the value defined by this instruction, if any.
Definition: LiveInterval.h:135
SlotIndex endPoint() const
Return the end point of the last live range segment to interact with the instruction,...
Definition: LiveInterval.h:147
bool isKill() const
Return true if the live-in value is killed by this instruction.
Definition: LiveInterval.h:112
Callback methods for LiveRangeEdit owners.
Definition: LiveRangeEdit.h:45
virtual void LRE_WillEraseInstruction(MachineInstr *MI)
Called immediately before erasing a dead machine instruction.
Definition: LiveRangeEdit.h:52
void eliminateDeadDefs(SmallVectorImpl< MachineInstr * > &Dead, ArrayRef< Register > RegsBeingSpilled=std::nullopt)
eliminateDeadDefs - Try to delete machine instructions that are now dead (allDefsAreDead returns true...
SlotIndex rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, const TargetRegisterInfo &, bool Late=false, unsigned SubIdx=0, MachineInstr *ReplaceIndexMI=nullptr)
rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an instruction into MBB before...
bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI)
checkRematerializable - Manually add VNI to the list of rematerializable values if DefMI may be remat...
bool canRematerializeAt(Remat &RM, VNInfo *OrigVNI, SlotIndex UseIdx, bool cheapAsAMove)
canRematerializeAt - Determine if ParentVNI can be rematerialized at UseIdx.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getValNumInfo(unsigned ValNo)
getValNumInfo - Returns pointer to the specified val#.
Definition: LiveInterval.h:317
iterator addSegment(Segment S)
Add the specified Segment to this range, merging segments as appropriate.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void join(LiveRange &Other, const int *ValNoAssignments, const int *RHSValNoAssignments, SmallVectorImpl< VNInfo * > &NewVNInfo)
join - Join two live ranges (this, and other) together.
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
VNInfo * createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc)
createDeadDef - Make sure the range has a value defined at Def.
void removeValNo(VNInfo *ValNo)
removeValNo - Remove all the segments defined by the specified value#.
void verify() const
Walk the range and assert if any invariants fail to hold.
bool empty() const
Definition: LiveInterval.h:382
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Definition: LiveInterval.h:448
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
VNInfo * MergeValueNumberInto(VNInfo *V1, VNInfo *V2)
MergeValueNumberInto - This method is called when two value numbers are found to be equivalent.
unsigned getNumValNums() const
Definition: LiveInterval.h:313
iterator begin()
Definition: LiveInterval.h:215
Segments segments
Definition: LiveInterval.h:203
VNInfoList valnos
Definition: LiveInterval.h:204
bool containsOneValue() const
Definition: LiveInterval.h:311
size_t size() const
Definition: LiveInterval.h:305
iterator FindSegmentContaining(SlotIndex Idx)
Return an iterator to the segment that contains the specified index, or end() if there is none.
Definition: LiveInterval.h:436
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:247
MCRegUnitRootIterator enumerates the root registers of a register unit.
bool isValid() const
Check if the iterator is at the end of the list.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
bool isEHPad() const
Returns true if the block is a landing pad.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
int findRegisterUseOperandIdx(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a use of the specific register or -1 if it is not found.
bool isSafeToMove(AAResults *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
bool isImplicitDef() const
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
bool isCopyLike() const
Return true if the instruction behaves like a copy.
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
std::pair< bool, bool > readsWritesVirtualRegister(Register Reg, SmallVectorImpl< unsigned > *Ops=nullptr) const
Return a pair of bools (reads, writes) indicating if this instruction reads or writes Reg.
bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx=nullptr) const
Return true if the use operand of the specified index is tied to a def operand.
bool isDebugInstr() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand,...
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
bool isCommutable(QueryType Type=IgnoreBundle) const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
bool allDefsAreDead() const
Return true if all the defs of this instruction are dead.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
void setIsUndef(bool Val=true)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
bool isEarlyClobber() const
isEarlyClobber - Returns true if this is an early-clobber slot.
Definition: SlotIndexes.h:234
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:246
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
Definition: SlotIndexes.h:294
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:259
bool isDead() const
isDead - Returns true if this is a dead def kill slot.
Definition: SlotIndexes.h:241
SlotIndexes pass.
Definition: SlotIndexes.h:319
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
Returns the basic block which the given index falls in.
Definition: SlotIndexes.h:519
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the last index in the given basic block number.
Definition: SlotIndexes.h:481
SlotIndex getNextNonNullIndex(SlotIndex Index)
Returns the next non-null index, if one exists.
Definition: SlotIndexes.h:414
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
Definition: SlotIndexes.h:390
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:471
SlotIndex getIndexBefore(const MachineInstr &MI) const
getIndexBefore - Returns the index of the last indexed instruction before MI, or the start index of i...
Definition: SlotIndexes.h:427
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated...
Definition: SlotIndexes.h:408
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void reserve(size_type N)
Definition: SmallVector.h:667
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:289
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
static const unsigned CommuteAnyOperandIndex
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual bool enableJoinGlobalCopies() const
True if the subtarget should enable joining global copies.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
void markUnused()
Mark this value as unused.
Definition: LiveInterval.h:84
bool isUnused() const
Returns true if this value is unused.
Definition: LiveInterval.h:81
unsigned id
The ID number of this value.
Definition: LiveInterval.h:58
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:78
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ SS
Definition: X86.h:202
Reg
All possible values of the reg field in the ModR/M byte.