LLVM 17.0.0git
HexagonGenInsert.cpp
Go to the documentation of this file.
1//===- HexagonGenInsert.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BitTracker.h"
10#include "HexagonBitTracker.h"
11#include "HexagonInstrInfo.h"
12#include "HexagonRegisterInfo.h"
13#include "HexagonSubtarget.h"
14#include "llvm/ADT/BitVector.h"
15#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallSet.h"
21#include "llvm/ADT/StringRef.h"
31#include "llvm/IR/DebugLoc.h"
33#include "llvm/Pass.h"
35#include "llvm/Support/Debug.h"
37#include "llvm/Support/Timer.h"
39#include <algorithm>
40#include <cassert>
41#include <cstdint>
42#include <iterator>
43#include <utility>
44#include <vector>
45
46#define DEBUG_TYPE "hexinsert"
47
48using namespace llvm;
49
51 VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52 cl::desc("Vreg# cutoff for insert generation."));
53// The distance cutoff is selected based on the precheckin-perf results:
54// cutoffs 20, 25, 35, and 40 are worse than 30.
56 VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57 cl::desc("Vreg distance cutoff for insert "
58 "generation."));
59
60// Limit the container sizes for extreme cases where we run out of memory.
62 MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63 cl::desc("Maximum size of OrderedRegisterList"));
64static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
66 cl::desc("Maximum size of IFMap"));
67
68static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69 cl::desc("Enable timing of insert generation"));
70static cl::opt<bool>
71 OptTimingDetail("insert-timing-detail", cl::Hidden,
72 cl::desc("Enable detailed timing of insert "
73 "generation"));
74
75static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77// Whether to construct constant values via "insert". Could eliminate constant
78// extenders, but often not practical.
79static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80
81// The preprocessor gets confused when the DEBUG macro is passed larger
82// chunks of code. Use this function to detect debugging.
83inline static bool isDebug() {
84#ifndef NDEBUG
86#else
87 return false;
88#endif
89}
90
91namespace {
92
93 // Set of virtual registers, based on BitVector.
94 struct RegisterSet : private BitVector {
95 RegisterSet() = default;
96 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97 RegisterSet(const RegisterSet &RS) = default;
98 RegisterSet &operator=(const RegisterSet &RS) = default;
99
100 using BitVector::clear;
101
102 unsigned find_first() const {
104 if (First < 0)
105 return 0;
106 return x2v(First);
107 }
108
109 unsigned find_next(unsigned Prev) const {
110 int Next = BitVector::find_next(v2x(Prev));
111 if (Next < 0)
112 return 0;
113 return x2v(Next);
114 }
115
116 RegisterSet &insert(unsigned R) {
117 unsigned Idx = v2x(R);
118 ensure(Idx);
119 return static_cast<RegisterSet&>(BitVector::set(Idx));
120 }
121 RegisterSet &remove(unsigned R) {
122 unsigned Idx = v2x(R);
123 if (Idx >= size())
124 return *this;
125 return static_cast<RegisterSet&>(BitVector::reset(Idx));
126 }
127
128 RegisterSet &insert(const RegisterSet &Rs) {
129 return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130 }
131 RegisterSet &remove(const RegisterSet &Rs) {
132 return static_cast<RegisterSet&>(BitVector::reset(Rs));
133 }
134
135 reference operator[](unsigned R) {
136 unsigned Idx = v2x(R);
137 ensure(Idx);
139 }
140 bool operator[](unsigned R) const {
141 unsigned Idx = v2x(R);
142 assert(Idx < size());
144 }
145 bool has(unsigned R) const {
146 unsigned Idx = v2x(R);
147 if (Idx >= size())
148 return false;
149 return BitVector::test(Idx);
150 }
151
152 bool empty() const {
153 return !BitVector::any();
154 }
155 bool includes(const RegisterSet &Rs) const {
156 // A.BitVector::test(B) <=> A-B != {}
157 return !Rs.BitVector::test(*this);
158 }
159 bool intersects(const RegisterSet &Rs) const {
160 return BitVector::anyCommon(Rs);
161 }
162
163 private:
164 void ensure(unsigned Idx) {
165 if (size() <= Idx)
166 resize(std::max(Idx+1, 32U));
167 }
168
169 static inline unsigned v2x(unsigned v) {
170 return Register::virtReg2Index(v);
171 }
172
173 static inline unsigned x2v(unsigned x) {
174 return Register::index2VirtReg(x);
175 }
176 };
177
178 struct PrintRegSet {
179 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
180 : RS(S), TRI(RI) {}
181
183 const PrintRegSet &P);
184
185 private:
186 const RegisterSet &RS;
187 const TargetRegisterInfo *TRI;
188 };
189
190 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
191 OS << '{';
192 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
193 OS << ' ' << printReg(R, P.TRI);
194 OS << " }";
195 return OS;
196 }
197
198 // A convenience class to associate unsigned numbers (such as virtual
199 // registers) with unsigned numbers.
200 struct UnsignedMap : public DenseMap<unsigned,unsigned> {
201 UnsignedMap() = default;
202
203 private:
205 };
206
207 // A utility to establish an ordering between virtual registers:
208 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
209 // This is meant as a cache for the ordering of virtual registers defined
210 // by a potentially expensive comparison function, or obtained by a proce-
211 // dure that should not be repeated each time two registers are compared.
212 struct RegisterOrdering : public UnsignedMap {
213 RegisterOrdering() = default;
214
215 unsigned operator[](unsigned VR) const {
216 const_iterator F = find(VR);
217 assert(F != end());
218 return F->second;
219 }
220
221 // Add operator(), so that objects of this class can be used as
222 // comparators in std::sort et al.
223 bool operator() (unsigned VR1, unsigned VR2) const {
224 return operator[](VR1) < operator[](VR2);
225 }
226 };
227
228 // Ordering of bit values. This class does not have operator[], but
229 // is supplies a comparison operator() for use in std:: algorithms.
230 // The order is as follows:
231 // - 0 < 1 < ref
232 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
233 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
234 struct BitValueOrdering {
235 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
236
237 bool operator() (const BitTracker::BitValue &V1,
238 const BitTracker::BitValue &V2) const;
239
240 const RegisterOrdering &BaseOrd;
241 };
242
243} // end anonymous namespace
244
245bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
246 const BitTracker::BitValue &V2) const {
247 if (V1 == V2)
248 return false;
249 // V1==0 => true, V2==0 => false
250 if (V1.is(0) || V2.is(0))
251 return V1.is(0);
252 // Neither of V1,V2 is 0, and V1!=V2.
253 // V2==1 => false, V1==1 => true
254 if (V2.is(1) || V1.is(1))
255 return !V2.is(1);
256 // Both V1,V2 are refs.
257 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
258 if (Ind1 != Ind2)
259 return Ind1 < Ind2;
260 // If V1.Pos==V2.Pos
261 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
262 return V1.RefI.Pos < V2.RefI.Pos;
263}
264
265namespace {
266
267 // Cache for the BitTracker's cell map. Map lookup has a logarithmic
268 // complexity, this class will memoize the lookup results to reduce
269 // the access time for repeated lookups of the same cell.
270 struct CellMapShadow {
271 CellMapShadow(const BitTracker &T) : BT(T) {}
272
273 const BitTracker::RegisterCell &lookup(unsigned VR) {
274 unsigned RInd = Register::virtReg2Index(VR);
275 // Grow the vector to at least 32 elements.
276 if (RInd >= CVect.size())
277 CVect.resize(std::max(RInd+16, 32U), nullptr);
278 const BitTracker::RegisterCell *CP = CVect[RInd];
279 if (CP == nullptr)
280 CP = CVect[RInd] = &BT.lookup(VR);
281 return *CP;
282 }
283
284 const BitTracker &BT;
285
286 private:
287 using CellVectType = std::vector<const BitTracker::RegisterCell *>;
288
289 CellVectType CVect;
290 };
291
292 // Comparator class for lexicographic ordering of virtual registers
293 // according to the corresponding BitTracker::RegisterCell objects.
294 struct RegisterCellLexCompare {
295 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
296 : BitOrd(BO), CM(M) {}
297
298 bool operator() (unsigned VR1, unsigned VR2) const;
299
300 private:
301 const BitValueOrdering &BitOrd;
302 CellMapShadow &CM;
303 };
304
305 // Comparator class for lexicographic ordering of virtual registers
306 // according to the specified bits of the corresponding BitTracker::
307 // RegisterCell objects.
308 // Specifically, this class will be used to compare bit B of a register
309 // cell for a selected virtual register R with bit N of any register
310 // other than R.
311 struct RegisterCellBitCompareSel {
312 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
313 const BitValueOrdering &BO, CellMapShadow &M)
314 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
315
316 bool operator() (unsigned VR1, unsigned VR2) const;
317
318 private:
319 const unsigned SelR, SelB;
320 const unsigned BitN;
321 const BitValueOrdering &BitOrd;
322 CellMapShadow &CM;
323 };
324
325} // end anonymous namespace
326
327bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
328 // Ordering of registers, made up from two given orderings:
329 // - the ordering of the register numbers, and
330 // - the ordering of register cells.
331 // Def. R1 < R2 if:
332 // - cell(R1) < cell(R2), or
333 // - cell(R1) == cell(R2), and index(R1) < index(R2).
334 //
335 // For register cells, the ordering is lexicographic, with index 0 being
336 // the most significant.
337 if (VR1 == VR2)
338 return false;
339
340 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
341 uint16_t W1 = RC1.width(), W2 = RC2.width();
342 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
343 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
344 if (V1 != V2)
345 return BitOrd(V1, V2);
346 }
347 // Cells are equal up until the common length.
348 if (W1 != W2)
349 return W1 < W2;
350
351 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
352}
353
354bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
355 if (VR1 == VR2)
356 return false;
357 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
358 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
359 uint16_t W1 = RC1.width(), W2 = RC2.width();
360 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
361 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
362 // If Bit1 exceeds the width of VR1, then:
363 // - return false, if at the same time Bit2 exceeds VR2, or
364 // - return true, otherwise.
365 // (I.e. "a bit value that does not exist is less than any bit value
366 // that does exist".)
367 if (W1 <= Bit1)
368 return Bit2 < W2;
369 // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
370 if (W2 <= Bit2)
371 return false;
372
373 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
374 if (V1 != V2)
375 return BitOrd(V1, V2);
376 return false;
377}
378
379namespace {
380
381 class OrderedRegisterList {
382 using ListType = std::vector<unsigned>;
383 const unsigned MaxSize;
384
385 public:
386 OrderedRegisterList(const RegisterOrdering &RO)
387 : MaxSize(MaxORLSize), Ord(RO) {}
388
389 void insert(unsigned VR);
390 void remove(unsigned VR);
391
392 unsigned operator[](unsigned Idx) const {
393 assert(Idx < Seq.size());
394 return Seq[Idx];
395 }
396
397 unsigned size() const {
398 return Seq.size();
399 }
400
401 using iterator = ListType::iterator;
402 using const_iterator = ListType::const_iterator;
403
404 iterator begin() { return Seq.begin(); }
405 iterator end() { return Seq.end(); }
406 const_iterator begin() const { return Seq.begin(); }
407 const_iterator end() const { return Seq.end(); }
408
409 // Convenience function to convert an iterator to the corresponding index.
410 unsigned idx(iterator It) const { return It-begin(); }
411
412 private:
413 ListType Seq;
414 const RegisterOrdering &Ord;
415 };
416
417 struct PrintORL {
418 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
419 : RL(L), TRI(RI) {}
420
421 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
422
423 private:
424 const OrderedRegisterList &RL;
425 const TargetRegisterInfo *TRI;
426 };
427
428 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
429 OS << '(';
430 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
431 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
432 if (I != B)
433 OS << ", ";
434 OS << printReg(*I, P.TRI);
435 }
436 OS << ')';
437 return OS;
438 }
439
440} // end anonymous namespace
441
442void OrderedRegisterList::insert(unsigned VR) {
443 iterator L = llvm::lower_bound(Seq, VR, Ord);
444 if (L == Seq.end())
445 Seq.push_back(VR);
446 else
447 Seq.insert(L, VR);
448
449 unsigned S = Seq.size();
450 if (S > MaxSize)
451 Seq.resize(MaxSize);
452 assert(Seq.size() <= MaxSize);
453}
454
455void OrderedRegisterList::remove(unsigned VR) {
456 iterator L = llvm::lower_bound(Seq, VR, Ord);
457 if (L != Seq.end())
458 Seq.erase(L);
459}
460
461namespace {
462
463 // A record of the insert form. The fields correspond to the operands
464 // of the "insert" instruction:
465 // ... = insert(SrcR, InsR, #Wdh, #Off)
466 struct IFRecord {
467 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
468 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
469
470 unsigned SrcR, InsR;
471 uint16_t Wdh, Off;
472 };
473
474 struct PrintIFR {
475 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
476 : IFR(R), TRI(RI) {}
477
478 private:
479 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
480
481 const IFRecord &IFR;
482 const TargetRegisterInfo *TRI;
483 };
484
485 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
486 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
487 OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
488 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
489 return OS;
490 }
491
492 using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
493
494} // end anonymous namespace
495
496namespace llvm {
497
500
501} // end namespace llvm
502
503namespace {
504
505 class HexagonGenInsert : public MachineFunctionPass {
506 public:
507 static char ID;
508
509 HexagonGenInsert() : MachineFunctionPass(ID) {
511 }
512
513 StringRef getPassName() const override {
514 return "Hexagon generate \"insert\" instructions";
515 }
516
517 void getAnalysisUsage(AnalysisUsage &AU) const override {
521 }
522
523 bool runOnMachineFunction(MachineFunction &MF) override;
524
525 private:
526 using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
527
528 void buildOrderingMF(RegisterOrdering &RO) const;
529 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
530 bool isIntClass(const TargetRegisterClass *RC) const;
531 bool isConstant(unsigned VR) const;
532 bool isSmallConstant(unsigned VR) const;
533 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
534 uint16_t L, uint16_t S) const;
535 bool findSelfReference(unsigned VR) const;
536 bool findNonSelfReference(unsigned VR) const;
537 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
538 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
539 unsigned distance(const MachineBasicBlock *FromB,
540 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
541 PairMapType &M) const;
542 unsigned distance(MachineBasicBlock::const_iterator FromI,
543 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
544 PairMapType &M) const;
545 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
546 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
547 void findRemovableRegisters(unsigned VR, IFRecord IF,
548 RegisterSet &RMs) const;
549 void computeRemovableRegisters();
550
551 void pruneEmptyLists();
552 void pruneCoveredSets(unsigned VR);
553 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
554 void pruneRegCopies(unsigned VR);
555 void pruneCandidates();
556 void selectCandidates();
557 bool generateInserts();
558
559 bool removeDeadCode(MachineDomTreeNode *N);
560
561 // IFRecord coupled with a set of potentially removable registers:
562 using IFListType = std::vector<IFRecordWithRegSet>;
563 using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
564
565 void dump_map() const;
566
567 const HexagonInstrInfo *HII = nullptr;
568 const HexagonRegisterInfo *HRI = nullptr;
569
570 MachineFunction *MFN;
573 CellMapShadow *CMS;
574
575 RegisterOrdering BaseOrd;
576 RegisterOrdering CellOrd;
577 IFMapType IFMap;
578 };
579
580} // end anonymous namespace
581
582char HexagonGenInsert::ID = 0;
583
584void HexagonGenInsert::dump_map() const {
585 for (const auto &I : IFMap) {
586 dbgs() << " " << printReg(I.first, HRI) << ":\n";
587 const IFListType &LL = I.second;
588 for (const auto &J : LL)
589 dbgs() << " " << PrintIFR(J.first, HRI) << ", "
590 << PrintRegSet(J.second, HRI) << '\n';
591 }
592}
593
594void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
595 unsigned Index = 0;
596
597 for (const MachineBasicBlock &B : *MFN) {
598 if (!CMS->BT.reached(&B))
599 continue;
600
601 for (const MachineInstr &MI : B) {
602 for (const MachineOperand &MO : MI.operands()) {
603 if (MO.isReg() && MO.isDef()) {
604 Register R = MO.getReg();
605 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
606 if (R.isVirtual())
607 RO.insert(std::make_pair(R, Index++));
608 }
609 }
610 }
611 }
612 // Since some virtual registers may have had their def and uses eliminated,
613 // they are no longer referenced in the code, and so they will not appear
614 // in the map.
615}
616
617void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
618 RegisterOrdering &RO) const {
619 // Create a vector of all virtual registers (collect them from the base
620 // ordering RB), and then sort it using the RegisterCell comparator.
621 BitValueOrdering BVO(RB);
622 RegisterCellLexCompare LexCmp(BVO, *CMS);
623
624 using SortableVectorType = std::vector<unsigned>;
625
626 SortableVectorType VRs;
627 for (auto &I : RB)
628 VRs.push_back(I.first);
629 llvm::sort(VRs, LexCmp);
630 // Transfer the results to the outgoing register ordering.
631 for (unsigned i = 0, n = VRs.size(); i < n; ++i)
632 RO.insert(std::make_pair(VRs[i], i));
633}
634
635inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
636 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
637}
638
639bool HexagonGenInsert::isConstant(unsigned VR) const {
640 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
641 uint16_t W = RC.width();
642 for (uint16_t i = 0; i < W; ++i) {
643 const BitTracker::BitValue &BV = RC[i];
644 if (BV.is(0) || BV.is(1))
645 continue;
646 return false;
647 }
648 return true;
649}
650
651bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
652 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
653 uint16_t W = RC.width();
654 if (W > 64)
655 return false;
656 uint64_t V = 0, B = 1;
657 for (uint16_t i = 0; i < W; ++i) {
658 const BitTracker::BitValue &BV = RC[i];
659 if (BV.is(1))
660 V |= B;
661 else if (!BV.is(0))
662 return false;
663 B <<= 1;
664 }
665
666 // For 32-bit registers, consider: Rd = #s16.
667 if (W == 32)
668 return isInt<16>(V);
669
670 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
671 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
672}
673
674bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
675 unsigned InsR, uint16_t L, uint16_t S) const {
676 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
677 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
678 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
679 // Only integet (32-/64-bit) register classes.
680 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
681 return false;
682 // The "source" register must be of the same class as DstR.
683 if (DstRC != SrcRC)
684 return false;
685 if (DstRC == InsRC)
686 return true;
687 // A 64-bit register can only be generated from other 64-bit registers.
688 if (DstRC == &Hexagon::DoubleRegsRegClass)
689 return false;
690 // Otherwise, the L and S cannot span 32-bit word boundary.
691 if (S < 32 && S+L > 32)
692 return false;
693 return true;
694}
695
696bool HexagonGenInsert::findSelfReference(unsigned VR) const {
697 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
698 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
699 const BitTracker::BitValue &V = RC[i];
700 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
701 return true;
702 }
703 return false;
704}
705
706bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
707 BitTracker::RegisterCell RC = CMS->lookup(VR);
708 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
709 const BitTracker::BitValue &V = RC[i];
710 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
711 return true;
712 }
713 return false;
714}
715
716void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
717 RegisterSet &Defs) const {
718 for (const MachineOperand &MO : MI->operands()) {
719 if (!MO.isReg() || !MO.isDef())
720 continue;
721 Register R = MO.getReg();
722 if (!R.isVirtual())
723 continue;
724 Defs.insert(R);
725 }
726}
727
728void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
729 RegisterSet &Uses) const {
730 for (const MachineOperand &MO : MI->operands()) {
731 if (!MO.isReg() || !MO.isUse())
732 continue;
733 Register R = MO.getReg();
734 if (!R.isVirtual())
735 continue;
736 Uses.insert(R);
737 }
738}
739
740unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
741 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
742 PairMapType &M) const {
743 // Forward distance from the end of a block to the beginning of it does
744 // not make sense. This function should not be called with FromB == ToB.
745 assert(FromB != ToB);
746
747 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
748 // If we have already computed it, return the cached result.
749 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
750 if (F != M.end())
751 return F->second;
752 unsigned ToRPO = RPO.lookup(ToN);
753
754 unsigned MaxD = 0;
755
756 for (const MachineBasicBlock *PB : ToB->predecessors()) {
757 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
758 // along that path will be 0, and we don't need to do any calculations
759 // on it.
760 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
761 continue;
762 unsigned D = PB->size() + distance(FromB, PB, RPO, M);
763 if (D > MaxD)
764 MaxD = D;
765 }
766
767 // Memoize the result for later lookup.
768 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
769 return MaxD;
770}
771
772unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
773 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
774 PairMapType &M) const {
775 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
776 if (FB == TB)
777 return std::distance(FromI, ToI);
778 unsigned D1 = std::distance(TB->begin(), ToI);
779 unsigned D2 = distance(FB, TB, RPO, M);
780 unsigned D3 = std::distance(FromI, FB->end());
781 return D1+D2+D3;
782}
783
784bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
785 OrderedRegisterList &AVs) {
786 if (isDebug()) {
787 dbgs() << __func__ << ": " << printReg(VR, HRI)
788 << " AVs: " << PrintORL(AVs, HRI) << "\n";
789 }
790 if (AVs.size() == 0)
791 return false;
792
793 using iterator = OrderedRegisterList::iterator;
794
795 BitValueOrdering BVO(BaseOrd);
796 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
797 uint16_t W = RC.width();
798
799 using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
800 using RSListType = std::vector<RSRecord>;
801 // Have a map, with key being the matching prefix length, and the value
802 // being the list of pairs (R,S), where R's prefix matches VR at S.
803 // (DenseMap<uint16_t,RSListType> fails to instantiate.)
804 using LRSMapType = DenseMap<unsigned, RSListType>;
805 LRSMapType LM;
806
807 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
808 // and find matching prefixes from AVs with the rotated RC. Such a prefix
809 // would match a string of bits (of length L) in RC starting at S.
810 for (uint16_t S = 0; S < W; ++S) {
811 iterator B = AVs.begin(), E = AVs.end();
812 // The registers in AVs are ordered according to the lexical order of
813 // the corresponding register cells. This means that the range of regis-
814 // ters in AVs that match a prefix of length L+1 will be contained in
815 // the range that matches a prefix of length L. This means that we can
816 // keep narrowing the search space as the prefix length goes up. This
817 // helps reduce the overall complexity of the search.
818 uint16_t L;
819 for (L = 0; L < W-S; ++L) {
820 // Compare against VR's bits starting at S, which emulates rotation
821 // of VR by S.
822 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
823 iterator NewB = std::lower_bound(B, E, VR, RCB);
824 iterator NewE = std::upper_bound(NewB, E, VR, RCB);
825 // For the registers that are eliminated from the next range, L is
826 // the longest prefix matching VR at position S (their prefixes
827 // differ from VR at S+L). If L>0, record this information for later
828 // use.
829 if (L > 0) {
830 for (iterator I = B; I != NewB; ++I)
831 LM[L].push_back(std::make_pair(*I, S));
832 for (iterator I = NewE; I != E; ++I)
833 LM[L].push_back(std::make_pair(*I, S));
834 }
835 B = NewB, E = NewE;
836 if (B == E)
837 break;
838 }
839 // Record the final register range. If this range is non-empty, then
840 // L=W-S.
841 assert(B == E || L == W-S);
842 if (B != E) {
843 for (iterator I = B; I != E; ++I)
844 LM[L].push_back(std::make_pair(*I, S));
845 // If B!=E, then we found a range of registers whose prefixes cover the
846 // rest of VR from position S. There is no need to further advance S.
847 break;
848 }
849 }
850
851 if (isDebug()) {
852 dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
853 for (const auto &I : LM) {
854 dbgs() << " L=" << I.first << ':';
855 const RSListType &LL = I.second;
856 for (const auto &J : LL)
857 dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
858 dbgs() << '\n';
859 }
860 }
861
862 bool Recorded = false;
863
864 for (unsigned SrcR : AVs) {
865 int FDi = -1, LDi = -1; // First/last different bit.
866 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
867 uint16_t AW = AC.width();
868 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
869 if (RC[i] == AC[i])
870 continue;
871 if (FDi == -1)
872 FDi = i;
873 LDi = i;
874 }
875 if (FDi == -1)
876 continue; // TODO (future): Record identical registers.
877 // Look for a register whose prefix could patch the range [FD..LD]
878 // where VR and SrcR differ.
879 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
880 uint16_t MinL = LD-FD+1;
881 for (uint16_t L = MinL; L < W; ++L) {
882 LRSMapType::iterator F = LM.find(L);
883 if (F == LM.end())
884 continue;
885 RSListType &LL = F->second;
886 for (const auto &I : LL) {
887 uint16_t S = I.second;
888 // MinL is the minimum length of the prefix. Any length above MinL
889 // allows some flexibility as to where the prefix can start:
890 // given the extra length EL=L-MinL, the prefix must start between
891 // max(0,FD-EL) and FD.
892 if (S > FD) // Starts too late.
893 continue;
894 uint16_t EL = L-MinL;
895 uint16_t LowS = (EL < FD) ? FD-EL : 0;
896 if (S < LowS) // Starts too early.
897 continue;
898 unsigned InsR = I.first;
899 if (!isValidInsertForm(VR, SrcR, InsR, L, S))
900 continue;
901 if (isDebug()) {
902 dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
903 << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
904 << S << ")\n";
905 }
906 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
907 IFMap[VR].push_back(RR);
908 Recorded = true;
909 }
910 }
911 }
912
913 return Recorded;
914}
915
916void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
917 OrderedRegisterList &AVs) {
918 if (isDebug())
919 dbgs() << "visiting block " << printMBBReference(*B) << "\n";
920
921 // First, check if this block is reachable at all. If not, the bit tracker
922 // will not have any information about registers in it.
923 if (!CMS->BT.reached(B))
924 return;
925
926 bool DoConst = OptConst;
927 // Keep a separate set of registers defined in this block, so that we
928 // can remove them from the list of available registers once all DT
929 // successors have been processed.
930 RegisterSet BlockDefs, InsDefs;
931 for (MachineInstr &MI : *B) {
932 InsDefs.clear();
933 getInstrDefs(&MI, InsDefs);
934 // Leave those alone. They are more transparent than "insert".
935 bool Skip = MI.isCopy() || MI.isRegSequence();
936
937 if (!Skip) {
938 // Visit all defined registers, and attempt to find the corresponding
939 // "insert" representations.
940 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
941 // Do not collect registers that are known to be compile-time cons-
942 // tants, unless requested.
943 if (!DoConst && isConstant(VR))
944 continue;
945 // If VR's cell contains a reference to VR, then VR cannot be defined
946 // via "insert". If VR is a constant that can be generated in a single
947 // instruction (without constant extenders), generating it via insert
948 // makes no sense.
949 if (findSelfReference(VR) || isSmallConstant(VR))
950 continue;
951
952 findRecordInsertForms(VR, AVs);
953 // Stop if the map size is too large.
954 if (IFMap.size() > MaxIFMSize)
955 return;
956 }
957 }
958
959 // Insert the defined registers into the list of available registers
960 // after they have been processed.
961 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
962 AVs.insert(VR);
963 BlockDefs.insert(InsDefs);
964 }
965
966 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
967 MachineBasicBlock *SB = DTN->getBlock();
968 collectInBlock(SB, AVs);
969 }
970
971 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
972 AVs.remove(VR);
973}
974
975void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
976 RegisterSet &RMs) const {
977 // For a given register VR and a insert form, find the registers that are
978 // used by the current definition of VR, and which would no longer be
979 // needed for it after the definition of VR is replaced with the insert
980 // form. These are the registers that could potentially become dead.
981 RegisterSet Regs[2];
982
983 unsigned S = 0; // Register set selector.
984 Regs[S].insert(VR);
985
986 while (!Regs[S].empty()) {
987 // Breadth-first search.
988 unsigned OtherS = 1-S;
989 Regs[OtherS].clear();
990 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
991 Regs[S].remove(R);
992 if (R == IF.SrcR || R == IF.InsR)
993 continue;
994 // Check if a given register has bits that are references to any other
995 // registers. This is to detect situations where the instruction that
996 // defines register R takes register Q as an operand, but R itself does
997 // not contain any bits from Q. Loads are examples of how this could
998 // happen:
999 // R = load Q
1000 // In this case (assuming we do not have any knowledge about the loaded
1001 // value), we must not treat R as a "conveyance" of the bits from Q.
1002 // (The information in BT about R's bits would have them as constants,
1003 // in case of zero-extending loads, or refs to R.)
1004 if (!findNonSelfReference(R))
1005 continue;
1006 RMs.insert(R);
1007 const MachineInstr *DefI = MRI->getVRegDef(R);
1008 assert(DefI);
1009 // Do not iterate past PHI nodes to avoid infinite loops. This can
1010 // make the final set a bit less accurate, but the removable register
1011 // sets are an approximation anyway.
1012 if (DefI->isPHI())
1013 continue;
1014 getInstrUses(DefI, Regs[OtherS]);
1015 }
1016 S = OtherS;
1017 }
1018 // The register VR is added to the list as a side-effect of the algorithm,
1019 // but it is not "potentially removable". A potentially removable register
1020 // is one that may become unused (dead) after conversion to the insert form
1021 // IF, and obviously VR (or its replacement) will not become dead by apply-
1022 // ing IF.
1023 RMs.remove(VR);
1024}
1025
1026void HexagonGenInsert::computeRemovableRegisters() {
1027 for (auto &I : IFMap) {
1028 IFListType &LL = I.second;
1029 for (auto &J : LL)
1030 findRemovableRegisters(I.first, J.first, J.second);
1031 }
1032}
1033
1034void HexagonGenInsert::pruneEmptyLists() {
1035 // Remove all entries from the map, where the register has no insert forms
1036 // associated with it.
1037 using IterListType = SmallVector<IFMapType::iterator, 16>;
1038 IterListType Prune;
1039 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1040 if (I->second.empty())
1041 Prune.push_back(I);
1042 }
1043 for (unsigned i = 0, n = Prune.size(); i < n; ++i)
1044 IFMap.erase(Prune[i]);
1045}
1046
1047void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1048 IFMapType::iterator F = IFMap.find(VR);
1049 assert(F != IFMap.end());
1050 IFListType &LL = F->second;
1051
1052 // First, examine the IF candidates for register VR whose removable-regis-
1053 // ter sets are empty. This means that a given candidate will not help eli-
1054 // minate any registers, but since "insert" is not a constant-extendable
1055 // instruction, using such a candidate may reduce code size if the defini-
1056 // tion of VR is constant-extended.
1057 // If there exists a candidate with a non-empty set, the ones with empty
1058 // sets will not be used and can be removed.
1059 MachineInstr *DefVR = MRI->getVRegDef(VR);
1060 bool DefEx = HII->isConstExtended(*DefVR);
1061 bool HasNE = false;
1062 for (const auto &I : LL) {
1063 if (I.second.empty())
1064 continue;
1065 HasNE = true;
1066 break;
1067 }
1068 if (!DefEx || HasNE) {
1069 // The definition of VR is not constant-extended, or there is a candidate
1070 // with a non-empty set. Remove all candidates with empty sets.
1071 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1072 return IR.second.empty();
1073 };
1074 llvm::erase_if(LL, IsEmpty);
1075 } else {
1076 // The definition of VR is constant-extended, and all candidates have
1077 // empty removable-register sets. Pick the maximum candidate, and remove
1078 // all others. The "maximum" does not have any special meaning here, it
1079 // is only so that the candidate that will remain on the list is selec-
1080 // ted deterministically.
1081 IFRecord MaxIF = LL[0].first;
1082 for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1083 // If LL[MaxI] < LL[i], then MaxI = i.
1084 const IFRecord &IF = LL[i].first;
1085 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1086 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1087 if (M0 > R0)
1088 continue;
1089 if (M0 == R0) {
1090 if (M1 > R1)
1091 continue;
1092 if (M1 == R1) {
1093 if (MaxIF.Wdh > IF.Wdh)
1094 continue;
1095 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1096 continue;
1097 }
1098 }
1099 // MaxIF < IF.
1100 MaxIF = IF;
1101 }
1102 // Remove everything except the maximum candidate. All register sets
1103 // are empty, so no need to preserve anything.
1104 LL.clear();
1105 LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1106 }
1107
1108 // Now, remove those whose sets of potentially removable registers are
1109 // contained in another IF candidate for VR. For example, given these
1110 // candidates for %45,
1111 // %45:
1112 // (%44,%41,#9,#8), { %42 }
1113 // (%43,%41,#9,#8), { %42 %44 }
1114 // remove the first one, since it is contained in the second one.
1115 for (unsigned i = 0, n = LL.size(); i < n; ) {
1116 const RegisterSet &RMi = LL[i].second;
1117 unsigned j = 0;
1118 while (j < n) {
1119 if (j != i && LL[j].second.includes(RMi))
1120 break;
1121 j++;
1122 }
1123 if (j == n) { // RMi not contained in anything else.
1124 i++;
1125 continue;
1126 }
1127 LL.erase(LL.begin()+i);
1128 n = LL.size();
1129 }
1130}
1131
1132void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1133 PairMapType &M) {
1134 IFMapType::iterator F = IFMap.find(VR);
1135 assert(F != IFMap.end());
1136 IFListType &LL = F->second;
1137 unsigned Cutoff = VRegDistCutoff;
1138 const MachineInstr *DefV = MRI->getVRegDef(VR);
1139
1140 for (unsigned i = LL.size(); i > 0; --i) {
1141 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1142 const MachineInstr *DefS = MRI->getVRegDef(SR);
1143 const MachineInstr *DefI = MRI->getVRegDef(IR);
1144 unsigned DSV = distance(DefS, DefV, RPO, M);
1145 if (DSV < Cutoff) {
1146 unsigned DIV = distance(DefI, DefV, RPO, M);
1147 if (DIV < Cutoff)
1148 continue;
1149 }
1150 LL.erase(LL.begin()+(i-1));
1151 }
1152}
1153
1154void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1155 IFMapType::iterator F = IFMap.find(VR);
1156 assert(F != IFMap.end());
1157 IFListType &LL = F->second;
1158
1159 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1160 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1161 };
1162 llvm::erase_if(LL, IsCopy);
1163}
1164
1165void HexagonGenInsert::pruneCandidates() {
1166 // Remove candidates that are not beneficial, regardless of the final
1167 // selection method.
1168 // First, remove candidates whose potentially removable set is a subset
1169 // of another candidate's set.
1170 for (const auto &I : IFMap)
1171 pruneCoveredSets(I.first);
1172
1173 UnsignedMap RPO;
1174
1176
1177 RPOTType RPOT(MFN);
1178 unsigned RPON = 0;
1179 for (const auto &I : RPOT)
1180 RPO[I->getNumber()] = RPON++;
1181
1182 PairMapType Memo; // Memoization map for distance calculation.
1183 // Remove candidates that would use registers defined too far away.
1184 for (const auto &I : IFMap)
1185 pruneUsesTooFar(I.first, RPO, Memo);
1186
1187 pruneEmptyLists();
1188
1189 for (const auto &I : IFMap)
1190 pruneRegCopies(I.first);
1191}
1192
1193namespace {
1194
1195 // Class for comparing IF candidates for registers that have multiple of
1196 // them. The smaller the candidate, according to this ordering, the better.
1197 // First, compare the number of zeros in the associated potentially remova-
1198 // ble register sets. "Zero" indicates that the register is very likely to
1199 // become dead after this transformation.
1200 // Second, compare "averages", i.e. use-count per size. The lower wins.
1201 // After that, it does not really matter which one is smaller. Resolve
1202 // the tie in some deterministic way.
1203 struct IFOrdering {
1204 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1205 : UseC(UC), BaseOrd(BO) {}
1206
1207 bool operator() (const IFRecordWithRegSet &A,
1208 const IFRecordWithRegSet &B) const;
1209
1210 private:
1211 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1212 unsigned &Sum) const;
1213
1214 const UnsignedMap &UseC;
1215 const RegisterOrdering &BaseOrd;
1216 };
1217
1218} // end anonymous namespace
1219
1220bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1221 const IFRecordWithRegSet &B) const {
1222 unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1223 unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1224 stats(A.second, SizeA, ZeroA, SumA);
1225 stats(B.second, SizeB, ZeroB, SumB);
1226
1227 // We will pick the minimum element. The more zeros, the better.
1228 if (ZeroA != ZeroB)
1229 return ZeroA > ZeroB;
1230 // Compare SumA/SizeA with SumB/SizeB, lower is better.
1231 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1232 if (AvgA != AvgB)
1233 return AvgA < AvgB;
1234
1235 // The sets compare identical so far. Resort to comparing the IF records.
1236 // The actual values don't matter, this is only for determinism.
1237 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1238 if (OSA != OSB)
1239 return OSA < OSB;
1240 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1241 if (OIA != OIB)
1242 return OIA < OIB;
1243 if (A.first.Wdh != B.first.Wdh)
1244 return A.first.Wdh < B.first.Wdh;
1245 return A.first.Off < B.first.Off;
1246}
1247
1248void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1249 unsigned &Sum) const {
1250 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1251 UnsignedMap::const_iterator F = UseC.find(R);
1252 assert(F != UseC.end());
1253 unsigned UC = F->second;
1254 if (UC == 0)
1255 Zero++;
1256 Sum += UC;
1257 Size++;
1258 }
1259}
1260
1261void HexagonGenInsert::selectCandidates() {
1262 // Some registers may have multiple valid candidates. Pick the best one
1263 // (or decide not to use any).
1264
1265 // Compute the "removability" measure of R:
1266 // For each potentially removable register R, record the number of regis-
1267 // ters with IF candidates, where R appears in at least one set.
1268 RegisterSet AllRMs;
1269 UnsignedMap UseC, RemC;
1270 IFMapType::iterator End = IFMap.end();
1271
1272 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1273 const IFListType &LL = I->second;
1275 for (const auto &J : LL)
1276 TT.insert(J.second);
1277 for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1278 RemC[R]++;
1279 AllRMs.insert(TT);
1280 }
1281
1282 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1283 using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1284 using InstrSet = SmallSet<const MachineInstr *, 16>;
1285
1286 InstrSet UIs;
1287 // Count as the number of instructions in which R is used, not the
1288 // number of operands.
1289 use_iterator E = MRI->use_nodbg_end();
1290 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1291 UIs.insert(I->getParent());
1292 unsigned C = UIs.size();
1293 // Calculate a measure, which is the number of instructions using R,
1294 // minus the "removability" count computed earlier.
1295 unsigned D = RemC[R];
1296 UseC[R] = (C > D) ? C-D : 0; // doz
1297 }
1298
1299 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1300 if (!SelectAll0 && !SelectHas0)
1301 SelectAll0 = true;
1302
1303 // The smaller the number UseC for a given register R, the "less used"
1304 // R is aside from the opportunities for removal offered by generating
1305 // "insert" instructions.
1306 // Iterate over the IF map, and for those registers that have multiple
1307 // candidates, pick the minimum one according to IFOrdering.
1308 IFOrdering IFO(UseC, BaseOrd);
1309 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1310 IFListType &LL = I->second;
1311 if (LL.empty())
1312 continue;
1313 // Get the minimum element, remember it and clear the list. If the
1314 // element found is adequate, we will put it back on the list, other-
1315 // wise the list will remain empty, and the entry for this register
1316 // will be removed (i.e. this register will not be replaced by insert).
1317 IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
1318 assert(MinI != LL.end());
1319 IFRecordWithRegSet M = *MinI;
1320 LL.clear();
1321
1322 // We want to make sure that this replacement will have a chance to be
1323 // beneficial, and that means that we want to have indication that some
1324 // register will be removed. The most likely registers to be eliminated
1325 // are the use operands in the definition of I->first. Accept/reject a
1326 // candidate based on how many of its uses it can potentially eliminate.
1327
1328 RegisterSet Us;
1329 const MachineInstr *DefI = MRI->getVRegDef(I->first);
1330 getInstrUses(DefI, Us);
1331 bool Accept = false;
1332
1333 if (SelectAll0) {
1334 bool All0 = true;
1335 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1336 if (UseC[R] == 0)
1337 continue;
1338 All0 = false;
1339 break;
1340 }
1341 Accept = All0;
1342 } else if (SelectHas0) {
1343 bool Has0 = false;
1344 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1345 if (UseC[R] != 0)
1346 continue;
1347 Has0 = true;
1348 break;
1349 }
1350 Accept = Has0;
1351 }
1352 if (Accept)
1353 LL.push_back(M);
1354 }
1355
1356 // Remove candidates that add uses of removable registers, unless the
1357 // removable registers are among replacement candidates.
1358 // Recompute the removable registers, since some candidates may have
1359 // been eliminated.
1360 AllRMs.clear();
1361 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1362 const IFListType &LL = I->second;
1363 if (!LL.empty())
1364 AllRMs.insert(LL[0].second);
1365 }
1366 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1367 IFListType &LL = I->second;
1368 if (LL.empty())
1369 continue;
1370 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1371 if (AllRMs[SR] || AllRMs[IR])
1372 LL.clear();
1373 }
1374
1375 pruneEmptyLists();
1376}
1377
1378bool HexagonGenInsert::generateInserts() {
1379 // Create a new register for each one from IFMap, and store them in the
1380 // map.
1381 UnsignedMap RegMap;
1382 for (auto &I : IFMap) {
1383 unsigned VR = I.first;
1384 const TargetRegisterClass *RC = MRI->getRegClass(VR);
1385 Register NewVR = MRI->createVirtualRegister(RC);
1386 RegMap[VR] = NewVR;
1387 }
1388
1389 // We can generate the "insert" instructions using potentially stale re-
1390 // gisters: SrcR and InsR for a given VR may be among other registers that
1391 // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1392 for (auto &I : IFMap) {
1393 MachineInstr *MI = MRI->getVRegDef(I.first);
1394 MachineBasicBlock &B = *MI->getParent();
1395 DebugLoc DL = MI->getDebugLoc();
1396 unsigned NewR = RegMap[I.first];
1397 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1398 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1399 : HII->get(Hexagon::S2_insertp);
1400 IFRecord IF = I.second[0].first;
1401 unsigned Wdh = IF.Wdh, Off = IF.Off;
1402 unsigned InsS = 0;
1403 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1404 InsS = Hexagon::isub_lo;
1405 if (Off >= 32) {
1406 InsS = Hexagon::isub_hi;
1407 Off -= 32;
1408 }
1409 }
1410 // Advance to the proper location for inserting instructions. This could
1411 // be B.end().
1413 if (MI->isPHI())
1414 At = B.getFirstNonPHI();
1415
1416 BuildMI(B, At, DL, D, NewR)
1417 .addReg(IF.SrcR)
1418 .addReg(IF.InsR, 0, InsS)
1419 .addImm(Wdh)
1420 .addImm(Off);
1421
1422 MRI->clearKillFlags(IF.SrcR);
1423 MRI->clearKillFlags(IF.InsR);
1424 }
1425
1426 for (const auto &I : IFMap) {
1427 MachineInstr *DefI = MRI->getVRegDef(I.first);
1428 MRI->replaceRegWith(I.first, RegMap[I.first]);
1429 DefI->eraseFromParent();
1430 }
1431
1432 return true;
1433}
1434
1435bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1436 bool Changed = false;
1437
1438 for (auto *DTN : children<MachineDomTreeNode*>(N))
1439 Changed |= removeDeadCode(DTN);
1440
1441 MachineBasicBlock *B = N->getBlock();
1442 std::vector<MachineInstr*> Instrs;
1443 for (MachineInstr &MI : llvm::reverse(*B))
1444 Instrs.push_back(&MI);
1445
1446 for (MachineInstr *MI : Instrs) {
1447 unsigned Opc = MI->getOpcode();
1448 // Do not touch lifetime markers. This is why the target-independent DCE
1449 // cannot be used.
1450 if (Opc == TargetOpcode::LIFETIME_START ||
1451 Opc == TargetOpcode::LIFETIME_END)
1452 continue;
1453 bool Store = false;
1454 if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
1455 continue;
1456
1457 bool AllDead = true;
1459 for (const MachineOperand &MO : MI->operands()) {
1460 if (!MO.isReg() || !MO.isDef())
1461 continue;
1462 Register R = MO.getReg();
1463 if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1464 AllDead = false;
1465 break;
1466 }
1467 Regs.push_back(R);
1468 }
1469 if (!AllDead)
1470 continue;
1471
1472 B->erase(MI);
1473 for (unsigned I = 0, N = Regs.size(); I != N; ++I)
1474 MRI->markUsesInDebugValueAsUndef(Regs[I]);
1475 Changed = true;
1476 }
1477
1478 return Changed;
1479}
1480
1481bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1482 if (skipFunction(MF.getFunction()))
1483 return false;
1484
1485 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1486 bool Changed = false;
1487
1488 // Verify: one, but not both.
1490
1491 IFMap.clear();
1492 BaseOrd.clear();
1493 CellOrd.clear();
1494
1495 const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1496 HII = ST.getInstrInfo();
1497 HRI = ST.getRegisterInfo();
1498 MFN = &MF;
1499 MRI = &MF.getRegInfo();
1500 MDT = &getAnalysis<MachineDominatorTree>();
1501
1502 // Clean up before any further processing, so that dead code does not
1503 // get used in a newly generated "insert" instruction. Have a custom
1504 // version of DCE that preserves lifetime markers. Without it, merging
1505 // of stack objects can fail to recognize and merge disjoint objects
1506 // leading to unnecessary stack growth.
1507 Changed = removeDeadCode(MDT->getRootNode());
1508
1509 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1510 BitTracker BTLoc(HE, MF);
1511 BTLoc.trace(isDebug());
1512 BTLoc.run();
1513 CellMapShadow MS(BTLoc);
1514 CMS = &MS;
1515
1516 buildOrderingMF(BaseOrd);
1517 buildOrderingBT(BaseOrd, CellOrd);
1518
1519 if (isDebug()) {
1520 dbgs() << "Cell ordering:\n";
1521 for (const auto &I : CellOrd) {
1522 unsigned VR = I.first, Pos = I.second;
1523 dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1524 }
1525 }
1526
1527 // Collect candidates for conversion into the insert forms.
1528 MachineBasicBlock *RootB = MDT->getRoot();
1529 OrderedRegisterList AvailR(CellOrd);
1530
1531 const char *const TGName = "hexinsert";
1532 const char *const TGDesc = "Generate Insert Instructions";
1533
1534 {
1535 NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1536 TimingDetail);
1537 collectInBlock(RootB, AvailR);
1538 // Complete the information gathered in IFMap.
1539 computeRemovableRegisters();
1540 }
1541
1542 if (isDebug()) {
1543 dbgs() << "Candidates after collection:\n";
1544 dump_map();
1545 }
1546
1547 if (IFMap.empty())
1548 return Changed;
1549
1550 {
1551 NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1552 pruneCandidates();
1553 }
1554
1555 if (isDebug()) {
1556 dbgs() << "Candidates after pruning:\n";
1557 dump_map();
1558 }
1559
1560 if (IFMap.empty())
1561 return Changed;
1562
1563 {
1564 NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1565 selectCandidates();
1566 }
1567
1568 if (isDebug()) {
1569 dbgs() << "Candidates after selection:\n";
1570 dump_map();
1571 }
1572
1573 // Filter out vregs beyond the cutoff.
1574 if (VRegIndexCutoff.getPosition()) {
1575 unsigned Cutoff = VRegIndexCutoff;
1576
1577 using IterListType = SmallVector<IFMapType::iterator, 16>;
1578
1579 IterListType Out;
1580 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1581 unsigned Idx = Register::virtReg2Index(I->first);
1582 if (Idx >= Cutoff)
1583 Out.push_back(I);
1584 }
1585 for (unsigned i = 0, n = Out.size(); i < n; ++i)
1586 IFMap.erase(Out[i]);
1587 }
1588 if (IFMap.empty())
1589 return Changed;
1590
1591 {
1592 NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1593 TimingDetail);
1594 generateInserts();
1595 }
1596
1597 return true;
1598}
1599
1601 return new HexagonGenInsert();
1602}
1603
1604//===----------------------------------------------------------------------===//
1605// Public Constructor Functions
1606//===----------------------------------------------------------------------===//
1607
1608INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1609 "Hexagon generate \"insert\" instructions", false, false)
1611INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1612 "Hexagon generate \"insert\" instructions", false, false)
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isConstant(const MachineInstr &MI)
SmallPtrSet< MachineInstr *, 2 > Uses
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Size
rpo function Deduce function attributes in RPO
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
static cl::opt< bool > OptConst("insert-const", cl::init(false), cl::Hidden)
static bool isDebug()
static cl::opt< unsigned > MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, cl::desc("Maximum size of OrderedRegisterList"))
static cl::opt< unsigned > MaxIFMSize("insert-max-ifmap", cl::init(1024), cl::Hidden, cl::desc("Maximum size of IFMap"))
static cl::opt< bool > OptTimingDetail("insert-timing-detail", cl::Hidden, cl::desc("Enable detailed timing of insert " "generation"))
static cl::opt< bool > OptTiming("insert-timing", cl::Hidden, cl::desc("Enable timing of insert generation"))
static cl::opt< unsigned > VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::desc("Vreg distance cutoff for insert " "generation."))
static cl::opt< bool > OptSelectAll0("insert-all0", cl::init(false), cl::Hidden)
static cl::opt< unsigned > VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, cl::desc("Vreg# cutoff for insert generation."))
static cl::opt< bool > OptSelectHas0("insert-has0", cl::init(false), cl::Hidden)
#define DEBUG_TYPE
Hexagon generate insert instructions
IRTranslator LLVM IR MI
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:109
Statically lint checks LLVM IR
Definition: Lint.cpp:746
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
return ToRemove size() > 0
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Definition: BitVector.h:454
BitVector & reset()
Definition: BitVector.h:385
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:293
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition: BitVector.h:482
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:328
BitVector & set()
Definition: BitVector.h:344
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:163
BitVector & operator|=(const BitVector &RHS)
Definition: BitVector.h:558
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:301
reference operator[](unsigned Idx)
Definition: BitVector.h:437
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
std::set< RegisterRef > RegisterSet
Definition: RDFGraph.h:413
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1755
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned M1(unsigned Val)
Definition: VE.h:468
bool DebugFlag
This boolean is set to true if the '-debug' command line option is specified.
Definition: Debug.cpp:45
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:484
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1683
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:160
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createHexagonGenInsert()
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:165
bool isCurrentDebugType(const char *Type)
isCurrentDebugType - Return true if the specified string is the debug type specified on the command l...
Definition: Debug.cpp:52
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1923
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
unsigned M0(unsigned Val)
Definition: VE.h:467
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1998
void initializeHexagonGenInsertPass(PassRegistry &)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
bool is(unsigned T) const
Definition: BitTracker.h:209
const RegisterCell & lookup(unsigned Reg) const
Definition: BitTracker.h:357
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:165