LLVM 22.0.0git
HexagonGenInsert.cpp
Go to the documentation of this file.
1//===- HexagonGenInsert.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BitTracker.h"
10#include "Hexagon.h"
11#include "HexagonBitTracker.h"
12#include "HexagonInstrInfo.h"
13#include "HexagonRegisterInfo.h"
14#include "HexagonSubtarget.h"
15#include "llvm/ADT/BitVector.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringRef.h"
31#include "llvm/IR/DebugLoc.h"
33#include "llvm/Pass.h"
35#include "llvm/Support/Debug.h"
37#include "llvm/Support/Timer.h"
39#include <algorithm>
40#include <cassert>
41#include <cstdint>
42#include <iterator>
43#include <utility>
44#include <vector>
45
46#define DEBUG_TYPE "hexinsert"
47
48using namespace llvm;
49
51 VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52 cl::desc("Vreg# cutoff for insert generation."));
53// The distance cutoff is selected based on the precheckin-perf results:
54// cutoffs 20, 25, 35, and 40 are worse than 30.
56 VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57 cl::desc("Vreg distance cutoff for insert "
58 "generation."));
59
60// Limit the container sizes for extreme cases where we run out of memory.
62 MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63 cl::desc("Maximum size of OrderedRegisterList"));
64static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
66 cl::desc("Maximum size of IFMap"));
67
68static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69 cl::desc("Enable timing of insert generation"));
70static cl::opt<bool>
71 OptTimingDetail("insert-timing-detail", cl::Hidden,
72 cl::desc("Enable detailed timing of insert "
73 "generation"));
74
75static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77// Whether to construct constant values via "insert". Could eliminate constant
78// extenders, but often not practical.
79static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80
81// The preprocessor gets confused when the DEBUG macro is passed larger
82// chunks of code. Use this function to detect debugging.
83inline static bool isDebug() {
84#ifndef NDEBUG
86#else
87 return false;
88#endif
89}
90
91namespace {
92
93 // Set of virtual registers, based on BitVector.
94 struct RegisterSet : private BitVector {
95 RegisterSet() = default;
96 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97 RegisterSet(const RegisterSet &RS) = default;
98 RegisterSet &operator=(const RegisterSet &RS) = default;
99
100 using BitVector::clear;
101
102 unsigned find_first() const {
104 if (First < 0)
105 return 0;
106 return x2v(First);
107 }
108
109 unsigned find_next(unsigned Prev) const {
110 int Next = BitVector::find_next(v2x(Prev));
111 if (Next < 0)
112 return 0;
113 return x2v(Next);
114 }
115
116 RegisterSet &insert(unsigned R) {
117 unsigned Idx = v2x(R);
118 ensure(Idx);
119 return static_cast<RegisterSet&>(BitVector::set(Idx));
120 }
121 RegisterSet &remove(unsigned R) {
122 unsigned Idx = v2x(R);
123 if (Idx >= size())
124 return *this;
125 return static_cast<RegisterSet&>(BitVector::reset(Idx));
126 }
127
128 RegisterSet &insert(const RegisterSet &Rs) {
129 return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130 }
131 RegisterSet &remove(const RegisterSet &Rs) {
132 return static_cast<RegisterSet&>(BitVector::reset(Rs));
133 }
134
135 reference operator[](unsigned R) {
136 unsigned Idx = v2x(R);
137 ensure(Idx);
138 return BitVector::operator[](Idx);
139 }
140 bool operator[](unsigned R) const {
141 unsigned Idx = v2x(R);
142 assert(Idx < size());
143 return BitVector::operator[](Idx);
144 }
145 bool has(unsigned R) const {
146 unsigned Idx = v2x(R);
147 if (Idx >= size())
148 return false;
149 return BitVector::test(Idx);
150 }
151
152 bool empty() const {
153 return !BitVector::any();
154 }
155 bool includes(const RegisterSet &Rs) const {
156 // A.BitVector::test(B) <=> A-B != {}
157 return !Rs.BitVector::test(*this);
158 }
159 bool intersects(const RegisterSet &Rs) const {
160 return BitVector::anyCommon(Rs);
161 }
162
163 private:
164 void ensure(unsigned Idx) {
165 if (size() <= Idx)
166 resize(std::max(Idx+1, 32U));
167 }
168
169 static inline unsigned v2x(unsigned v) {
170 return Register(v).virtRegIndex();
171 }
172
173 static inline unsigned x2v(unsigned x) {
174 return Register::index2VirtReg(x);
175 }
176 };
177
178 struct PrintRegSet {
179 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
180 : RS(S), TRI(RI) {}
181
182 friend raw_ostream &operator<< (raw_ostream &OS,
183 const PrintRegSet &P);
184
185 private:
186 const RegisterSet &RS;
187 const TargetRegisterInfo *TRI;
188 };
189
190 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
191 OS << '{';
192 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
193 OS << ' ' << printReg(R, P.TRI);
194 OS << " }";
195 return OS;
196 }
197
198 // A convenience class to associate unsigned numbers (such as virtual
199 // registers) with unsigned numbers.
200 struct UnsignedMap : public DenseMap<unsigned,unsigned> {
201 UnsignedMap() = default;
202
203 private:
204 using BaseType = DenseMap<unsigned, unsigned>;
205 };
206
207 // A utility to establish an ordering between virtual registers:
208 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
209 // This is meant as a cache for the ordering of virtual registers defined
210 // by a potentially expensive comparison function, or obtained by a proce-
211 // dure that should not be repeated each time two registers are compared.
212 struct RegisterOrdering : public UnsignedMap {
213 RegisterOrdering() = default;
214
215 unsigned operator[](unsigned VR) const {
216 const_iterator F = find(VR);
217 assert(F != end());
218 return F->second;
219 }
220
221 // Add operator(), so that objects of this class can be used as
222 // comparators in std::sort et al.
223 bool operator() (unsigned VR1, unsigned VR2) const {
224 return operator[](VR1) < operator[](VR2);
225 }
226 };
227
228 // Ordering of bit values. This class does not have operator[], but
229 // is supplies a comparison operator() for use in std:: algorithms.
230 // The order is as follows:
231 // - 0 < 1 < ref
232 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
233 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
234 struct BitValueOrdering {
235 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
236
237 bool operator() (const BitTracker::BitValue &V1,
238 const BitTracker::BitValue &V2) const;
239
240 const RegisterOrdering &BaseOrd;
241 };
242
243} // end anonymous namespace
244
245bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
246 const BitTracker::BitValue &V2) const {
247 if (V1 == V2)
248 return false;
249 // V1==0 => true, V2==0 => false
250 if (V1.is(0) || V2.is(0))
251 return V1.is(0);
252 // Neither of V1,V2 is 0, and V1!=V2.
253 // V2==1 => false, V1==1 => true
254 if (V2.is(1) || V1.is(1))
255 return !V2.is(1);
256 // Both V1,V2 are refs.
257 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
258 if (Ind1 != Ind2)
259 return Ind1 < Ind2;
260 // If V1.Pos==V2.Pos
261 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
262 return V1.RefI.Pos < V2.RefI.Pos;
263}
264
265namespace {
266
267 // Cache for the BitTracker's cell map. Map lookup has a logarithmic
268 // complexity, this class will memoize the lookup results to reduce
269 // the access time for repeated lookups of the same cell.
270 struct CellMapShadow {
271 CellMapShadow(const BitTracker &T) : BT(T) {}
272
273 const BitTracker::RegisterCell &lookup(unsigned VR) {
274 unsigned RInd = Register(VR).virtRegIndex();
275 // Grow the vector to at least 32 elements.
276 if (RInd >= CVect.size())
277 CVect.resize(std::max(RInd+16, 32U), nullptr);
278 const BitTracker::RegisterCell *CP = CVect[RInd];
279 if (CP == nullptr)
280 CP = CVect[RInd] = &BT.lookup(VR);
281 return *CP;
282 }
283
284 const BitTracker &BT;
285
286 private:
287 using CellVectType = std::vector<const BitTracker::RegisterCell *>;
288
289 CellVectType CVect;
290 };
291
292 // Comparator class for lexicographic ordering of virtual registers
293 // according to the corresponding BitTracker::RegisterCell objects.
294 struct RegisterCellLexCompare {
295 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
296 : BitOrd(BO), CM(M) {}
297
298 bool operator() (unsigned VR1, unsigned VR2) const;
299
300 private:
301 const BitValueOrdering &BitOrd;
302 CellMapShadow &CM;
303 };
304
305 // Comparator class for lexicographic ordering of virtual registers
306 // according to the specified bits of the corresponding BitTracker::
307 // RegisterCell objects.
308 // Specifically, this class will be used to compare bit B of a register
309 // cell for a selected virtual register R with bit N of any register
310 // other than R.
311 struct RegisterCellBitCompareSel {
312 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
313 const BitValueOrdering &BO, CellMapShadow &M)
314 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
315
316 bool operator() (unsigned VR1, unsigned VR2) const;
317
318 private:
319 const unsigned SelR, SelB;
320 const unsigned BitN;
321 const BitValueOrdering &BitOrd;
322 CellMapShadow &CM;
323 };
324
325} // end anonymous namespace
326
327bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
328 // Ordering of registers, made up from two given orderings:
329 // - the ordering of the register numbers, and
330 // - the ordering of register cells.
331 // Def. R1 < R2 if:
332 // - cell(R1) < cell(R2), or
333 // - cell(R1) == cell(R2), and index(R1) < index(R2).
334 //
335 // For register cells, the ordering is lexicographic, with index 0 being
336 // the most significant.
337 if (VR1 == VR2)
338 return false;
339
340 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
341 uint16_t W1 = RC1.width(), W2 = RC2.width();
342 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
343 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
344 if (V1 != V2)
345 return BitOrd(V1, V2);
346 }
347 // Cells are equal up until the common length.
348 if (W1 != W2)
349 return W1 < W2;
350
351 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
352}
353
354bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
355 if (VR1 == VR2)
356 return false;
357 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
358 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
359 uint16_t W1 = RC1.width(), W2 = RC2.width();
360 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
361 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
362 // If Bit1 exceeds the width of VR1, then:
363 // - return false, if at the same time Bit2 exceeds VR2, or
364 // - return true, otherwise.
365 // (I.e. "a bit value that does not exist is less than any bit value
366 // that does exist".)
367 if (W1 <= Bit1)
368 return Bit2 < W2;
369 // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
370 if (W2 <= Bit2)
371 return false;
372
373 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
374 if (V1 != V2)
375 return BitOrd(V1, V2);
376 return false;
377}
378
379namespace {
380
381 class OrderedRegisterList {
382 using ListType = std::vector<unsigned>;
383 const unsigned MaxSize;
384
385 public:
386 OrderedRegisterList(const RegisterOrdering &RO)
387 : MaxSize(MaxORLSize), Ord(RO) {}
388
389 void insert(unsigned VR);
390 void remove(unsigned VR);
391
392 unsigned operator[](unsigned Idx) const {
393 assert(Idx < Seq.size());
394 return Seq[Idx];
395 }
396
397 unsigned size() const {
398 return Seq.size();
399 }
400
401 using iterator = ListType::iterator;
402 using const_iterator = ListType::const_iterator;
403
404 iterator begin() { return Seq.begin(); }
405 iterator end() { return Seq.end(); }
406 const_iterator begin() const { return Seq.begin(); }
407 const_iterator end() const { return Seq.end(); }
408
409 // Convenience function to convert an iterator to the corresponding index.
410 unsigned idx(iterator It) const { return It-begin(); }
411
412 private:
413 ListType Seq;
414 const RegisterOrdering &Ord;
415 };
416
417 struct PrintORL {
418 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
419 : RL(L), TRI(RI) {}
420
421 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
422
423 private:
424 const OrderedRegisterList &RL;
425 const TargetRegisterInfo *TRI;
426 };
427
428 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
429 OS << '(';
430 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
431 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
432 if (I != B)
433 OS << ", ";
434 OS << printReg(*I, P.TRI);
435 }
436 OS << ')';
437 return OS;
438 }
439
440} // end anonymous namespace
441
442void OrderedRegisterList::insert(unsigned VR) {
443 iterator L = llvm::lower_bound(Seq, VR, Ord);
444 if (L == Seq.end())
445 Seq.push_back(VR);
446 else
447 Seq.insert(L, VR);
448
449 unsigned S = Seq.size();
450 if (S > MaxSize)
451 Seq.resize(MaxSize);
452 assert(Seq.size() <= MaxSize);
453}
454
455void OrderedRegisterList::remove(unsigned VR) {
456 iterator L = llvm::lower_bound(Seq, VR, Ord);
457 if (L != Seq.end())
458 Seq.erase(L);
459}
460
461namespace {
462
463 // A record of the insert form. The fields correspond to the operands
464 // of the "insert" instruction:
465 // ... = insert(SrcR, InsR, #Wdh, #Off)
466 struct IFRecord {
467 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
468 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
469
470 unsigned SrcR, InsR;
471 uint16_t Wdh, Off;
472 };
473
474 struct PrintIFR {
475 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
476 : IFR(R), TRI(RI) {}
477
478 private:
479 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
480
481 const IFRecord &IFR;
482 const TargetRegisterInfo *TRI;
483 };
484
485 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
486 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
487 OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
488 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
489 return OS;
490 }
491
492 using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
493
494} // end anonymous namespace
495
496namespace {
497
498 class HexagonGenInsert : public MachineFunctionPass {
499 public:
500 static char ID;
501
502 HexagonGenInsert() : MachineFunctionPass(ID) {}
503
504 StringRef getPassName() const override {
505 return "Hexagon generate \"insert\" instructions";
506 }
507
508 void getAnalysisUsage(AnalysisUsage &AU) const override {
509 AU.addRequired<MachineDominatorTreeWrapperPass>();
510 AU.addPreserved<MachineDominatorTreeWrapperPass>();
512 }
513
514 bool runOnMachineFunction(MachineFunction &MF) override;
515
516 private:
517 using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
518
519 void buildOrderingMF(RegisterOrdering &RO) const;
520 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
521 bool isIntClass(const TargetRegisterClass *RC) const;
522 bool isConstant(unsigned VR) const;
523 bool isSmallConstant(unsigned VR) const;
524 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
525 uint16_t L, uint16_t S) const;
526 bool findSelfReference(unsigned VR) const;
527 bool findNonSelfReference(unsigned VR) const;
528 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
529 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
530 unsigned distance(const MachineBasicBlock *FromB,
531 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
532 PairMapType &M) const;
533 unsigned distance(MachineBasicBlock::const_iterator FromI,
534 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
535 PairMapType &M) const;
536 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
537 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
538 void findRemovableRegisters(unsigned VR, IFRecord IF,
539 RegisterSet &RMs) const;
540 void computeRemovableRegisters();
541
542 void pruneEmptyLists();
543 void pruneCoveredSets(unsigned VR);
544 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
545 void pruneRegCopies(unsigned VR);
546 void pruneCandidates();
547 void selectCandidates();
548 bool generateInserts();
549
550 bool removeDeadCode(MachineDomTreeNode *N);
551
552 // IFRecord coupled with a set of potentially removable registers:
553 using IFListType = std::vector<IFRecordWithRegSet>;
554 using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
555
556 void dump_map() const;
557
558 const HexagonInstrInfo *HII = nullptr;
559 const HexagonRegisterInfo *HRI = nullptr;
560
561 MachineFunction *MFN;
562 MachineRegisterInfo *MRI;
563 MachineDominatorTree *MDT;
564 CellMapShadow *CMS;
565
566 RegisterOrdering BaseOrd;
567 RegisterOrdering CellOrd;
568 IFMapType IFMap;
569 };
570
571} // end anonymous namespace
572
573char HexagonGenInsert::ID = 0;
574
575void HexagonGenInsert::dump_map() const {
576 for (const auto &I : IFMap) {
577 dbgs() << " " << printReg(I.first, HRI) << ":\n";
578 const IFListType &LL = I.second;
579 for (const auto &J : LL)
580 dbgs() << " " << PrintIFR(J.first, HRI) << ", "
581 << PrintRegSet(J.second, HRI) << '\n';
582 }
583}
584
585void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
586 unsigned Index = 0;
587
588 for (const MachineBasicBlock &B : *MFN) {
589 if (!CMS->BT.reached(&B))
590 continue;
591
592 for (const MachineInstr &MI : B) {
593 for (const MachineOperand &MO : MI.operands()) {
594 if (MO.isReg() && MO.isDef()) {
595 Register R = MO.getReg();
596 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
597 if (R.isVirtual())
598 RO.insert(std::make_pair(R, Index++));
599 }
600 }
601 }
602 }
603 // Since some virtual registers may have had their def and uses eliminated,
604 // they are no longer referenced in the code, and so they will not appear
605 // in the map.
606}
607
608void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
609 RegisterOrdering &RO) const {
610 // Create a vector of all virtual registers (collect them from the base
611 // ordering RB), and then sort it using the RegisterCell comparator.
612 BitValueOrdering BVO(RB);
613 RegisterCellLexCompare LexCmp(BVO, *CMS);
614
615 using SortableVectorType = std::vector<unsigned>;
616
617 SortableVectorType VRs;
618 for (auto &I : RB)
619 VRs.push_back(I.first);
620 llvm::sort(VRs, LexCmp);
621 // Transfer the results to the outgoing register ordering.
622 for (unsigned i = 0, n = VRs.size(); i < n; ++i)
623 RO.insert(std::make_pair(VRs[i], i));
624}
625
626inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
627 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
628}
629
630bool HexagonGenInsert::isConstant(unsigned VR) const {
631 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
632 uint16_t W = RC.width();
633 for (uint16_t i = 0; i < W; ++i) {
634 const BitTracker::BitValue &BV = RC[i];
635 if (BV.is(0) || BV.is(1))
636 continue;
637 return false;
638 }
639 return true;
640}
641
642bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
643 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
644 uint16_t W = RC.width();
645 if (W > 64)
646 return false;
647 uint64_t V = 0, B = 1;
648 for (uint16_t i = 0; i < W; ++i) {
649 const BitTracker::BitValue &BV = RC[i];
650 if (BV.is(1))
651 V |= B;
652 else if (!BV.is(0))
653 return false;
654 B <<= 1;
655 }
656
657 // For 32-bit registers, consider: Rd = #s16.
658 if (W == 32)
659 return isInt<16>(V);
660
661 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
662 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
663}
664
665bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
666 unsigned InsR, uint16_t L, uint16_t S) const {
667 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
668 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
669 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
670 // Only integet (32-/64-bit) register classes.
671 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
672 return false;
673 // The "source" register must be of the same class as DstR.
674 if (DstRC != SrcRC)
675 return false;
676 if (DstRC == InsRC)
677 return true;
678 // A 64-bit register can only be generated from other 64-bit registers.
679 if (DstRC == &Hexagon::DoubleRegsRegClass)
680 return false;
681 // Otherwise, the L and S cannot span 32-bit word boundary.
682 if (S < 32 && S+L > 32)
683 return false;
684 return true;
685}
686
687bool HexagonGenInsert::findSelfReference(unsigned VR) const {
688 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
689 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
690 const BitTracker::BitValue &V = RC[i];
691 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
692 return true;
693 }
694 return false;
695}
696
697bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
698 BitTracker::RegisterCell RC = CMS->lookup(VR);
699 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
700 const BitTracker::BitValue &V = RC[i];
701 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
702 return true;
703 }
704 return false;
705}
706
707void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
708 RegisterSet &Defs) const {
709 for (const MachineOperand &MO : MI->operands()) {
710 if (!MO.isReg() || !MO.isDef())
711 continue;
712 Register R = MO.getReg();
713 if (!R.isVirtual())
714 continue;
715 Defs.insert(R);
716 }
717}
718
719void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
720 RegisterSet &Uses) const {
721 for (const MachineOperand &MO : MI->operands()) {
722 if (!MO.isReg() || !MO.isUse())
723 continue;
724 Register R = MO.getReg();
725 if (!R.isVirtual())
726 continue;
727 Uses.insert(R);
728 }
729}
730
731unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
732 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
733 PairMapType &M) const {
734 // Forward distance from the end of a block to the beginning of it does
735 // not make sense. This function should not be called with FromB == ToB.
736 assert(FromB != ToB);
737
738 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
739 // If we have already computed it, return the cached result.
740 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
741 if (F != M.end())
742 return F->second;
743 unsigned ToRPO = RPO.lookup(ToN);
744
745 unsigned MaxD = 0;
746
747 for (const MachineBasicBlock *PB : ToB->predecessors()) {
748 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
749 // along that path will be 0, and we don't need to do any calculations
750 // on it.
751 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
752 continue;
753 unsigned D = PB->size() + distance(FromB, PB, RPO, M);
754 if (D > MaxD)
755 MaxD = D;
756 }
757
758 // Memoize the result for later lookup.
759 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
760 return MaxD;
761}
762
763unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
764 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
765 PairMapType &M) const {
766 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
767 if (FB == TB)
768 return std::distance(FromI, ToI);
769 unsigned D1 = std::distance(TB->begin(), ToI);
770 unsigned D2 = distance(FB, TB, RPO, M);
771 unsigned D3 = std::distance(FromI, FB->end());
772 return D1+D2+D3;
773}
774
775bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
776 OrderedRegisterList &AVs) {
777 if (isDebug()) {
778 dbgs() << __func__ << ": " << printReg(VR, HRI)
779 << " AVs: " << PrintORL(AVs, HRI) << "\n";
780 }
781 if (AVs.size() == 0)
782 return false;
783
784 using iterator = OrderedRegisterList::iterator;
785
786 BitValueOrdering BVO(BaseOrd);
787 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
788 uint16_t W = RC.width();
789
790 using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
791 using RSListType = std::vector<RSRecord>;
792 // Have a map, with key being the matching prefix length, and the value
793 // being the list of pairs (R,S), where R's prefix matches VR at S.
794 // (DenseMap<uint16_t,RSListType> fails to instantiate.)
795 using LRSMapType = DenseMap<unsigned, RSListType>;
796 LRSMapType LM;
797
798 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
799 // and find matching prefixes from AVs with the rotated RC. Such a prefix
800 // would match a string of bits (of length L) in RC starting at S.
801 for (uint16_t S = 0; S < W; ++S) {
802 iterator B = AVs.begin(), E = AVs.end();
803 // The registers in AVs are ordered according to the lexical order of
804 // the corresponding register cells. This means that the range of regis-
805 // ters in AVs that match a prefix of length L+1 will be contained in
806 // the range that matches a prefix of length L. This means that we can
807 // keep narrowing the search space as the prefix length goes up. This
808 // helps reduce the overall complexity of the search.
809 uint16_t L;
810 for (L = 0; L < W-S; ++L) {
811 // Compare against VR's bits starting at S, which emulates rotation
812 // of VR by S.
813 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
814 iterator NewB = std::lower_bound(B, E, VR, RCB);
815 iterator NewE = std::upper_bound(NewB, E, VR, RCB);
816 // For the registers that are eliminated from the next range, L is
817 // the longest prefix matching VR at position S (their prefixes
818 // differ from VR at S+L). If L>0, record this information for later
819 // use.
820 if (L > 0) {
821 for (iterator I = B; I != NewB; ++I)
822 LM[L].push_back(std::make_pair(*I, S));
823 for (iterator I = NewE; I != E; ++I)
824 LM[L].push_back(std::make_pair(*I, S));
825 }
826 B = NewB, E = NewE;
827 if (B == E)
828 break;
829 }
830 // Record the final register range. If this range is non-empty, then
831 // L=W-S.
832 assert(B == E || L == W-S);
833 if (B != E) {
834 for (iterator I = B; I != E; ++I)
835 LM[L].push_back(std::make_pair(*I, S));
836 // If B!=E, then we found a range of registers whose prefixes cover the
837 // rest of VR from position S. There is no need to further advance S.
838 break;
839 }
840 }
841
842 if (isDebug()) {
843 dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
844 for (const auto &I : LM) {
845 dbgs() << " L=" << I.first << ':';
846 const RSListType &LL = I.second;
847 for (const auto &J : LL)
848 dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
849 dbgs() << '\n';
850 }
851 }
852
853 bool Recorded = false;
854
855 for (unsigned SrcR : AVs) {
856 int FDi = -1, LDi = -1; // First/last different bit.
857 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
858 uint16_t AW = AC.width();
859 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
860 if (RC[i] == AC[i])
861 continue;
862 if (FDi == -1)
863 FDi = i;
864 LDi = i;
865 }
866 if (FDi == -1)
867 continue; // TODO (future): Record identical registers.
868 // Look for a register whose prefix could patch the range [FD..LD]
869 // where VR and SrcR differ.
870 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
871 uint16_t MinL = LD-FD+1;
872 for (uint16_t L = MinL; L < W; ++L) {
873 LRSMapType::iterator F = LM.find(L);
874 if (F == LM.end())
875 continue;
876 RSListType &LL = F->second;
877 for (const auto &I : LL) {
878 uint16_t S = I.second;
879 // MinL is the minimum length of the prefix. Any length above MinL
880 // allows some flexibility as to where the prefix can start:
881 // given the extra length EL=L-MinL, the prefix must start between
882 // max(0,FD-EL) and FD.
883 if (S > FD) // Starts too late.
884 continue;
885 uint16_t EL = L-MinL;
886 uint16_t LowS = (EL < FD) ? FD-EL : 0;
887 if (S < LowS) // Starts too early.
888 continue;
889 unsigned InsR = I.first;
890 if (!isValidInsertForm(VR, SrcR, InsR, L, S))
891 continue;
892 if (isDebug()) {
893 dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
894 << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
895 << S << ")\n";
896 }
897 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
898 IFMap[VR].push_back(RR);
899 Recorded = true;
900 }
901 }
902 }
903
904 return Recorded;
905}
906
907void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
908 OrderedRegisterList &AVs) {
909 if (isDebug())
910 dbgs() << "visiting block " << printMBBReference(*B) << "\n";
911
912 // First, check if this block is reachable at all. If not, the bit tracker
913 // will not have any information about registers in it.
914 if (!CMS->BT.reached(B))
915 return;
916
917 bool DoConst = OptConst;
918 // Keep a separate set of registers defined in this block, so that we
919 // can remove them from the list of available registers once all DT
920 // successors have been processed.
921 RegisterSet BlockDefs, InsDefs;
922 for (MachineInstr &MI : *B) {
923 // Stop if the map size is too large.
924 if (IFMap.size() >= MaxIFMSize)
925 break;
926
927 InsDefs.clear();
928 getInstrDefs(&MI, InsDefs);
929 // Leave those alone. They are more transparent than "insert".
930 bool Skip = MI.isCopy() || MI.isRegSequence();
931
932 if (!Skip) {
933 // Visit all defined registers, and attempt to find the corresponding
934 // "insert" representations.
935 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
936 // Do not collect registers that are known to be compile-time cons-
937 // tants, unless requested.
938 if (!DoConst && isConstant(VR))
939 continue;
940 // If VR's cell contains a reference to VR, then VR cannot be defined
941 // via "insert". If VR is a constant that can be generated in a single
942 // instruction (without constant extenders), generating it via insert
943 // makes no sense.
944 if (findSelfReference(VR) || isSmallConstant(VR))
945 continue;
946
947 findRecordInsertForms(VR, AVs);
948 // Stop if the map size is too large.
949 if (IFMap.size() >= MaxIFMSize)
950 break;
951 }
952 }
953
954 // Insert the defined registers into the list of available registers
955 // after they have been processed.
956 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
957 AVs.insert(VR);
958 BlockDefs.insert(InsDefs);
959 }
960
961 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
962 MachineBasicBlock *SB = DTN->getBlock();
963 collectInBlock(SB, AVs);
964 }
965
966 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
967 AVs.remove(VR);
968}
969
970void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
971 RegisterSet &RMs) const {
972 // For a given register VR and a insert form, find the registers that are
973 // used by the current definition of VR, and which would no longer be
974 // needed for it after the definition of VR is replaced with the insert
975 // form. These are the registers that could potentially become dead.
976 RegisterSet Regs[2];
977
978 unsigned S = 0; // Register set selector.
979 Regs[S].insert(VR);
980
981 while (!Regs[S].empty()) {
982 // Breadth-first search.
983 unsigned OtherS = 1-S;
984 Regs[OtherS].clear();
985 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
986 Regs[S].remove(R);
987 if (R == IF.SrcR || R == IF.InsR)
988 continue;
989 // Check if a given register has bits that are references to any other
990 // registers. This is to detect situations where the instruction that
991 // defines register R takes register Q as an operand, but R itself does
992 // not contain any bits from Q. Loads are examples of how this could
993 // happen:
994 // R = load Q
995 // In this case (assuming we do not have any knowledge about the loaded
996 // value), we must not treat R as a "conveyance" of the bits from Q.
997 // (The information in BT about R's bits would have them as constants,
998 // in case of zero-extending loads, or refs to R.)
999 if (!findNonSelfReference(R))
1000 continue;
1001 RMs.insert(R);
1002 const MachineInstr *DefI = MRI->getVRegDef(R);
1003 assert(DefI);
1004 // Do not iterate past PHI nodes to avoid infinite loops. This can
1005 // make the final set a bit less accurate, but the removable register
1006 // sets are an approximation anyway.
1007 if (DefI->isPHI())
1008 continue;
1009 getInstrUses(DefI, Regs[OtherS]);
1010 }
1011 S = OtherS;
1012 }
1013 // The register VR is added to the list as a side-effect of the algorithm,
1014 // but it is not "potentially removable". A potentially removable register
1015 // is one that may become unused (dead) after conversion to the insert form
1016 // IF, and obviously VR (or its replacement) will not become dead by apply-
1017 // ing IF.
1018 RMs.remove(VR);
1019}
1020
1021void HexagonGenInsert::computeRemovableRegisters() {
1022 for (auto &I : IFMap) {
1023 IFListType &LL = I.second;
1024 for (auto &J : LL)
1025 findRemovableRegisters(I.first, J.first, J.second);
1026 }
1027}
1028
1029void HexagonGenInsert::pruneEmptyLists() {
1030 // Remove all entries from the map, where the register has no insert forms
1031 // associated with it.
1032 using IterListType = SmallVector<IFMapType::iterator, 16>;
1033 IterListType Prune;
1034 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1035 if (I->second.empty())
1036 Prune.push_back(I);
1037 }
1038 for (const auto &It : Prune)
1039 IFMap.erase(It);
1040}
1041
1042void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1043 IFMapType::iterator F = IFMap.find(VR);
1044 assert(F != IFMap.end());
1045 IFListType &LL = F->second;
1046
1047 // First, examine the IF candidates for register VR whose removable-regis-
1048 // ter sets are empty. This means that a given candidate will not help eli-
1049 // minate any registers, but since "insert" is not a constant-extendable
1050 // instruction, using such a candidate may reduce code size if the defini-
1051 // tion of VR is constant-extended.
1052 // If there exists a candidate with a non-empty set, the ones with empty
1053 // sets will not be used and can be removed.
1054 MachineInstr *DefVR = MRI->getVRegDef(VR);
1055 bool DefEx = HII->isConstExtended(*DefVR);
1056 bool HasNE = false;
1057 for (const auto &I : LL) {
1058 if (I.second.empty())
1059 continue;
1060 HasNE = true;
1061 break;
1062 }
1063 if (!DefEx || HasNE) {
1064 // The definition of VR is not constant-extended, or there is a candidate
1065 // with a non-empty set. Remove all candidates with empty sets.
1066 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1067 return IR.second.empty();
1068 };
1069 llvm::erase_if(LL, IsEmpty);
1070 } else {
1071 // The definition of VR is constant-extended, and all candidates have
1072 // empty removable-register sets. Pick the maximum candidate, and remove
1073 // all others. The "maximum" does not have any special meaning here, it
1074 // is only so that the candidate that will remain on the list is selec-
1075 // ted deterministically.
1076 IFRecord MaxIF = LL[0].first;
1077 for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1078 // If LL[MaxI] < LL[i], then MaxI = i.
1079 const IFRecord &IF = LL[i].first;
1080 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1081 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1082 if (M0 > R0)
1083 continue;
1084 if (M0 == R0) {
1085 if (M1 > R1)
1086 continue;
1087 if (M1 == R1) {
1088 if (MaxIF.Wdh > IF.Wdh)
1089 continue;
1090 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1091 continue;
1092 }
1093 }
1094 // MaxIF < IF.
1095 MaxIF = IF;
1096 }
1097 // Remove everything except the maximum candidate. All register sets
1098 // are empty, so no need to preserve anything.
1099 LL.clear();
1100 LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1101 }
1102
1103 // Now, remove those whose sets of potentially removable registers are
1104 // contained in another IF candidate for VR. For example, given these
1105 // candidates for %45,
1106 // %45:
1107 // (%44,%41,#9,#8), { %42 }
1108 // (%43,%41,#9,#8), { %42 %44 }
1109 // remove the first one, since it is contained in the second one.
1110 for (unsigned i = 0, n = LL.size(); i < n; ) {
1111 const RegisterSet &RMi = LL[i].second;
1112 unsigned j = 0;
1113 while (j < n) {
1114 if (j != i && LL[j].second.includes(RMi))
1115 break;
1116 j++;
1117 }
1118 if (j == n) { // RMi not contained in anything else.
1119 i++;
1120 continue;
1121 }
1122 LL.erase(LL.begin()+i);
1123 n = LL.size();
1124 }
1125}
1126
1127void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1128 PairMapType &M) {
1129 IFMapType::iterator F = IFMap.find(VR);
1130 assert(F != IFMap.end());
1131 IFListType &LL = F->second;
1132 unsigned Cutoff = VRegDistCutoff;
1133 const MachineInstr *DefV = MRI->getVRegDef(VR);
1134
1135 for (unsigned i = LL.size(); i > 0; --i) {
1136 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1137 const MachineInstr *DefS = MRI->getVRegDef(SR);
1138 const MachineInstr *DefI = MRI->getVRegDef(IR);
1139 unsigned DSV = distance(DefS, DefV, RPO, M);
1140 if (DSV < Cutoff) {
1141 unsigned DIV = distance(DefI, DefV, RPO, M);
1142 if (DIV < Cutoff)
1143 continue;
1144 }
1145 LL.erase(LL.begin()+(i-1));
1146 }
1147}
1148
1149void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1150 IFMapType::iterator F = IFMap.find(VR);
1151 assert(F != IFMap.end());
1152 IFListType &LL = F->second;
1153
1154 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1155 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1156 };
1157 llvm::erase_if(LL, IsCopy);
1158}
1159
1160void HexagonGenInsert::pruneCandidates() {
1161 // Remove candidates that are not beneficial, regardless of the final
1162 // selection method.
1163 // First, remove candidates whose potentially removable set is a subset
1164 // of another candidate's set.
1165 for (const auto &I : IFMap)
1166 pruneCoveredSets(I.first);
1167
1168 UnsignedMap RPO;
1169
1170 using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
1171
1172 RPOTType RPOT(MFN);
1173 unsigned RPON = 0;
1174 for (const auto &I : RPOT)
1175 RPO[I->getNumber()] = RPON++;
1176
1177 PairMapType Memo; // Memoization map for distance calculation.
1178 // Remove candidates that would use registers defined too far away.
1179 for (const auto &I : IFMap)
1180 pruneUsesTooFar(I.first, RPO, Memo);
1181
1182 pruneEmptyLists();
1183
1184 for (const auto &I : IFMap)
1185 pruneRegCopies(I.first);
1186}
1187
1188namespace {
1189
1190 // Class for comparing IF candidates for registers that have multiple of
1191 // them. The smaller the candidate, according to this ordering, the better.
1192 // First, compare the number of zeros in the associated potentially remova-
1193 // ble register sets. "Zero" indicates that the register is very likely to
1194 // become dead after this transformation.
1195 // Second, compare "averages", i.e. use-count per size. The lower wins.
1196 // After that, it does not really matter which one is smaller. Resolve
1197 // the tie in some deterministic way.
1198 struct IFOrdering {
1199 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1200 : UseC(UC), BaseOrd(BO) {}
1201
1202 bool operator() (const IFRecordWithRegSet &A,
1203 const IFRecordWithRegSet &B) const;
1204
1205 private:
1206 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1207 unsigned &Sum) const;
1208
1209 const UnsignedMap &UseC;
1210 const RegisterOrdering &BaseOrd;
1211 };
1212
1213} // end anonymous namespace
1214
1215bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1216 const IFRecordWithRegSet &B) const {
1217 unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1218 unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1219 stats(A.second, SizeA, ZeroA, SumA);
1220 stats(B.second, SizeB, ZeroB, SumB);
1221
1222 // We will pick the minimum element. The more zeros, the better.
1223 if (ZeroA != ZeroB)
1224 return ZeroA > ZeroB;
1225 // Compare SumA/SizeA with SumB/SizeB, lower is better.
1226 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1227 if (AvgA != AvgB)
1228 return AvgA < AvgB;
1229
1230 // The sets compare identical so far. Resort to comparing the IF records.
1231 // The actual values don't matter, this is only for determinism.
1232 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1233 if (OSA != OSB)
1234 return OSA < OSB;
1235 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1236 if (OIA != OIB)
1237 return OIA < OIB;
1238 if (A.first.Wdh != B.first.Wdh)
1239 return A.first.Wdh < B.first.Wdh;
1240 return A.first.Off < B.first.Off;
1241}
1242
1243void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1244 unsigned &Sum) const {
1245 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1246 UnsignedMap::const_iterator F = UseC.find(R);
1247 assert(F != UseC.end());
1248 unsigned UC = F->second;
1249 if (UC == 0)
1250 Zero++;
1251 Sum += UC;
1252 Size++;
1253 }
1254}
1255
1256void HexagonGenInsert::selectCandidates() {
1257 // Some registers may have multiple valid candidates. Pick the best one
1258 // (or decide not to use any).
1259
1260 // Compute the "removability" measure of R:
1261 // For each potentially removable register R, record the number of regis-
1262 // ters with IF candidates, where R appears in at least one set.
1263 RegisterSet AllRMs;
1264 UnsignedMap UseC, RemC;
1265 IFMapType::iterator End = IFMap.end();
1266
1267 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1268 const IFListType &LL = I->second;
1270 for (const auto &J : LL)
1271 TT.insert(J.second);
1272 for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1273 RemC[R]++;
1274 AllRMs.insert(TT);
1275 }
1276
1277 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1278 using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1279 using InstrSet = SmallPtrSet<const MachineInstr *, 16>;
1280
1281 InstrSet UIs;
1282 // Count as the number of instructions in which R is used, not the
1283 // number of operands.
1284 use_iterator E = MRI->use_nodbg_end();
1285 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1286 UIs.insert(I->getParent());
1287 unsigned C = UIs.size();
1288 // Calculate a measure, which is the number of instructions using R,
1289 // minus the "removability" count computed earlier.
1290 unsigned D = RemC[R];
1291 UseC[R] = (C > D) ? C-D : 0; // doz
1292 }
1293
1294 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1295 if (!SelectAll0 && !SelectHas0)
1296 SelectAll0 = true;
1297
1298 // The smaller the number UseC for a given register R, the "less used"
1299 // R is aside from the opportunities for removal offered by generating
1300 // "insert" instructions.
1301 // Iterate over the IF map, and for those registers that have multiple
1302 // candidates, pick the minimum one according to IFOrdering.
1303 IFOrdering IFO(UseC, BaseOrd);
1304 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1305 IFListType &LL = I->second;
1306 if (LL.empty())
1307 continue;
1308 // Get the minimum element, remember it and clear the list. If the
1309 // element found is adequate, we will put it back on the list, other-
1310 // wise the list will remain empty, and the entry for this register
1311 // will be removed (i.e. this register will not be replaced by insert).
1312 IFListType::iterator MinI = llvm::min_element(LL, IFO);
1313 assert(MinI != LL.end());
1314 IFRecordWithRegSet M = *MinI;
1315 LL.clear();
1316
1317 // We want to make sure that this replacement will have a chance to be
1318 // beneficial, and that means that we want to have indication that some
1319 // register will be removed. The most likely registers to be eliminated
1320 // are the use operands in the definition of I->first. Accept/reject a
1321 // candidate based on how many of its uses it can potentially eliminate.
1322
1323 RegisterSet Us;
1324 const MachineInstr *DefI = MRI->getVRegDef(I->first);
1325 getInstrUses(DefI, Us);
1326 bool Accept = false;
1327
1328 if (SelectAll0) {
1329 bool All0 = true;
1330 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1331 if (UseC[R] == 0)
1332 continue;
1333 All0 = false;
1334 break;
1335 }
1336 Accept = All0;
1337 } else if (SelectHas0) {
1338 bool Has0 = false;
1339 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1340 if (UseC[R] != 0)
1341 continue;
1342 Has0 = true;
1343 break;
1344 }
1345 Accept = Has0;
1346 }
1347 if (Accept)
1348 LL.push_back(M);
1349 }
1350
1351 // Remove candidates that add uses of removable registers, unless the
1352 // removable registers are among replacement candidates.
1353 // Recompute the removable registers, since some candidates may have
1354 // been eliminated.
1355 AllRMs.clear();
1356 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1357 const IFListType &LL = I->second;
1358 if (!LL.empty())
1359 AllRMs.insert(LL[0].second);
1360 }
1361 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1362 IFListType &LL = I->second;
1363 if (LL.empty())
1364 continue;
1365 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1366 if (AllRMs[SR] || AllRMs[IR])
1367 LL.clear();
1368 }
1369
1370 pruneEmptyLists();
1371}
1372
1373bool HexagonGenInsert::generateInserts() {
1374 // Create a new register for each one from IFMap, and store them in the
1375 // map.
1376 UnsignedMap RegMap;
1377 for (auto &I : IFMap) {
1378 unsigned VR = I.first;
1379 const TargetRegisterClass *RC = MRI->getRegClass(VR);
1380 Register NewVR = MRI->createVirtualRegister(RC);
1381 RegMap[VR] = NewVR;
1382 }
1383
1384 // We can generate the "insert" instructions using potentially stale re-
1385 // gisters: SrcR and InsR for a given VR may be among other registers that
1386 // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1387 for (auto &I : IFMap) {
1388 MachineInstr *MI = MRI->getVRegDef(I.first);
1389 MachineBasicBlock &B = *MI->getParent();
1390 DebugLoc DL = MI->getDebugLoc();
1391 unsigned NewR = RegMap[I.first];
1392 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1393 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1394 : HII->get(Hexagon::S2_insertp);
1395 IFRecord IF = I.second[0].first;
1396 unsigned Wdh = IF.Wdh, Off = IF.Off;
1397 unsigned InsS = 0;
1398 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1399 InsS = Hexagon::isub_lo;
1400 if (Off >= 32) {
1401 InsS = Hexagon::isub_hi;
1402 Off -= 32;
1403 }
1404 }
1405 // Advance to the proper location for inserting instructions. This could
1406 // be B.end().
1408 if (MI->isPHI())
1409 At = B.getFirstNonPHI();
1410
1411 BuildMI(B, At, DL, D, NewR)
1412 .addReg(IF.SrcR)
1413 .addReg(IF.InsR, 0, InsS)
1414 .addImm(Wdh)
1415 .addImm(Off);
1416
1417 MRI->clearKillFlags(IF.SrcR);
1418 MRI->clearKillFlags(IF.InsR);
1419 }
1420
1421 for (const auto &I : IFMap) {
1422 MachineInstr *DefI = MRI->getVRegDef(I.first);
1423 MRI->replaceRegWith(I.first, RegMap[I.first]);
1424 DefI->eraseFromParent();
1425 }
1426
1427 return true;
1428}
1429
1430bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1431 bool Changed = false;
1432
1433 for (auto *DTN : children<MachineDomTreeNode*>(N))
1434 Changed |= removeDeadCode(DTN);
1435
1436 MachineBasicBlock *B = N->getBlock();
1437 std::vector<MachineInstr*> Instrs;
1438 for (MachineInstr &MI : llvm::reverse(*B))
1439 Instrs.push_back(&MI);
1440
1441 for (MachineInstr *MI : Instrs) {
1442 unsigned Opc = MI->getOpcode();
1443 // Do not touch lifetime markers. This is why the target-independent DCE
1444 // cannot be used.
1445 if (Opc == TargetOpcode::LIFETIME_START ||
1446 Opc == TargetOpcode::LIFETIME_END)
1447 continue;
1448 bool Store = false;
1449 if (MI->isInlineAsm() || !MI->isSafeToMove(Store))
1450 continue;
1451
1452 bool AllDead = true;
1453 SmallVector<unsigned,2> Regs;
1454 for (const MachineOperand &MO : MI->operands()) {
1455 if (!MO.isReg() || !MO.isDef())
1456 continue;
1457 Register R = MO.getReg();
1458 if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1459 AllDead = false;
1460 break;
1461 }
1462 Regs.push_back(R);
1463 }
1464 if (!AllDead)
1465 continue;
1466
1467 B->erase(MI);
1468 for (unsigned Reg : Regs)
1469 MRI->markUsesInDebugValueAsUndef(Reg);
1470 Changed = true;
1471 }
1472
1473 return Changed;
1474}
1475
1476bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1477 if (skipFunction(MF.getFunction()))
1478 return false;
1479
1480 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1481 bool Changed = false;
1482
1483 // Verify: one, but not both.
1485
1486 IFMap.clear();
1487 BaseOrd.clear();
1488 CellOrd.clear();
1489
1490 const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1491 HII = ST.getInstrInfo();
1492 HRI = ST.getRegisterInfo();
1493 MFN = &MF;
1494 MRI = &MF.getRegInfo();
1495 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1496
1497 // Clean up before any further processing, so that dead code does not
1498 // get used in a newly generated "insert" instruction. Have a custom
1499 // version of DCE that preserves lifetime markers. Without it, merging
1500 // of stack objects can fail to recognize and merge disjoint objects
1501 // leading to unnecessary stack growth.
1502 Changed = removeDeadCode(MDT->getRootNode());
1503
1504 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1505 BitTracker BTLoc(HE, MF);
1506 BTLoc.trace(isDebug());
1507 BTLoc.run();
1508 CellMapShadow MS(BTLoc);
1509 CMS = &MS;
1510
1511 buildOrderingMF(BaseOrd);
1512 buildOrderingBT(BaseOrd, CellOrd);
1513
1514 if (isDebug()) {
1515 dbgs() << "Cell ordering:\n";
1516 for (const auto &I : CellOrd) {
1517 unsigned VR = I.first, Pos = I.second;
1518 dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1519 }
1520 }
1521
1522 // Collect candidates for conversion into the insert forms.
1523 MachineBasicBlock *RootB = MDT->getRoot();
1524 OrderedRegisterList AvailR(CellOrd);
1525
1526 const char *const TGName = "hexinsert";
1527 const char *const TGDesc = "Generate Insert Instructions";
1528
1529 {
1530 NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1531 TimingDetail);
1532 collectInBlock(RootB, AvailR);
1533 // Complete the information gathered in IFMap.
1534 computeRemovableRegisters();
1535 }
1536
1537 if (isDebug()) {
1538 dbgs() << "Candidates after collection:\n";
1539 dump_map();
1540 }
1541
1542 if (IFMap.empty())
1543 return Changed;
1544
1545 {
1546 NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1547 pruneCandidates();
1548 }
1549
1550 if (isDebug()) {
1551 dbgs() << "Candidates after pruning:\n";
1552 dump_map();
1553 }
1554
1555 if (IFMap.empty())
1556 return Changed;
1557
1558 {
1559 NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1560 selectCandidates();
1561 }
1562
1563 if (isDebug()) {
1564 dbgs() << "Candidates after selection:\n";
1565 dump_map();
1566 }
1567
1568 // Filter out vregs beyond the cutoff.
1569 if (VRegIndexCutoff.getPosition()) {
1570 unsigned Cutoff = VRegIndexCutoff;
1571
1572 using IterListType = SmallVector<IFMapType::iterator, 16>;
1573
1574 IterListType Out;
1575 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1576 unsigned Idx = Register(I->first).virtRegIndex();
1577 if (Idx >= Cutoff)
1578 Out.push_back(I);
1579 }
1580 for (const auto &It : Out)
1581 IFMap.erase(It);
1582 }
1583 if (IFMap.empty())
1584 return Changed;
1585
1586 {
1587 NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1588 TimingDetail);
1589 generateInserts();
1590 }
1591
1592 return true;
1593}
1594
1596 return new HexagonGenInsert();
1597}
1598
1599//===----------------------------------------------------------------------===//
1600// Public Constructor Functions
1601//===----------------------------------------------------------------------===//
1602
1603INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1604 "Hexagon generate \"insert\" instructions", false, false)
1606INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1607 "Hexagon generate \"insert\" instructions", false, false)
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
BitTracker BT
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
static cl::opt< bool > OptConst("insert-const", cl::init(false), cl::Hidden)
static bool isDebug()
static cl::opt< unsigned > MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, cl::desc("Maximum size of OrderedRegisterList"))
static cl::opt< unsigned > MaxIFMSize("insert-max-ifmap", cl::init(1024), cl::Hidden, cl::desc("Maximum size of IFMap"))
static cl::opt< bool > OptTimingDetail("insert-timing-detail", cl::Hidden, cl::desc("Enable detailed timing of insert " "generation"))
static cl::opt< bool > OptTiming("insert-timing", cl::Hidden, cl::desc("Enable timing of insert generation"))
static cl::opt< unsigned > VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::desc("Vreg distance cutoff for insert " "generation."))
static cl::opt< bool > OptSelectAll0("insert-all0", cl::init(false), cl::Hidden)
static cl::opt< unsigned > VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, cl::desc("Vreg# cutoff for insert generation."))
static cl::opt< bool > OptSelectHas0("insert-has0", cl::init(false), cl::Hidden)
IRTranslator LLVM IR MI
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
block placement stats
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
SmallSet< unsigned, 4 > RegisterSet
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:319
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition BitVector.h:508
BitVector & set()
Definition BitVector.h:370
bool any() const
any - Returns true if any bit is set.
Definition BitVector.h:189
BitVector & operator|=(const BitVector &RHS)
Definition BitVector.h:581
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:327
reference operator[](unsigned Idx)
Definition BitVector.h:463
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool isConstExtended(const MachineInstr &MI) const
MachineInstrBundleIterator< const MachineInstr > const_iterator
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
defusechain_iterator< true, false, true, true, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
SmallSet & operator=(const SmallSet &)=default
const_iterator end() const
Definition SmallSet.h:221
std::pair< const_iterator, bool > insert(const unsigned &V)
Definition SmallSet.h:183
void push_back(const T &Elt)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
initializer< Ty > init(const Ty &Val)
bool empty() const
Definition BasicBlock.h:101
iterator end() const
Definition BasicBlock.h:89
BBIterator iterator
Definition BasicBlock.h:87
LLVM_ABI iterator begin() const
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool includes(R1 &&Range1, R2 &&Range2)
Provide wrappers to std::includes which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1934
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2020
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool isCurrentDebugType(const char *Type, int Level=0)
isCurrentDebugType - Return true if the specified string is the debug type specified on the command l...
Definition Debug.cpp:81
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI bool DebugFlag
This boolean is set to true if the '-debug' command line option is specified.
Definition Debug.cpp:68
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
FunctionPass * createHexagonGenInsert()
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1994
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
bool is(unsigned T) const
Definition BitTracker.h:206
const RegisterCell & lookup(unsigned Reg) const
Definition BitTracker.h:354
bool reached(const MachineBasicBlock *B) const