LLVM 23.0.0git
HexagonGenInsert.cpp
Go to the documentation of this file.
1//===- HexagonGenInsert.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "BitTracker.h"
10#include "Hexagon.h"
11#include "HexagonBitTracker.h"
12#include "HexagonInstrInfo.h"
13#include "HexagonRegisterInfo.h"
14#include "HexagonSubtarget.h"
15#include "llvm/ADT/BitVector.h"
16#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringRef.h"
31#include "llvm/IR/DebugLoc.h"
33#include "llvm/Pass.h"
35#include "llvm/Support/Debug.h"
37#include "llvm/Support/Timer.h"
39#include <algorithm>
40#include <cassert>
41#include <cstdint>
42#include <iterator>
43#include <utility>
44#include <vector>
45
46#define DEBUG_TYPE "hexinsert"
47
48using namespace llvm;
49
51 VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52 cl::desc("Vreg# cutoff for insert generation."));
53// The distance cutoff is selected based on the precheckin-perf results:
54// cutoffs 20, 25, 35, and 40 are worse than 30.
56 VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57 cl::desc("Vreg distance cutoff for insert "
58 "generation."));
59
60// Limit the container sizes for extreme cases where we run out of memory.
62 MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63 cl::desc("Maximum size of OrderedRegisterList"));
64static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
66 cl::desc("Maximum size of IFMap"));
67
68static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69 cl::desc("Enable timing of insert generation"));
70static cl::opt<bool>
71 OptTimingDetail("insert-timing-detail", cl::Hidden,
72 cl::desc("Enable detailed timing of insert "
73 "generation"));
74
75static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77// Whether to construct constant values via "insert". Could eliminate constant
78// extenders, but often not practical.
79static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80
81// The preprocessor gets confused when the DEBUG macro is passed larger
82// chunks of code. Use this function to detect debugging.
83inline static bool isDebug() {
84#ifndef NDEBUG
86#else
87 return false;
88#endif
89}
90
91namespace {
92
93 // Set of virtual registers, based on BitVector.
94 struct RegisterSet : private BitVector {
95 RegisterSet() = default;
96 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97 RegisterSet(const RegisterSet &RS) = default;
98 RegisterSet &operator=(const RegisterSet &RS) = default;
99
100 using BitVector::clear;
101
102 unsigned find_first() const {
104 if (First < 0)
105 return 0;
106 return x2v(First);
107 }
108
109 unsigned find_next(unsigned Prev) const {
110 int Next = BitVector::find_next(v2x(Prev));
111 if (Next < 0)
112 return 0;
113 return x2v(Next);
114 }
115
116 RegisterSet &insert(unsigned R) {
117 unsigned Idx = v2x(R);
118 ensure(Idx);
119 return static_cast<RegisterSet&>(BitVector::set(Idx));
120 }
121 RegisterSet &remove(unsigned R) {
122 unsigned Idx = v2x(R);
123 if (Idx >= size())
124 return *this;
125 return static_cast<RegisterSet&>(BitVector::reset(Idx));
126 }
127
128 RegisterSet &insert(const RegisterSet &Rs) {
129 return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130 }
131 RegisterSet &remove(const RegisterSet &Rs) {
132 return static_cast<RegisterSet&>(BitVector::reset(Rs));
133 }
134
135 reference operator[](unsigned R) {
136 unsigned Idx = v2x(R);
137 ensure(Idx);
138 return BitVector::operator[](Idx);
139 }
140 bool operator[](unsigned R) const {
141 unsigned Idx = v2x(R);
142 assert(Idx < size());
143 return BitVector::operator[](Idx);
144 }
145 bool has(unsigned R) const {
146 unsigned Idx = v2x(R);
147 if (Idx >= size())
148 return false;
149 return BitVector::test(Idx);
150 }
151
152 bool empty() const {
153 return !BitVector::any();
154 }
155 bool includes(const RegisterSet &Rs) const {
156 return Rs.BitVector::subsetOf(*this);
157 }
158 bool intersects(const RegisterSet &Rs) const {
159 return BitVector::anyCommon(Rs);
160 }
161
162 private:
163 void ensure(unsigned Idx) {
164 if (size() <= Idx)
165 resize(std::max(Idx+1, 32U));
166 }
167
168 static inline unsigned v2x(unsigned v) {
169 return Register(v).virtRegIndex();
170 }
171
172 static inline unsigned x2v(unsigned x) {
173 return Register::index2VirtReg(x);
174 }
175 };
176
177 struct PrintRegSet {
178 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
179 : RS(S), TRI(RI) {}
180
181 friend raw_ostream &operator<< (raw_ostream &OS,
182 const PrintRegSet &P);
183
184 private:
185 const RegisterSet &RS;
186 const TargetRegisterInfo *TRI;
187 };
188
189 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
190 OS << '{';
191 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
192 OS << ' ' << printReg(R, P.TRI);
193 OS << " }";
194 return OS;
195 }
196
197 // A convenience class to associate unsigned numbers (such as virtual
198 // registers) with unsigned numbers.
199 struct UnsignedMap : public DenseMap<unsigned,unsigned> {
200 UnsignedMap() = default;
201
202 private:
203 using BaseType = DenseMap<unsigned, unsigned>;
204 };
205
206 // A utility to establish an ordering between virtual registers:
207 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
208 // This is meant as a cache for the ordering of virtual registers defined
209 // by a potentially expensive comparison function, or obtained by a proce-
210 // dure that should not be repeated each time two registers are compared.
211 struct RegisterOrdering : public UnsignedMap {
212 RegisterOrdering() = default;
213
214 unsigned operator[](unsigned VR) const {
215 const_iterator F = find(VR);
216 assert(F != end());
217 return F->second;
218 }
219
220 // Add operator(), so that objects of this class can be used as
221 // comparators in std::sort et al.
222 bool operator() (unsigned VR1, unsigned VR2) const {
223 return operator[](VR1) < operator[](VR2);
224 }
225 };
226
227 // Ordering of bit values. This class does not have operator[], but
228 // is supplies a comparison operator() for use in std:: algorithms.
229 // The order is as follows:
230 // - 0 < 1 < ref
231 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
232 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
233 struct BitValueOrdering {
234 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
235
236 bool operator() (const BitTracker::BitValue &V1,
237 const BitTracker::BitValue &V2) const;
238
239 const RegisterOrdering &BaseOrd;
240 };
241
242} // end anonymous namespace
243
244bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
245 const BitTracker::BitValue &V2) const {
246 if (V1 == V2)
247 return false;
248 // V1==0 => true, V2==0 => false
249 if (V1.is(0) || V2.is(0))
250 return V1.is(0);
251 // Neither of V1,V2 is 0, and V1!=V2.
252 // V2==1 => false, V1==1 => true
253 if (V2.is(1) || V1.is(1))
254 return !V2.is(1);
255 // Both V1,V2 are refs.
256 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
257 if (Ind1 != Ind2)
258 return Ind1 < Ind2;
259 // If V1.Pos==V2.Pos
260 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
261 return V1.RefI.Pos < V2.RefI.Pos;
262}
263
264namespace {
265
266 // Cache for the BitTracker's cell map. Map lookup has a logarithmic
267 // complexity, this class will memoize the lookup results to reduce
268 // the access time for repeated lookups of the same cell.
269 struct CellMapShadow {
270 CellMapShadow(const BitTracker &T) : BT(T) {}
271
272 const BitTracker::RegisterCell &lookup(unsigned VR) {
273 unsigned RInd = Register(VR).virtRegIndex();
274 // Grow the vector to at least 32 elements.
275 if (RInd >= CVect.size())
276 CVect.resize(std::max(RInd+16, 32U), nullptr);
277 const BitTracker::RegisterCell *CP = CVect[RInd];
278 if (CP == nullptr)
279 CP = CVect[RInd] = &BT.lookup(VR);
280 return *CP;
281 }
282
283 const BitTracker &BT;
284
285 private:
286 using CellVectType = std::vector<const BitTracker::RegisterCell *>;
287
288 CellVectType CVect;
289 };
290
291 // Comparator class for lexicographic ordering of virtual registers
292 // according to the corresponding BitTracker::RegisterCell objects.
293 struct RegisterCellLexCompare {
294 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
295 : BitOrd(BO), CM(M) {}
296
297 bool operator() (unsigned VR1, unsigned VR2) const;
298
299 private:
300 const BitValueOrdering &BitOrd;
301 CellMapShadow &CM;
302 };
303
304 // Comparator class for lexicographic ordering of virtual registers
305 // according to the specified bits of the corresponding BitTracker::
306 // RegisterCell objects.
307 // Specifically, this class will be used to compare bit B of a register
308 // cell for a selected virtual register R with bit N of any register
309 // other than R.
310 struct RegisterCellBitCompareSel {
311 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
312 const BitValueOrdering &BO, CellMapShadow &M)
313 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
314
315 bool operator() (unsigned VR1, unsigned VR2) const;
316
317 private:
318 const unsigned SelR, SelB;
319 const unsigned BitN;
320 const BitValueOrdering &BitOrd;
321 CellMapShadow &CM;
322 };
323
324} // end anonymous namespace
325
326bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
327 // Ordering of registers, made up from two given orderings:
328 // - the ordering of the register numbers, and
329 // - the ordering of register cells.
330 // Def. R1 < R2 if:
331 // - cell(R1) < cell(R2), or
332 // - cell(R1) == cell(R2), and index(R1) < index(R2).
333 //
334 // For register cells, the ordering is lexicographic, with index 0 being
335 // the most significant.
336 if (VR1 == VR2)
337 return false;
338
339 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
340 uint16_t W1 = RC1.width(), W2 = RC2.width();
341 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
342 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
343 if (V1 != V2)
344 return BitOrd(V1, V2);
345 }
346 // Cells are equal up until the common length.
347 if (W1 != W2)
348 return W1 < W2;
349
350 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
351}
352
353bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
354 if (VR1 == VR2)
355 return false;
356 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
357 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
358 uint16_t W1 = RC1.width(), W2 = RC2.width();
359 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
360 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
361 // If Bit1 exceeds the width of VR1, then:
362 // - return false, if at the same time Bit2 exceeds VR2, or
363 // - return true, otherwise.
364 // (I.e. "a bit value that does not exist is less than any bit value
365 // that does exist".)
366 if (W1 <= Bit1)
367 return Bit2 < W2;
368 // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
369 if (W2 <= Bit2)
370 return false;
371
372 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
373 if (V1 != V2)
374 return BitOrd(V1, V2);
375 return false;
376}
377
378namespace {
379
380 class OrderedRegisterList {
381 using ListType = std::vector<unsigned>;
382 const unsigned MaxSize;
383
384 public:
385 OrderedRegisterList(const RegisterOrdering &RO)
386 : MaxSize(MaxORLSize), Ord(RO) {}
387
388 void insert(unsigned VR);
389 void remove(unsigned VR);
390
391 unsigned operator[](unsigned Idx) const {
392 assert(Idx < Seq.size());
393 return Seq[Idx];
394 }
395
396 unsigned size() const {
397 return Seq.size();
398 }
399
400 using iterator = ListType::iterator;
401 using const_iterator = ListType::const_iterator;
402
403 iterator begin() { return Seq.begin(); }
404 iterator end() { return Seq.end(); }
405 const_iterator begin() const { return Seq.begin(); }
406 const_iterator end() const { return Seq.end(); }
407
408 // Convenience function to convert an iterator to the corresponding index.
409 unsigned idx(iterator It) const { return It-begin(); }
410
411 private:
412 ListType Seq;
413 const RegisterOrdering &Ord;
414 };
415
416 struct PrintORL {
417 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
418 : RL(L), TRI(RI) {}
419
420 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
421
422 private:
423 const OrderedRegisterList &RL;
424 const TargetRegisterInfo *TRI;
425 };
426
427 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
428 OS << '(';
429 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
430 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
431 if (I != B)
432 OS << ", ";
433 OS << printReg(*I, P.TRI);
434 }
435 OS << ')';
436 return OS;
437 }
438
439} // end anonymous namespace
440
441void OrderedRegisterList::insert(unsigned VR) {
442 iterator L = llvm::lower_bound(Seq, VR, Ord);
443 if (L == Seq.end())
444 Seq.push_back(VR);
445 else
446 Seq.insert(L, VR);
447
448 unsigned S = Seq.size();
449 if (S > MaxSize)
450 Seq.resize(MaxSize);
451 assert(Seq.size() <= MaxSize);
452}
453
454void OrderedRegisterList::remove(unsigned VR) {
455 iterator L = llvm::lower_bound(Seq, VR, Ord);
456 if (L != Seq.end())
457 Seq.erase(L);
458}
459
460namespace {
461
462 // A record of the insert form. The fields correspond to the operands
463 // of the "insert" instruction:
464 // ... = insert(SrcR, InsR, #Wdh, #Off)
465 struct IFRecord {
466 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
467 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
468
469 unsigned SrcR, InsR;
470 uint16_t Wdh, Off;
471 };
472
473 struct PrintIFR {
474 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
475 : IFR(R), TRI(RI) {}
476
477 private:
478 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
479
480 const IFRecord &IFR;
481 const TargetRegisterInfo *TRI;
482 };
483
484 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
485 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
486 OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
487 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
488 return OS;
489 }
490
491 using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
492
493} // end anonymous namespace
494
495namespace {
496
497 class HexagonGenInsert : public MachineFunctionPass {
498 public:
499 static char ID;
500
501 HexagonGenInsert() : MachineFunctionPass(ID) {}
502
503 StringRef getPassName() const override {
504 return "Hexagon generate \"insert\" instructions";
505 }
506
507 void getAnalysisUsage(AnalysisUsage &AU) const override {
508 AU.addRequired<MachineDominatorTreeWrapperPass>();
509 AU.addPreserved<MachineDominatorTreeWrapperPass>();
511 }
512
513 bool runOnMachineFunction(MachineFunction &MF) override;
514
515 private:
516 using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
517
518 void buildOrderingMF(RegisterOrdering &RO) const;
519 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
520 bool isIntClass(const TargetRegisterClass *RC) const;
521 bool isConstant(unsigned VR) const;
522 bool isSmallConstant(unsigned VR) const;
523 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
524 uint16_t L, uint16_t S) const;
525 bool findSelfReference(unsigned VR) const;
526 bool findNonSelfReference(unsigned VR) const;
527 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
528 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
529 unsigned distance(const MachineBasicBlock *FromB,
530 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
531 PairMapType &M) const;
532 unsigned distance(MachineBasicBlock::const_iterator FromI,
533 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
534 PairMapType &M) const;
535 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
536 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
537 void findRemovableRegisters(unsigned VR, IFRecord IF,
538 RegisterSet &RMs) const;
539 void computeRemovableRegisters();
540
541 void pruneEmptyLists();
542 void pruneCoveredSets(unsigned VR);
543 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
544 void pruneRegCopies(unsigned VR);
545 void pruneCandidates();
546 void selectCandidates();
547 bool generateInserts();
548
549 bool removeDeadCode(MachineDomTreeNode *N);
550
551 // IFRecord coupled with a set of potentially removable registers:
552 using IFListType = std::vector<IFRecordWithRegSet>;
553 using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
554
555 void dump_map() const;
556
557 const HexagonInstrInfo *HII = nullptr;
558 const HexagonRegisterInfo *HRI = nullptr;
559
560 MachineFunction *MFN;
561 MachineRegisterInfo *MRI;
562 MachineDominatorTree *MDT;
563 CellMapShadow *CMS;
564
565 RegisterOrdering BaseOrd;
566 RegisterOrdering CellOrd;
567 IFMapType IFMap;
568 };
569
570} // end anonymous namespace
571
572char HexagonGenInsert::ID = 0;
573
574void HexagonGenInsert::dump_map() const {
575 for (const auto &I : IFMap) {
576 dbgs() << " " << printReg(I.first, HRI) << ":\n";
577 const IFListType &LL = I.second;
578 for (const auto &J : LL)
579 dbgs() << " " << PrintIFR(J.first, HRI) << ", "
580 << PrintRegSet(J.second, HRI) << '\n';
581 }
582}
583
584void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
585 unsigned Index = 0;
586
587 for (const MachineBasicBlock &B : *MFN) {
588 if (!CMS->BT.reached(&B))
589 continue;
590
591 for (const MachineInstr &MI : B) {
592 for (const MachineOperand &MO : MI.operands()) {
593 if (MO.isReg() && MO.isDef()) {
594 Register R = MO.getReg();
595 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
596 if (R.isVirtual())
597 RO.insert(std::make_pair(R, Index++));
598 }
599 }
600 }
601 }
602 // Since some virtual registers may have had their def and uses eliminated,
603 // they are no longer referenced in the code, and so they will not appear
604 // in the map.
605}
606
607void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
608 RegisterOrdering &RO) const {
609 // Create a vector of all virtual registers (collect them from the base
610 // ordering RB), and then sort it using the RegisterCell comparator.
611 BitValueOrdering BVO(RB);
612 RegisterCellLexCompare LexCmp(BVO, *CMS);
613
614 using SortableVectorType = std::vector<unsigned>;
615
616 SortableVectorType VRs;
617 for (auto &I : RB)
618 VRs.push_back(I.first);
619 llvm::sort(VRs, LexCmp);
620 // Transfer the results to the outgoing register ordering.
621 for (unsigned i = 0, n = VRs.size(); i < n; ++i)
622 RO.insert(std::make_pair(VRs[i], i));
623}
624
625inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
626 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
627}
628
629bool HexagonGenInsert::isConstant(unsigned VR) const {
630 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
631 uint16_t W = RC.width();
632 for (uint16_t i = 0; i < W; ++i) {
633 const BitTracker::BitValue &BV = RC[i];
634 if (BV.is(0) || BV.is(1))
635 continue;
636 return false;
637 }
638 return true;
639}
640
641bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
642 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
643 uint16_t W = RC.width();
644 if (W > 64)
645 return false;
646 uint64_t V = 0, B = 1;
647 for (uint16_t i = 0; i < W; ++i) {
648 const BitTracker::BitValue &BV = RC[i];
649 if (BV.is(1))
650 V |= B;
651 else if (!BV.is(0))
652 return false;
653 B <<= 1;
654 }
655
656 // For 32-bit registers, consider: Rd = #s16.
657 if (W == 32)
658 return isInt<16>(V);
659
660 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
661 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
662}
663
664bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
665 unsigned InsR, uint16_t L, uint16_t S) const {
666 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
667 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
668 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
669 // Only integet (32-/64-bit) register classes.
670 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
671 return false;
672 // The "source" register must be of the same class as DstR.
673 if (DstRC != SrcRC)
674 return false;
675 if (DstRC == InsRC)
676 return true;
677 // A 64-bit register can only be generated from other 64-bit registers.
678 if (DstRC == &Hexagon::DoubleRegsRegClass)
679 return false;
680 // Otherwise, the L and S cannot span 32-bit word boundary.
681 if (S < 32 && S+L > 32)
682 return false;
683 return true;
684}
685
686bool HexagonGenInsert::findSelfReference(unsigned VR) const {
687 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
688 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
689 const BitTracker::BitValue &V = RC[i];
690 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
691 return true;
692 }
693 return false;
694}
695
696bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
697 BitTracker::RegisterCell RC = CMS->lookup(VR);
698 for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
699 const BitTracker::BitValue &V = RC[i];
700 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
701 return true;
702 }
703 return false;
704}
705
706void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
707 RegisterSet &Defs) const {
708 for (const MachineOperand &MO : MI->operands()) {
709 if (!MO.isReg() || !MO.isDef())
710 continue;
711 Register R = MO.getReg();
712 if (!R.isVirtual())
713 continue;
714 Defs.insert(R);
715 }
716}
717
718void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
719 RegisterSet &Uses) const {
720 for (const MachineOperand &MO : MI->operands()) {
721 if (!MO.isReg() || !MO.isUse())
722 continue;
723 Register R = MO.getReg();
724 if (!R.isVirtual())
725 continue;
726 Uses.insert(R);
727 }
728}
729
730unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
731 const MachineBasicBlock *ToB, const UnsignedMap &RPO,
732 PairMapType &M) const {
733 // Forward distance from the end of a block to the beginning of it does
734 // not make sense. This function should not be called with FromB == ToB.
735 assert(FromB != ToB);
736
737 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
738 // If we have already computed it, return the cached result.
739 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
740 if (F != M.end())
741 return F->second;
742 unsigned ToRPO = RPO.lookup(ToN);
743
744 unsigned MaxD = 0;
745
746 for (const MachineBasicBlock *PB : ToB->predecessors()) {
747 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
748 // along that path will be 0, and we don't need to do any calculations
749 // on it.
750 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
751 continue;
752 unsigned D = PB->size() + distance(FromB, PB, RPO, M);
753 if (D > MaxD)
754 MaxD = D;
755 }
756
757 // Memoize the result for later lookup.
758 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
759 return MaxD;
760}
761
762unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
763 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
764 PairMapType &M) const {
765 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
766 if (FB == TB)
767 return std::distance(FromI, ToI);
768 unsigned D1 = std::distance(TB->begin(), ToI);
769 unsigned D2 = distance(FB, TB, RPO, M);
770 unsigned D3 = std::distance(FromI, FB->end());
771 return D1+D2+D3;
772}
773
774bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
775 OrderedRegisterList &AVs) {
776 if (isDebug()) {
777 dbgs() << __func__ << ": " << printReg(VR, HRI)
778 << " AVs: " << PrintORL(AVs, HRI) << "\n";
779 }
780 if (AVs.size() == 0)
781 return false;
782
783 using iterator = OrderedRegisterList::iterator;
784
785 BitValueOrdering BVO(BaseOrd);
786 const BitTracker::RegisterCell &RC = CMS->lookup(VR);
787 uint16_t W = RC.width();
788
789 using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
790 using RSListType = std::vector<RSRecord>;
791 // Have a map, with key being the matching prefix length, and the value
792 // being the list of pairs (R,S), where R's prefix matches VR at S.
793 // (DenseMap<uint16_t,RSListType> fails to instantiate.)
794 using LRSMapType = DenseMap<unsigned, RSListType>;
795 LRSMapType LM;
796
797 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
798 // and find matching prefixes from AVs with the rotated RC. Such a prefix
799 // would match a string of bits (of length L) in RC starting at S.
800 for (uint16_t S = 0; S < W; ++S) {
801 iterator B = AVs.begin(), E = AVs.end();
802 // The registers in AVs are ordered according to the lexical order of
803 // the corresponding register cells. This means that the range of regis-
804 // ters in AVs that match a prefix of length L+1 will be contained in
805 // the range that matches a prefix of length L. This means that we can
806 // keep narrowing the search space as the prefix length goes up. This
807 // helps reduce the overall complexity of the search.
808 uint16_t L;
809 for (L = 0; L < W-S; ++L) {
810 // Compare against VR's bits starting at S, which emulates rotation
811 // of VR by S.
812 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
813 iterator NewB = std::lower_bound(B, E, VR, RCB);
814 iterator NewE = std::upper_bound(NewB, E, VR, RCB);
815 // For the registers that are eliminated from the next range, L is
816 // the longest prefix matching VR at position S (their prefixes
817 // differ from VR at S+L). If L>0, record this information for later
818 // use.
819 if (L > 0) {
820 for (iterator I = B; I != NewB; ++I)
821 LM[L].push_back(std::make_pair(*I, S));
822 for (iterator I = NewE; I != E; ++I)
823 LM[L].push_back(std::make_pair(*I, S));
824 }
825 B = NewB, E = NewE;
826 if (B == E)
827 break;
828 }
829 // Record the final register range. If this range is non-empty, then
830 // L=W-S.
831 assert(B == E || L == W-S);
832 if (B != E) {
833 for (iterator I = B; I != E; ++I)
834 LM[L].push_back(std::make_pair(*I, S));
835 // If B!=E, then we found a range of registers whose prefixes cover the
836 // rest of VR from position S. There is no need to further advance S.
837 break;
838 }
839 }
840
841 if (isDebug()) {
842 dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
843 for (const auto &I : LM) {
844 dbgs() << " L=" << I.first << ':';
845 const RSListType &LL = I.second;
846 for (const auto &J : LL)
847 dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
848 dbgs() << '\n';
849 }
850 }
851
852 bool Recorded = false;
853
854 for (unsigned SrcR : AVs) {
855 int FDi = -1, LDi = -1; // First/last different bit.
856 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
857 uint16_t AW = AC.width();
858 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
859 if (RC[i] == AC[i])
860 continue;
861 if (FDi == -1)
862 FDi = i;
863 LDi = i;
864 }
865 if (FDi == -1)
866 continue; // TODO (future): Record identical registers.
867 // Look for a register whose prefix could patch the range [FD..LD]
868 // where VR and SrcR differ.
869 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
870 uint16_t MinL = LD-FD+1;
871 for (uint16_t L = MinL; L < W; ++L) {
872 LRSMapType::iterator F = LM.find(L);
873 if (F == LM.end())
874 continue;
875 RSListType &LL = F->second;
876 for (const auto &I : LL) {
877 uint16_t S = I.second;
878 // MinL is the minimum length of the prefix. Any length above MinL
879 // allows some flexibility as to where the prefix can start:
880 // given the extra length EL=L-MinL, the prefix must start between
881 // max(0,FD-EL) and FD.
882 if (S > FD) // Starts too late.
883 continue;
884 uint16_t EL = L-MinL;
885 uint16_t LowS = (EL < FD) ? FD-EL : 0;
886 if (S < LowS) // Starts too early.
887 continue;
888 unsigned InsR = I.first;
889 if (!isValidInsertForm(VR, SrcR, InsR, L, S))
890 continue;
891 if (isDebug()) {
892 dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
893 << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
894 << S << ")\n";
895 }
896 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
897 IFMap[VR].push_back(RR);
898 Recorded = true;
899 }
900 }
901 }
902
903 return Recorded;
904}
905
906void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
907 OrderedRegisterList &AVs) {
908 if (isDebug())
909 dbgs() << "visiting block " << printMBBReference(*B) << "\n";
910
911 // First, check if this block is reachable at all. If not, the bit tracker
912 // will not have any information about registers in it.
913 if (!CMS->BT.reached(B))
914 return;
915
916 bool DoConst = OptConst;
917 // Keep a separate set of registers defined in this block, so that we
918 // can remove them from the list of available registers once all DT
919 // successors have been processed.
920 RegisterSet BlockDefs, InsDefs;
921 for (MachineInstr &MI : *B) {
922 // Stop if the map size is too large.
923 if (IFMap.size() >= MaxIFMSize)
924 break;
925
926 InsDefs.clear();
927 getInstrDefs(&MI, InsDefs);
928 // Leave those alone. They are more transparent than "insert".
929 bool Skip = MI.isCopy() || MI.isRegSequence();
930
931 if (!Skip) {
932 // Visit all defined registers, and attempt to find the corresponding
933 // "insert" representations.
934 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
935 // Do not collect registers that are known to be compile-time cons-
936 // tants, unless requested.
937 if (!DoConst && isConstant(VR))
938 continue;
939 // If VR's cell contains a reference to VR, then VR cannot be defined
940 // via "insert". If VR is a constant that can be generated in a single
941 // instruction (without constant extenders), generating it via insert
942 // makes no sense.
943 if (findSelfReference(VR) || isSmallConstant(VR))
944 continue;
945
946 findRecordInsertForms(VR, AVs);
947 // Stop if the map size is too large.
948 if (IFMap.size() >= MaxIFMSize)
949 break;
950 }
951 }
952
953 // Insert the defined registers into the list of available registers
954 // after they have been processed.
955 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
956 AVs.insert(VR);
957 BlockDefs.insert(InsDefs);
958 }
959
960 for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
961 MachineBasicBlock *SB = DTN->getBlock();
962 collectInBlock(SB, AVs);
963 }
964
965 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
966 AVs.remove(VR);
967}
968
969void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
970 RegisterSet &RMs) const {
971 // For a given register VR and a insert form, find the registers that are
972 // used by the current definition of VR, and which would no longer be
973 // needed for it after the definition of VR is replaced with the insert
974 // form. These are the registers that could potentially become dead.
975 RegisterSet Regs[2];
976
977 unsigned S = 0; // Register set selector.
978 Regs[S].insert(VR);
979
980 while (!Regs[S].empty()) {
981 // Breadth-first search.
982 unsigned OtherS = 1-S;
983 Regs[OtherS].clear();
984 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
985 Regs[S].remove(R);
986 if (R == IF.SrcR || R == IF.InsR)
987 continue;
988 // Check if a given register has bits that are references to any other
989 // registers. This is to detect situations where the instruction that
990 // defines register R takes register Q as an operand, but R itself does
991 // not contain any bits from Q. Loads are examples of how this could
992 // happen:
993 // R = load Q
994 // In this case (assuming we do not have any knowledge about the loaded
995 // value), we must not treat R as a "conveyance" of the bits from Q.
996 // (The information in BT about R's bits would have them as constants,
997 // in case of zero-extending loads, or refs to R.)
998 if (!findNonSelfReference(R))
999 continue;
1000 RMs.insert(R);
1001 const MachineInstr *DefI = MRI->getVRegDef(R);
1002 assert(DefI);
1003 // Do not iterate past PHI nodes to avoid infinite loops. This can
1004 // make the final set a bit less accurate, but the removable register
1005 // sets are an approximation anyway.
1006 if (DefI->isPHI())
1007 continue;
1008 getInstrUses(DefI, Regs[OtherS]);
1009 }
1010 S = OtherS;
1011 }
1012 // The register VR is added to the list as a side-effect of the algorithm,
1013 // but it is not "potentially removable". A potentially removable register
1014 // is one that may become unused (dead) after conversion to the insert form
1015 // IF, and obviously VR (or its replacement) will not become dead by apply-
1016 // ing IF.
1017 RMs.remove(VR);
1018}
1019
1020void HexagonGenInsert::computeRemovableRegisters() {
1021 for (auto &I : IFMap) {
1022 IFListType &LL = I.second;
1023 for (auto &J : LL)
1024 findRemovableRegisters(I.first, J.first, J.second);
1025 }
1026}
1027
1028void HexagonGenInsert::pruneEmptyLists() {
1029 // Remove all entries from the map, where the register has no insert forms
1030 // associated with it.
1031 IFMap.remove_if([](const auto &P) { return P.second.empty(); });
1032}
1033
1034void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1035 IFMapType::iterator F = IFMap.find(VR);
1036 assert(F != IFMap.end());
1037 IFListType &LL = F->second;
1038
1039 // First, examine the IF candidates for register VR whose removable-regis-
1040 // ter sets are empty. This means that a given candidate will not help eli-
1041 // minate any registers, but since "insert" is not a constant-extendable
1042 // instruction, using such a candidate may reduce code size if the defini-
1043 // tion of VR is constant-extended.
1044 // If there exists a candidate with a non-empty set, the ones with empty
1045 // sets will not be used and can be removed.
1046 MachineInstr *DefVR = MRI->getVRegDef(VR);
1047 bool DefEx = HII->isConstExtended(*DefVR);
1048 bool HasNE = false;
1049 for (const auto &I : LL) {
1050 if (I.second.empty())
1051 continue;
1052 HasNE = true;
1053 break;
1054 }
1055 if (!DefEx || HasNE) {
1056 // The definition of VR is not constant-extended, or there is a candidate
1057 // with a non-empty set. Remove all candidates with empty sets.
1058 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1059 return IR.second.empty();
1060 };
1061 llvm::erase_if(LL, IsEmpty);
1062 } else {
1063 // The definition of VR is constant-extended, and all candidates have
1064 // empty removable-register sets. Pick the maximum candidate, and remove
1065 // all others. The "maximum" does not have any special meaning here, it
1066 // is only so that the candidate that will remain on the list is selec-
1067 // ted deterministically.
1068 IFRecord MaxIF = LL[0].first;
1069 for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1070 // If LL[MaxI] < LL[i], then MaxI = i.
1071 const IFRecord &IF = LL[i].first;
1072 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1073 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1074 if (M0 > R0)
1075 continue;
1076 if (M0 == R0) {
1077 if (M1 > R1)
1078 continue;
1079 if (M1 == R1) {
1080 if (MaxIF.Wdh > IF.Wdh)
1081 continue;
1082 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1083 continue;
1084 }
1085 }
1086 // MaxIF < IF.
1087 MaxIF = IF;
1088 }
1089 // Remove everything except the maximum candidate. All register sets
1090 // are empty, so no need to preserve anything.
1091 LL.clear();
1092 LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1093 }
1094
1095 // Now, remove those whose sets of potentially removable registers are
1096 // contained in another IF candidate for VR. For example, given these
1097 // candidates for %45,
1098 // %45:
1099 // (%44,%41,#9,#8), { %42 }
1100 // (%43,%41,#9,#8), { %42 %44 }
1101 // remove the first one, since it is contained in the second one.
1102 for (unsigned i = 0, n = LL.size(); i < n; ) {
1103 const RegisterSet &RMi = LL[i].second;
1104 unsigned j = 0;
1105 while (j < n) {
1106 if (j != i && LL[j].second.includes(RMi))
1107 break;
1108 j++;
1109 }
1110 if (j == n) { // RMi not contained in anything else.
1111 i++;
1112 continue;
1113 }
1114 LL.erase(LL.begin()+i);
1115 n = LL.size();
1116 }
1117}
1118
1119void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1120 PairMapType &M) {
1121 IFMapType::iterator F = IFMap.find(VR);
1122 assert(F != IFMap.end());
1123 IFListType &LL = F->second;
1124 unsigned Cutoff = VRegDistCutoff;
1125 const MachineInstr *DefV = MRI->getVRegDef(VR);
1126
1127 for (unsigned i = LL.size(); i > 0; --i) {
1128 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1129 const MachineInstr *DefS = MRI->getVRegDef(SR);
1130 const MachineInstr *DefI = MRI->getVRegDef(IR);
1131 unsigned DSV = distance(DefS, DefV, RPO, M);
1132 if (DSV < Cutoff) {
1133 unsigned DIV = distance(DefI, DefV, RPO, M);
1134 if (DIV < Cutoff)
1135 continue;
1136 }
1137 LL.erase(LL.begin()+(i-1));
1138 }
1139}
1140
1141void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1142 IFMapType::iterator F = IFMap.find(VR);
1143 assert(F != IFMap.end());
1144 IFListType &LL = F->second;
1145
1146 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1147 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1148 };
1149 llvm::erase_if(LL, IsCopy);
1150}
1151
1152void HexagonGenInsert::pruneCandidates() {
1153 // Remove candidates that are not beneficial, regardless of the final
1154 // selection method.
1155 // First, remove candidates whose potentially removable set is a subset
1156 // of another candidate's set.
1157 for (const auto &I : IFMap)
1158 pruneCoveredSets(I.first);
1159
1160 UnsignedMap RPO;
1161
1162 using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
1163
1164 RPOTType RPOT(MFN);
1165 unsigned RPON = 0;
1166 for (const auto &I : RPOT)
1167 RPO[I->getNumber()] = RPON++;
1168
1169 PairMapType Memo; // Memoization map for distance calculation.
1170 // Remove candidates that would use registers defined too far away.
1171 for (const auto &I : IFMap)
1172 pruneUsesTooFar(I.first, RPO, Memo);
1173
1174 pruneEmptyLists();
1175
1176 for (const auto &I : IFMap)
1177 pruneRegCopies(I.first);
1178}
1179
1180namespace {
1181
1182 // Class for comparing IF candidates for registers that have multiple of
1183 // them. The smaller the candidate, according to this ordering, the better.
1184 // First, compare the number of zeros in the associated potentially remova-
1185 // ble register sets. "Zero" indicates that the register is very likely to
1186 // become dead after this transformation.
1187 // Second, compare "averages", i.e. use-count per size. The lower wins.
1188 // After that, it does not really matter which one is smaller. Resolve
1189 // the tie in some deterministic way.
1190 struct IFOrdering {
1191 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1192 : UseC(UC), BaseOrd(BO) {}
1193
1194 bool operator() (const IFRecordWithRegSet &A,
1195 const IFRecordWithRegSet &B) const;
1196
1197 private:
1198 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1199 unsigned &Sum) const;
1200
1201 const UnsignedMap &UseC;
1202 const RegisterOrdering &BaseOrd;
1203 };
1204
1205} // end anonymous namespace
1206
1207bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1208 const IFRecordWithRegSet &B) const {
1209 unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1210 unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1211 stats(A.second, SizeA, ZeroA, SumA);
1212 stats(B.second, SizeB, ZeroB, SumB);
1213
1214 // We will pick the minimum element. The more zeros, the better.
1215 if (ZeroA != ZeroB)
1216 return ZeroA > ZeroB;
1217 // Compare SumA/SizeA with SumB/SizeB, lower is better.
1218 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1219 if (AvgA != AvgB)
1220 return AvgA < AvgB;
1221
1222 // The sets compare identical so far. Resort to comparing the IF records.
1223 // The actual values don't matter, this is only for determinism.
1224 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1225 if (OSA != OSB)
1226 return OSA < OSB;
1227 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1228 if (OIA != OIB)
1229 return OIA < OIB;
1230 if (A.first.Wdh != B.first.Wdh)
1231 return A.first.Wdh < B.first.Wdh;
1232 return A.first.Off < B.first.Off;
1233}
1234
1235void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1236 unsigned &Sum) const {
1237 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1238 UnsignedMap::const_iterator F = UseC.find(R);
1239 assert(F != UseC.end());
1240 unsigned UC = F->second;
1241 if (UC == 0)
1242 Zero++;
1243 Sum += UC;
1244 Size++;
1245 }
1246}
1247
1248void HexagonGenInsert::selectCandidates() {
1249 // Some registers may have multiple valid candidates. Pick the best one
1250 // (or decide not to use any).
1251
1252 // Compute the "removability" measure of R:
1253 // For each potentially removable register R, record the number of regis-
1254 // ters with IF candidates, where R appears in at least one set.
1255 RegisterSet AllRMs;
1256 UnsignedMap UseC, RemC;
1257 IFMapType::iterator End = IFMap.end();
1258
1259 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1260 const IFListType &LL = I->second;
1262 for (const auto &J : LL)
1263 TT.insert(J.second);
1264 for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1265 RemC[R]++;
1266 AllRMs.insert(TT);
1267 }
1268
1269 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1270 using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1271 using InstrSet = SmallPtrSet<const MachineInstr *, 16>;
1272
1273 InstrSet UIs;
1274 // Count as the number of instructions in which R is used, not the
1275 // number of operands.
1276 use_iterator E = MRI->use_nodbg_end();
1277 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1278 UIs.insert(I->getParent());
1279 unsigned C = UIs.size();
1280 // Calculate a measure, which is the number of instructions using R,
1281 // minus the "removability" count computed earlier.
1282 unsigned D = RemC[R];
1283 UseC[R] = (C > D) ? C-D : 0; // doz
1284 }
1285
1286 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1287 if (!SelectAll0 && !SelectHas0)
1288 SelectAll0 = true;
1289
1290 // The smaller the number UseC for a given register R, the "less used"
1291 // R is aside from the opportunities for removal offered by generating
1292 // "insert" instructions.
1293 // Iterate over the IF map, and for those registers that have multiple
1294 // candidates, pick the minimum one according to IFOrdering.
1295 IFOrdering IFO(UseC, BaseOrd);
1296 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1297 IFListType &LL = I->second;
1298 if (LL.empty())
1299 continue;
1300 // Get the minimum element, remember it and clear the list. If the
1301 // element found is adequate, we will put it back on the list, other-
1302 // wise the list will remain empty, and the entry for this register
1303 // will be removed (i.e. this register will not be replaced by insert).
1304 IFListType::iterator MinI = llvm::min_element(LL, IFO);
1305 assert(MinI != LL.end());
1306 IFRecordWithRegSet M = *MinI;
1307 LL.clear();
1308
1309 // We want to make sure that this replacement will have a chance to be
1310 // beneficial, and that means that we want to have indication that some
1311 // register will be removed. The most likely registers to be eliminated
1312 // are the use operands in the definition of I->first. Accept/reject a
1313 // candidate based on how many of its uses it can potentially eliminate.
1314
1315 RegisterSet Us;
1316 const MachineInstr *DefI = MRI->getVRegDef(I->first);
1317 getInstrUses(DefI, Us);
1318 bool Accept = false;
1319
1320 if (SelectAll0) {
1321 bool All0 = true;
1322 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1323 if (UseC[R] == 0)
1324 continue;
1325 All0 = false;
1326 break;
1327 }
1328 Accept = All0;
1329 } else if (SelectHas0) {
1330 bool Has0 = false;
1331 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1332 if (UseC[R] != 0)
1333 continue;
1334 Has0 = true;
1335 break;
1336 }
1337 Accept = Has0;
1338 }
1339 if (Accept)
1340 LL.push_back(M);
1341 }
1342
1343 // Remove candidates that add uses of removable registers, unless the
1344 // removable registers are among replacement candidates.
1345 // Recompute the removable registers, since some candidates may have
1346 // been eliminated.
1347 AllRMs.clear();
1348 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1349 const IFListType &LL = I->second;
1350 if (!LL.empty())
1351 AllRMs.insert(LL[0].second);
1352 }
1353 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1354 IFListType &LL = I->second;
1355 if (LL.empty())
1356 continue;
1357 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1358 if (AllRMs[SR] || AllRMs[IR])
1359 LL.clear();
1360 }
1361
1362 pruneEmptyLists();
1363}
1364
1365bool HexagonGenInsert::generateInserts() {
1366 // Create a new register for each one from IFMap, and store them in the
1367 // map.
1368 UnsignedMap RegMap;
1369 for (auto &I : IFMap) {
1370 unsigned VR = I.first;
1371 const TargetRegisterClass *RC = MRI->getRegClass(VR);
1372 Register NewVR = MRI->createVirtualRegister(RC);
1373 RegMap[VR] = NewVR;
1374 }
1375
1376 // We can generate the "insert" instructions using potentially stale re-
1377 // gisters: SrcR and InsR for a given VR may be among other registers that
1378 // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1379 for (auto &I : IFMap) {
1380 MachineInstr *MI = MRI->getVRegDef(I.first);
1381 MachineBasicBlock &B = *MI->getParent();
1382 DebugLoc DL = MI->getDebugLoc();
1383 unsigned NewR = RegMap[I.first];
1384 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1385 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1386 : HII->get(Hexagon::S2_insertp);
1387 IFRecord IF = I.second[0].first;
1388 unsigned Wdh = IF.Wdh, Off = IF.Off;
1389 unsigned InsS = 0;
1390 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1391 InsS = Hexagon::isub_lo;
1392 if (Off >= 32) {
1393 InsS = Hexagon::isub_hi;
1394 Off -= 32;
1395 }
1396 }
1397 // Advance to the proper location for inserting instructions. This could
1398 // be B.end().
1400 if (MI->isPHI())
1401 At = B.getFirstNonPHI();
1402
1403 BuildMI(B, At, DL, D, NewR)
1404 .addReg(IF.SrcR)
1405 .addReg(IF.InsR, {}, InsS)
1406 .addImm(Wdh)
1407 .addImm(Off);
1408
1409 MRI->clearKillFlags(IF.SrcR);
1410 MRI->clearKillFlags(IF.InsR);
1411 }
1412
1413 for (const auto &I : IFMap) {
1414 MachineInstr *DefI = MRI->getVRegDef(I.first);
1415 MRI->replaceRegWith(I.first, RegMap[I.first]);
1416 DefI->eraseFromParent();
1417 }
1418
1419 return true;
1420}
1421
1422bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1423 bool Changed = false;
1424
1425 for (auto *DTN : children<MachineDomTreeNode*>(N))
1426 Changed |= removeDeadCode(DTN);
1427
1428 MachineBasicBlock *B = N->getBlock();
1429 std::vector<MachineInstr*> Instrs;
1430 for (MachineInstr &MI : llvm::reverse(*B))
1431 Instrs.push_back(&MI);
1432
1433 for (MachineInstr *MI : Instrs) {
1434 unsigned Opc = MI->getOpcode();
1435 // Do not touch lifetime markers. This is why the target-independent DCE
1436 // cannot be used.
1437 if (Opc == TargetOpcode::LIFETIME_START ||
1438 Opc == TargetOpcode::LIFETIME_END)
1439 continue;
1440 bool Store = false;
1441 if (MI->isInlineAsm() || !MI->isSafeToMove(Store))
1442 continue;
1443
1444 bool AllDead = true;
1445 SmallVector<unsigned,2> Regs;
1446 for (const MachineOperand &MO : MI->operands()) {
1447 if (!MO.isReg() || !MO.isDef())
1448 continue;
1449 Register R = MO.getReg();
1450 if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1451 AllDead = false;
1452 break;
1453 }
1454 Regs.push_back(R);
1455 }
1456 if (!AllDead)
1457 continue;
1458
1459 B->erase(MI);
1460 for (unsigned Reg : Regs)
1462 Changed = true;
1463 }
1464
1465 return Changed;
1466}
1467
1468bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1469 if (skipFunction(MF.getFunction()))
1470 return false;
1471
1472 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1473 bool Changed = false;
1474
1475 // Verify: one, but not both.
1477
1478 IFMap.clear();
1479 BaseOrd.clear();
1480 CellOrd.clear();
1481
1482 const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1483 HII = ST.getInstrInfo();
1484 HRI = ST.getRegisterInfo();
1485 MFN = &MF;
1486 MRI = &MF.getRegInfo();
1487 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1488
1489 // Clean up before any further processing, so that dead code does not
1490 // get used in a newly generated "insert" instruction. Have a custom
1491 // version of DCE that preserves lifetime markers. Without it, merging
1492 // of stack objects can fail to recognize and merge disjoint objects
1493 // leading to unnecessary stack growth.
1494 Changed = removeDeadCode(MDT->getRootNode());
1495
1496 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1497 BitTracker BTLoc(HE, MF);
1498 BTLoc.trace(isDebug());
1499 BTLoc.run();
1500 CellMapShadow MS(BTLoc);
1501 CMS = &MS;
1502
1503 buildOrderingMF(BaseOrd);
1504 buildOrderingBT(BaseOrd, CellOrd);
1505
1506 if (isDebug()) {
1507 dbgs() << "Cell ordering:\n";
1508 for (const auto &I : CellOrd) {
1509 unsigned VR = I.first, Pos = I.second;
1510 dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1511 }
1512 }
1513
1514 // Collect candidates for conversion into the insert forms.
1515 MachineBasicBlock *RootB = MDT->getRoot();
1516 OrderedRegisterList AvailR(CellOrd);
1517
1518 const char *const TGName = "hexinsert";
1519 const char *const TGDesc = "Generate Insert Instructions";
1520
1521 {
1522 NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1523 TimingDetail);
1524 collectInBlock(RootB, AvailR);
1525 // Complete the information gathered in IFMap.
1526 computeRemovableRegisters();
1527 }
1528
1529 if (isDebug()) {
1530 dbgs() << "Candidates after collection:\n";
1531 dump_map();
1532 }
1533
1534 if (IFMap.empty())
1535 return Changed;
1536
1537 {
1538 NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1539 pruneCandidates();
1540 }
1541
1542 if (isDebug()) {
1543 dbgs() << "Candidates after pruning:\n";
1544 dump_map();
1545 }
1546
1547 if (IFMap.empty())
1548 return Changed;
1549
1550 {
1551 NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1552 selectCandidates();
1553 }
1554
1555 if (isDebug()) {
1556 dbgs() << "Candidates after selection:\n";
1557 dump_map();
1558 }
1559
1560 // Filter out vregs beyond the cutoff.
1561 if (VRegIndexCutoff.getPosition()) {
1562 unsigned Cutoff = VRegIndexCutoff;
1563 IFMap.remove_if([&](const auto &P) {
1564 return Register(P.first).virtRegIndex() >= Cutoff;
1565 });
1566 }
1567 if (IFMap.empty())
1568 return Changed;
1569
1570 {
1571 NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1572 TimingDetail);
1573 generateInserts();
1574 }
1575
1576 return true;
1577}
1578
1580 return new HexagonGenInsert();
1581}
1582
1583//===----------------------------------------------------------------------===//
1584// Public Constructor Functions
1585//===----------------------------------------------------------------------===//
1586
1587INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1588 "Hexagon generate \"insert\" instructions", false, false)
1590INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1591 "Hexagon generate \"insert\" instructions", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
BitTracker BT
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, ArrayRef< InsnRange > Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
This file defines the DenseMap class.
#define DEBUG_TYPE
This file defines the little GraphTraits<X> template class that should be specialized by classes that...
static cl::opt< bool > OptConst("insert-const", cl::init(false), cl::Hidden)
static bool isDebug()
static cl::opt< unsigned > MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, cl::desc("Maximum size of OrderedRegisterList"))
static cl::opt< unsigned > MaxIFMSize("insert-max-ifmap", cl::init(1024), cl::Hidden, cl::desc("Maximum size of IFMap"))
static cl::opt< bool > OptTimingDetail("insert-timing-detail", cl::Hidden, cl::desc("Enable detailed timing of insert " "generation"))
static cl::opt< bool > OptTiming("insert-timing", cl::Hidden, cl::desc("Enable timing of insert generation"))
static cl::opt< unsigned > VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::desc("Vreg distance cutoff for insert " "generation."))
static cl::opt< bool > OptSelectAll0("insert-all0", cl::init(false), cl::Hidden)
static cl::opt< unsigned > VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, cl::desc("Vreg# cutoff for insert generation."))
static cl::opt< bool > OptSelectHas0("insert-has0", cl::init(false), cl::Hidden)
IRTranslator LLVM IR MI
static bool lookup(const GsymReader &GR, GsymDataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
block placement stats
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
SmallSet< unsigned, 4 > RegisterSet
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:317
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition BitVector.h:510
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
bool any() const
Returns true if any bit is set.
Definition BitVector.h:189
BitVector & operator|=(const BitVector &RHS)
Union of this bitvector with RHS.
Definition BitVector.h:587
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:324
reference operator[](unsigned Idx)
Definition BitVector.h:464
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool isConstExtended(const MachineInstr &MI) const
MachineInstrBundleIterator< const MachineInstr > const_iterator
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
LLVM_ABI void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
static use_nodbg_iterator use_nodbg_end()
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
defusechain_iterator< true, false, true, true, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
SmallSet & operator=(const SmallSet &)=default
const_iterator end() const
Definition SmallSet.h:222
std::pair< const_iterator, bool > insert(const unsigned &V)
Definition SmallSet.h:184
void push_back(const T &Elt)
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ TB
TB - TwoByte - Set if this instruction has a two byte opcode, which starts with a 0x0F byte before th...
initializer< Ty > init(const Ty &Val)
bool empty() const
Definition BasicBlock.h:101
iterator end() const
Definition BasicBlock.h:89
BBIterator iterator
Definition BasicBlock.h:87
LLVM_ABI iterator begin() const
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool includes(R1 &&Range1, R2 &&Range2)
Provide wrappers to std::includes which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1991
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2077
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI bool isCurrentDebugType(const char *Type, int Level=0)
isCurrentDebugType - Return true if the specified string is the debug type specified on the command l...
Definition Debug.cpp:81
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
unsigned M1(unsigned Val)
Definition VE.h:377
LLVM_ABI bool DebugFlag
This boolean is set to true if the '-debug' command line option is specified.
Definition Debug.cpp:68
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionPass * createHexagonGenInsert()
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2051
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
bool is(unsigned T) const
Definition BitTracker.h:206
const RegisterCell & lookup(unsigned Reg) const
Definition BitTracker.h:354
bool reached(const MachineBasicBlock *B) const