LLVM  16.0.0git
HexagonGenInsert.cpp
Go to the documentation of this file.
1 //===- HexagonGenInsert.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
35 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/Timer.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <utility>
44 #include <vector>
45 
46 #define DEBUG_TYPE "hexinsert"
47 
48 using namespace llvm;
49 
50 static cl::opt<unsigned>
51  VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52  cl::desc("Vreg# cutoff for insert generation."));
53 // The distance cutoff is selected based on the precheckin-perf results:
54 // cutoffs 20, 25, 35, and 40 are worse than 30.
55 static cl::opt<unsigned>
56  VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57  cl::desc("Vreg distance cutoff for insert "
58  "generation."));
59 
60 // Limit the container sizes for extreme cases where we run out of memory.
61 static cl::opt<unsigned>
62  MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63  cl::desc("Maximum size of OrderedRegisterList"));
64 static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
65  cl::Hidden,
66  cl::desc("Maximum size of IFMap"));
67 
68 static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69  cl::desc("Enable timing of insert generation"));
70 static cl::opt<bool>
71  OptTimingDetail("insert-timing-detail", cl::Hidden,
72  cl::desc("Enable detailed timing of insert "
73  "generation"));
74 
75 static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76 static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77 // Whether to construct constant values via "insert". Could eliminate constant
78 // extenders, but often not practical.
79 static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80 
81 // The preprocessor gets confused when the DEBUG macro is passed larger
82 // chunks of code. Use this function to detect debugging.
83 inline static bool isDebug() {
84 #ifndef NDEBUG
86 #else
87  return false;
88 #endif
89 }
90 
91 namespace {
92 
93  // Set of virtual registers, based on BitVector.
94  struct RegisterSet : private BitVector {
95  RegisterSet() = default;
96  explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97  RegisterSet(const RegisterSet &RS) = default;
98  RegisterSet &operator=(const RegisterSet &RS) = default;
99 
100  using BitVector::clear;
101 
102  unsigned find_first() const {
104  if (First < 0)
105  return 0;
106  return x2v(First);
107  }
108 
109  unsigned find_next(unsigned Prev) const {
110  int Next = BitVector::find_next(v2x(Prev));
111  if (Next < 0)
112  return 0;
113  return x2v(Next);
114  }
115 
116  RegisterSet &insert(unsigned R) {
117  unsigned Idx = v2x(R);
118  ensure(Idx);
119  return static_cast<RegisterSet&>(BitVector::set(Idx));
120  }
121  RegisterSet &remove(unsigned R) {
122  unsigned Idx = v2x(R);
123  if (Idx >= size())
124  return *this;
125  return static_cast<RegisterSet&>(BitVector::reset(Idx));
126  }
127 
128  RegisterSet &insert(const RegisterSet &Rs) {
129  return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130  }
131  RegisterSet &remove(const RegisterSet &Rs) {
132  return static_cast<RegisterSet&>(BitVector::reset(Rs));
133  }
134 
135  reference operator[](unsigned R) {
136  unsigned Idx = v2x(R);
137  ensure(Idx);
138  return BitVector::operator[](Idx);
139  }
140  bool operator[](unsigned R) const {
141  unsigned Idx = v2x(R);
142  assert(Idx < size());
143  return BitVector::operator[](Idx);
144  }
145  bool has(unsigned R) const {
146  unsigned Idx = v2x(R);
147  if (Idx >= size())
148  return false;
149  return BitVector::test(Idx);
150  }
151 
152  bool empty() const {
153  return !BitVector::any();
154  }
155  bool includes(const RegisterSet &Rs) const {
156  // A.BitVector::test(B) <=> A-B != {}
157  return !Rs.BitVector::test(*this);
158  }
159  bool intersects(const RegisterSet &Rs) const {
160  return BitVector::anyCommon(Rs);
161  }
162 
163  private:
164  void ensure(unsigned Idx) {
165  if (size() <= Idx)
166  resize(std::max(Idx+1, 32U));
167  }
168 
169  static inline unsigned v2x(unsigned v) {
170  return Register::virtReg2Index(v);
171  }
172 
173  static inline unsigned x2v(unsigned x) {
174  return Register::index2VirtReg(x);
175  }
176  };
177 
178  struct PrintRegSet {
179  PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
180  : RS(S), TRI(RI) {}
181 
182  friend raw_ostream &operator<< (raw_ostream &OS,
183  const PrintRegSet &P);
184 
185  private:
186  const RegisterSet &RS;
187  const TargetRegisterInfo *TRI;
188  };
189 
190  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
191  OS << '{';
192  for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
193  OS << ' ' << printReg(R, P.TRI);
194  OS << " }";
195  return OS;
196  }
197 
198  // A convenience class to associate unsigned numbers (such as virtual
199  // registers) with unsigned numbers.
200  struct UnsignedMap : public DenseMap<unsigned,unsigned> {
201  UnsignedMap() = default;
202 
203  private:
205  };
206 
207  // A utility to establish an ordering between virtual registers:
208  // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
209  // This is meant as a cache for the ordering of virtual registers defined
210  // by a potentially expensive comparison function, or obtained by a proce-
211  // dure that should not be repeated each time two registers are compared.
212  struct RegisterOrdering : public UnsignedMap {
213  RegisterOrdering() = default;
214 
215  unsigned operator[](unsigned VR) const {
216  const_iterator F = find(VR);
217  assert(F != end());
218  return F->second;
219  }
220 
221  // Add operator(), so that objects of this class can be used as
222  // comparators in std::sort et al.
223  bool operator() (unsigned VR1, unsigned VR2) const {
224  return operator[](VR1) < operator[](VR2);
225  }
226  };
227 
228  // Ordering of bit values. This class does not have operator[], but
229  // is supplies a comparison operator() for use in std:: algorithms.
230  // The order is as follows:
231  // - 0 < 1 < ref
232  // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
233  // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
234  struct BitValueOrdering {
235  BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
236 
237  bool operator() (const BitTracker::BitValue &V1,
238  const BitTracker::BitValue &V2) const;
239 
240  const RegisterOrdering &BaseOrd;
241  };
242 
243 } // end anonymous namespace
244 
245 bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
246  const BitTracker::BitValue &V2) const {
247  if (V1 == V2)
248  return false;
249  // V1==0 => true, V2==0 => false
250  if (V1.is(0) || V2.is(0))
251  return V1.is(0);
252  // Neither of V1,V2 is 0, and V1!=V2.
253  // V2==1 => false, V1==1 => true
254  if (V2.is(1) || V1.is(1))
255  return !V2.is(1);
256  // Both V1,V2 are refs.
257  unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
258  if (Ind1 != Ind2)
259  return Ind1 < Ind2;
260  // If V1.Pos==V2.Pos
261  assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
262  return V1.RefI.Pos < V2.RefI.Pos;
263 }
264 
265 namespace {
266 
267  // Cache for the BitTracker's cell map. Map lookup has a logarithmic
268  // complexity, this class will memoize the lookup results to reduce
269  // the access time for repeated lookups of the same cell.
270  struct CellMapShadow {
271  CellMapShadow(const BitTracker &T) : BT(T) {}
272 
273  const BitTracker::RegisterCell &lookup(unsigned VR) {
274  unsigned RInd = Register::virtReg2Index(VR);
275  // Grow the vector to at least 32 elements.
276  if (RInd >= CVect.size())
277  CVect.resize(std::max(RInd+16, 32U), nullptr);
278  const BitTracker::RegisterCell *CP = CVect[RInd];
279  if (CP == nullptr)
280  CP = CVect[RInd] = &BT.lookup(VR);
281  return *CP;
282  }
283 
284  const BitTracker &BT;
285 
286  private:
287  using CellVectType = std::vector<const BitTracker::RegisterCell *>;
288 
289  CellVectType CVect;
290  };
291 
292  // Comparator class for lexicographic ordering of virtual registers
293  // according to the corresponding BitTracker::RegisterCell objects.
294  struct RegisterCellLexCompare {
295  RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
296  : BitOrd(BO), CM(M) {}
297 
298  bool operator() (unsigned VR1, unsigned VR2) const;
299 
300  private:
301  const BitValueOrdering &BitOrd;
302  CellMapShadow &CM;
303  };
304 
305  // Comparator class for lexicographic ordering of virtual registers
306  // according to the specified bits of the corresponding BitTracker::
307  // RegisterCell objects.
308  // Specifically, this class will be used to compare bit B of a register
309  // cell for a selected virtual register R with bit N of any register
310  // other than R.
311  struct RegisterCellBitCompareSel {
312  RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
313  const BitValueOrdering &BO, CellMapShadow &M)
314  : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
315 
316  bool operator() (unsigned VR1, unsigned VR2) const;
317 
318  private:
319  const unsigned SelR, SelB;
320  const unsigned BitN;
321  const BitValueOrdering &BitOrd;
322  CellMapShadow &CM;
323  };
324 
325 } // end anonymous namespace
326 
327 bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
328  // Ordering of registers, made up from two given orderings:
329  // - the ordering of the register numbers, and
330  // - the ordering of register cells.
331  // Def. R1 < R2 if:
332  // - cell(R1) < cell(R2), or
333  // - cell(R1) == cell(R2), and index(R1) < index(R2).
334  //
335  // For register cells, the ordering is lexicographic, with index 0 being
336  // the most significant.
337  if (VR1 == VR2)
338  return false;
339 
340  const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
341  uint16_t W1 = RC1.width(), W2 = RC2.width();
342  for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
343  const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
344  if (V1 != V2)
345  return BitOrd(V1, V2);
346  }
347  // Cells are equal up until the common length.
348  if (W1 != W2)
349  return W1 < W2;
350 
351  return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
352 }
353 
354 bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
355  if (VR1 == VR2)
356  return false;
357  const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
358  const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
359  uint16_t W1 = RC1.width(), W2 = RC2.width();
360  uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
361  uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
362  // If Bit1 exceeds the width of VR1, then:
363  // - return false, if at the same time Bit2 exceeds VR2, or
364  // - return true, otherwise.
365  // (I.e. "a bit value that does not exist is less than any bit value
366  // that does exist".)
367  if (W1 <= Bit1)
368  return Bit2 < W2;
369  // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
370  if (W2 <= Bit2)
371  return false;
372 
373  const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
374  if (V1 != V2)
375  return BitOrd(V1, V2);
376  return false;
377 }
378 
379 namespace {
380 
381  class OrderedRegisterList {
382  using ListType = std::vector<unsigned>;
383  const unsigned MaxSize;
384 
385  public:
386  OrderedRegisterList(const RegisterOrdering &RO)
387  : MaxSize(MaxORLSize), Ord(RO) {}
388 
389  void insert(unsigned VR);
390  void remove(unsigned VR);
391 
392  unsigned operator[](unsigned Idx) const {
393  assert(Idx < Seq.size());
394  return Seq[Idx];
395  }
396 
397  unsigned size() const {
398  return Seq.size();
399  }
400 
401  using iterator = ListType::iterator;
402  using const_iterator = ListType::const_iterator;
403 
404  iterator begin() { return Seq.begin(); }
405  iterator end() { return Seq.end(); }
406  const_iterator begin() const { return Seq.begin(); }
407  const_iterator end() const { return Seq.end(); }
408 
409  // Convenience function to convert an iterator to the corresponding index.
410  unsigned idx(iterator It) const { return It-begin(); }
411 
412  private:
413  ListType Seq;
414  const RegisterOrdering &Ord;
415  };
416 
417  struct PrintORL {
418  PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
419  : RL(L), TRI(RI) {}
420 
421  friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
422 
423  private:
424  const OrderedRegisterList &RL;
425  const TargetRegisterInfo *TRI;
426  };
427 
428  raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
429  OS << '(';
430  OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
431  for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
432  if (I != B)
433  OS << ", ";
434  OS << printReg(*I, P.TRI);
435  }
436  OS << ')';
437  return OS;
438  }
439 
440 } // end anonymous namespace
441 
442 void OrderedRegisterList::insert(unsigned VR) {
443  iterator L = llvm::lower_bound(Seq, VR, Ord);
444  if (L == Seq.end())
445  Seq.push_back(VR);
446  else
447  Seq.insert(L, VR);
448 
449  unsigned S = Seq.size();
450  if (S > MaxSize)
451  Seq.resize(MaxSize);
452  assert(Seq.size() <= MaxSize);
453 }
454 
455 void OrderedRegisterList::remove(unsigned VR) {
456  iterator L = llvm::lower_bound(Seq, VR, Ord);
457  if (L != Seq.end())
458  Seq.erase(L);
459 }
460 
461 namespace {
462 
463  // A record of the insert form. The fields correspond to the operands
464  // of the "insert" instruction:
465  // ... = insert(SrcR, InsR, #Wdh, #Off)
466  struct IFRecord {
467  IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
468  : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
469 
470  unsigned SrcR, InsR;
471  uint16_t Wdh, Off;
472  };
473 
474  struct PrintIFR {
475  PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
476  : IFR(R), TRI(RI) {}
477 
478  private:
479  friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
480 
481  const IFRecord &IFR;
482  const TargetRegisterInfo *TRI;
483  };
484 
485  raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
486  unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
487  OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
488  << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
489  return OS;
490  }
491 
492  using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
493 
494 } // end anonymous namespace
495 
496 namespace llvm {
497 
500 
501 } // end namespace llvm
502 
503 namespace {
504 
505  class HexagonGenInsert : public MachineFunctionPass {
506  public:
507  static char ID;
508 
509  HexagonGenInsert() : MachineFunctionPass(ID) {
511  }
512 
513  StringRef getPassName() const override {
514  return "Hexagon generate \"insert\" instructions";
515  }
516 
517  void getAnalysisUsage(AnalysisUsage &AU) const override {
521  }
522 
523  bool runOnMachineFunction(MachineFunction &MF) override;
524 
525  private:
526  using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
527 
528  void buildOrderingMF(RegisterOrdering &RO) const;
529  void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
530  bool isIntClass(const TargetRegisterClass *RC) const;
531  bool isConstant(unsigned VR) const;
532  bool isSmallConstant(unsigned VR) const;
533  bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
534  uint16_t L, uint16_t S) const;
535  bool findSelfReference(unsigned VR) const;
536  bool findNonSelfReference(unsigned VR) const;
537  void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
538  void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
539  unsigned distance(const MachineBasicBlock *FromB,
540  const MachineBasicBlock *ToB, const UnsignedMap &RPO,
541  PairMapType &M) const;
542  unsigned distance(MachineBasicBlock::const_iterator FromI,
543  MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
544  PairMapType &M) const;
545  bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
546  void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
547  void findRemovableRegisters(unsigned VR, IFRecord IF,
548  RegisterSet &RMs) const;
549  void computeRemovableRegisters();
550 
551  void pruneEmptyLists();
552  void pruneCoveredSets(unsigned VR);
553  void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
554  void pruneRegCopies(unsigned VR);
555  void pruneCandidates();
556  void selectCandidates();
557  bool generateInserts();
558 
559  bool removeDeadCode(MachineDomTreeNode *N);
560 
561  // IFRecord coupled with a set of potentially removable registers:
562  using IFListType = std::vector<IFRecordWithRegSet>;
563  using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
564 
565  void dump_map() const;
566 
567  const HexagonInstrInfo *HII = nullptr;
568  const HexagonRegisterInfo *HRI = nullptr;
569 
570  MachineFunction *MFN;
573  CellMapShadow *CMS;
574 
575  RegisterOrdering BaseOrd;
576  RegisterOrdering CellOrd;
577  IFMapType IFMap;
578  };
579 
580 } // end anonymous namespace
581 
582 char HexagonGenInsert::ID = 0;
583 
584 void HexagonGenInsert::dump_map() const {
585  for (const auto &I : IFMap) {
586  dbgs() << " " << printReg(I.first, HRI) << ":\n";
587  const IFListType &LL = I.second;
588  for (const auto &J : LL)
589  dbgs() << " " << PrintIFR(J.first, HRI) << ", "
590  << PrintRegSet(J.second, HRI) << '\n';
591  }
592 }
593 
594 void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
595  unsigned Index = 0;
596 
597  for (const MachineBasicBlock &B : *MFN) {
598  if (!CMS->BT.reached(&B))
599  continue;
600 
601  for (const MachineInstr &MI : B) {
602  for (const MachineOperand &MO : MI.operands()) {
603  if (MO.isReg() && MO.isDef()) {
604  Register R = MO.getReg();
605  assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
606  if (R.isVirtual())
607  RO.insert(std::make_pair(R, Index++));
608  }
609  }
610  }
611  }
612  // Since some virtual registers may have had their def and uses eliminated,
613  // they are no longer referenced in the code, and so they will not appear
614  // in the map.
615 }
616 
617 void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
618  RegisterOrdering &RO) const {
619  // Create a vector of all virtual registers (collect them from the base
620  // ordering RB), and then sort it using the RegisterCell comparator.
621  BitValueOrdering BVO(RB);
622  RegisterCellLexCompare LexCmp(BVO, *CMS);
623 
624  using SortableVectorType = std::vector<unsigned>;
625 
626  SortableVectorType VRs;
627  for (auto &I : RB)
628  VRs.push_back(I.first);
629  llvm::sort(VRs, LexCmp);
630  // Transfer the results to the outgoing register ordering.
631  for (unsigned i = 0, n = VRs.size(); i < n; ++i)
632  RO.insert(std::make_pair(VRs[i], i));
633 }
634 
635 inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
636  return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
637 }
638 
639 bool HexagonGenInsert::isConstant(unsigned VR) const {
640  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
641  uint16_t W = RC.width();
642  for (uint16_t i = 0; i < W; ++i) {
643  const BitTracker::BitValue &BV = RC[i];
644  if (BV.is(0) || BV.is(1))
645  continue;
646  return false;
647  }
648  return true;
649 }
650 
651 bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
652  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
653  uint16_t W = RC.width();
654  if (W > 64)
655  return false;
656  uint64_t V = 0, B = 1;
657  for (uint16_t i = 0; i < W; ++i) {
658  const BitTracker::BitValue &BV = RC[i];
659  if (BV.is(1))
660  V |= B;
661  else if (!BV.is(0))
662  return false;
663  B <<= 1;
664  }
665 
666  // For 32-bit registers, consider: Rd = #s16.
667  if (W == 32)
668  return isInt<16>(V);
669 
670  // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
671  return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
672 }
673 
674 bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
675  unsigned InsR, uint16_t L, uint16_t S) const {
676  const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
677  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
678  const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
679  // Only integet (32-/64-bit) register classes.
680  if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
681  return false;
682  // The "source" register must be of the same class as DstR.
683  if (DstRC != SrcRC)
684  return false;
685  if (DstRC == InsRC)
686  return true;
687  // A 64-bit register can only be generated from other 64-bit registers.
688  if (DstRC == &Hexagon::DoubleRegsRegClass)
689  return false;
690  // Otherwise, the L and S cannot span 32-bit word boundary.
691  if (S < 32 && S+L > 32)
692  return false;
693  return true;
694 }
695 
696 bool HexagonGenInsert::findSelfReference(unsigned VR) const {
697  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
698  for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
699  const BitTracker::BitValue &V = RC[i];
700  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
701  return true;
702  }
703  return false;
704 }
705 
706 bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
707  BitTracker::RegisterCell RC = CMS->lookup(VR);
708  for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
709  const BitTracker::BitValue &V = RC[i];
710  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
711  return true;
712  }
713  return false;
714 }
715 
716 void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
717  RegisterSet &Defs) const {
718  for (const MachineOperand &MO : MI->operands()) {
719  if (!MO.isReg() || !MO.isDef())
720  continue;
721  Register R = MO.getReg();
722  if (!R.isVirtual())
723  continue;
724  Defs.insert(R);
725  }
726 }
727 
728 void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
729  RegisterSet &Uses) const {
730  for (const MachineOperand &MO : MI->operands()) {
731  if (!MO.isReg() || !MO.isUse())
732  continue;
733  Register R = MO.getReg();
734  if (!R.isVirtual())
735  continue;
736  Uses.insert(R);
737  }
738 }
739 
740 unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
741  const MachineBasicBlock *ToB, const UnsignedMap &RPO,
742  PairMapType &M) const {
743  // Forward distance from the end of a block to the beginning of it does
744  // not make sense. This function should not be called with FromB == ToB.
745  assert(FromB != ToB);
746 
747  unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
748  // If we have already computed it, return the cached result.
749  PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
750  if (F != M.end())
751  return F->second;
752  unsigned ToRPO = RPO.lookup(ToN);
753 
754  unsigned MaxD = 0;
755 
756  for (const MachineBasicBlock *PB : ToB->predecessors()) {
757  // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
758  // along that path will be 0, and we don't need to do any calculations
759  // on it.
760  if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
761  continue;
762  unsigned D = PB->size() + distance(FromB, PB, RPO, M);
763  if (D > MaxD)
764  MaxD = D;
765  }
766 
767  // Memoize the result for later lookup.
768  M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
769  return MaxD;
770 }
771 
772 unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
773  MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
774  PairMapType &M) const {
775  const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
776  if (FB == TB)
777  return std::distance(FromI, ToI);
778  unsigned D1 = std::distance(TB->begin(), ToI);
779  unsigned D2 = distance(FB, TB, RPO, M);
780  unsigned D3 = std::distance(FromI, FB->end());
781  return D1+D2+D3;
782 }
783 
784 bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
785  OrderedRegisterList &AVs) {
786  if (isDebug()) {
787  dbgs() << __func__ << ": " << printReg(VR, HRI)
788  << " AVs: " << PrintORL(AVs, HRI) << "\n";
789  }
790  if (AVs.size() == 0)
791  return false;
792 
793  using iterator = OrderedRegisterList::iterator;
794 
795  BitValueOrdering BVO(BaseOrd);
796  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
797  uint16_t W = RC.width();
798 
799  using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
800  using RSListType = std::vector<RSRecord>;
801  // Have a map, with key being the matching prefix length, and the value
802  // being the list of pairs (R,S), where R's prefix matches VR at S.
803  // (DenseMap<uint16_t,RSListType> fails to instantiate.)
804  using LRSMapType = DenseMap<unsigned, RSListType>;
805  LRSMapType LM;
806 
807  // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
808  // and find matching prefixes from AVs with the rotated RC. Such a prefix
809  // would match a string of bits (of length L) in RC starting at S.
810  for (uint16_t S = 0; S < W; ++S) {
811  iterator B = AVs.begin(), E = AVs.end();
812  // The registers in AVs are ordered according to the lexical order of
813  // the corresponding register cells. This means that the range of regis-
814  // ters in AVs that match a prefix of length L+1 will be contained in
815  // the range that matches a prefix of length L. This means that we can
816  // keep narrowing the search space as the prefix length goes up. This
817  // helps reduce the overall complexity of the search.
818  uint16_t L;
819  for (L = 0; L < W-S; ++L) {
820  // Compare against VR's bits starting at S, which emulates rotation
821  // of VR by S.
822  RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
823  iterator NewB = std::lower_bound(B, E, VR, RCB);
824  iterator NewE = std::upper_bound(NewB, E, VR, RCB);
825  // For the registers that are eliminated from the next range, L is
826  // the longest prefix matching VR at position S (their prefixes
827  // differ from VR at S+L). If L>0, record this information for later
828  // use.
829  if (L > 0) {
830  for (iterator I = B; I != NewB; ++I)
831  LM[L].push_back(std::make_pair(*I, S));
832  for (iterator I = NewE; I != E; ++I)
833  LM[L].push_back(std::make_pair(*I, S));
834  }
835  B = NewB, E = NewE;
836  if (B == E)
837  break;
838  }
839  // Record the final register range. If this range is non-empty, then
840  // L=W-S.
841  assert(B == E || L == W-S);
842  if (B != E) {
843  for (iterator I = B; I != E; ++I)
844  LM[L].push_back(std::make_pair(*I, S));
845  // If B!=E, then we found a range of registers whose prefixes cover the
846  // rest of VR from position S. There is no need to further advance S.
847  break;
848  }
849  }
850 
851  if (isDebug()) {
852  dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
853  for (const auto &I : LM) {
854  dbgs() << " L=" << I.first << ':';
855  const RSListType &LL = I.second;
856  for (const auto &J : LL)
857  dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
858  dbgs() << '\n';
859  }
860  }
861 
862  bool Recorded = false;
863 
864  for (unsigned SrcR : AVs) {
865  int FDi = -1, LDi = -1; // First/last different bit.
866  const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
867  uint16_t AW = AC.width();
868  for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
869  if (RC[i] == AC[i])
870  continue;
871  if (FDi == -1)
872  FDi = i;
873  LDi = i;
874  }
875  if (FDi == -1)
876  continue; // TODO (future): Record identical registers.
877  // Look for a register whose prefix could patch the range [FD..LD]
878  // where VR and SrcR differ.
879  uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
880  uint16_t MinL = LD-FD+1;
881  for (uint16_t L = MinL; L < W; ++L) {
882  LRSMapType::iterator F = LM.find(L);
883  if (F == LM.end())
884  continue;
885  RSListType &LL = F->second;
886  for (const auto &I : LL) {
887  uint16_t S = I.second;
888  // MinL is the minimum length of the prefix. Any length above MinL
889  // allows some flexibility as to where the prefix can start:
890  // given the extra length EL=L-MinL, the prefix must start between
891  // max(0,FD-EL) and FD.
892  if (S > FD) // Starts too late.
893  continue;
894  uint16_t EL = L-MinL;
895  uint16_t LowS = (EL < FD) ? FD-EL : 0;
896  if (S < LowS) // Starts too early.
897  continue;
898  unsigned InsR = I.first;
899  if (!isValidInsertForm(VR, SrcR, InsR, L, S))
900  continue;
901  if (isDebug()) {
902  dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
903  << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
904  << S << ")\n";
905  }
906  IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
907  IFMap[VR].push_back(RR);
908  Recorded = true;
909  }
910  }
911  }
912 
913  return Recorded;
914 }
915 
916 void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
917  OrderedRegisterList &AVs) {
918  if (isDebug())
919  dbgs() << "visiting block " << printMBBReference(*B) << "\n";
920 
921  // First, check if this block is reachable at all. If not, the bit tracker
922  // will not have any information about registers in it.
923  if (!CMS->BT.reached(B))
924  return;
925 
926  bool DoConst = OptConst;
927  // Keep a separate set of registers defined in this block, so that we
928  // can remove them from the list of available registers once all DT
929  // successors have been processed.
930  RegisterSet BlockDefs, InsDefs;
931  for (MachineInstr &MI : *B) {
932  InsDefs.clear();
933  getInstrDefs(&MI, InsDefs);
934  // Leave those alone. They are more transparent than "insert".
935  bool Skip = MI.isCopy() || MI.isRegSequence();
936 
937  if (!Skip) {
938  // Visit all defined registers, and attempt to find the corresponding
939  // "insert" representations.
940  for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
941  // Do not collect registers that are known to be compile-time cons-
942  // tants, unless requested.
943  if (!DoConst && isConstant(VR))
944  continue;
945  // If VR's cell contains a reference to VR, then VR cannot be defined
946  // via "insert". If VR is a constant that can be generated in a single
947  // instruction (without constant extenders), generating it via insert
948  // makes no sense.
949  if (findSelfReference(VR) || isSmallConstant(VR))
950  continue;
951 
952  findRecordInsertForms(VR, AVs);
953  // Stop if the map size is too large.
954  if (IFMap.size() > MaxIFMSize)
955  return;
956  }
957  }
958 
959  // Insert the defined registers into the list of available registers
960  // after they have been processed.
961  for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
962  AVs.insert(VR);
963  BlockDefs.insert(InsDefs);
964  }
965 
966  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
967  MachineBasicBlock *SB = DTN->getBlock();
968  collectInBlock(SB, AVs);
969  }
970 
971  for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
972  AVs.remove(VR);
973 }
974 
975 void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
976  RegisterSet &RMs) const {
977  // For a given register VR and a insert form, find the registers that are
978  // used by the current definition of VR, and which would no longer be
979  // needed for it after the definition of VR is replaced with the insert
980  // form. These are the registers that could potentially become dead.
981  RegisterSet Regs[2];
982 
983  unsigned S = 0; // Register set selector.
984  Regs[S].insert(VR);
985 
986  while (!Regs[S].empty()) {
987  // Breadth-first search.
988  unsigned OtherS = 1-S;
989  Regs[OtherS].clear();
990  for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
991  Regs[S].remove(R);
992  if (R == IF.SrcR || R == IF.InsR)
993  continue;
994  // Check if a given register has bits that are references to any other
995  // registers. This is to detect situations where the instruction that
996  // defines register R takes register Q as an operand, but R itself does
997  // not contain any bits from Q. Loads are examples of how this could
998  // happen:
999  // R = load Q
1000  // In this case (assuming we do not have any knowledge about the loaded
1001  // value), we must not treat R as a "conveyance" of the bits from Q.
1002  // (The information in BT about R's bits would have them as constants,
1003  // in case of zero-extending loads, or refs to R.)
1004  if (!findNonSelfReference(R))
1005  continue;
1006  RMs.insert(R);
1007  const MachineInstr *DefI = MRI->getVRegDef(R);
1008  assert(DefI);
1009  // Do not iterate past PHI nodes to avoid infinite loops. This can
1010  // make the final set a bit less accurate, but the removable register
1011  // sets are an approximation anyway.
1012  if (DefI->isPHI())
1013  continue;
1014  getInstrUses(DefI, Regs[OtherS]);
1015  }
1016  S = OtherS;
1017  }
1018  // The register VR is added to the list as a side-effect of the algorithm,
1019  // but it is not "potentially removable". A potentially removable register
1020  // is one that may become unused (dead) after conversion to the insert form
1021  // IF, and obviously VR (or its replacement) will not become dead by apply-
1022  // ing IF.
1023  RMs.remove(VR);
1024 }
1025 
1026 void HexagonGenInsert::computeRemovableRegisters() {
1027  for (auto &I : IFMap) {
1028  IFListType &LL = I.second;
1029  for (auto &J : LL)
1030  findRemovableRegisters(I.first, J.first, J.second);
1031  }
1032 }
1033 
1034 void HexagonGenInsert::pruneEmptyLists() {
1035  // Remove all entries from the map, where the register has no insert forms
1036  // associated with it.
1037  using IterListType = SmallVector<IFMapType::iterator, 16>;
1038  IterListType Prune;
1039  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1040  if (I->second.empty())
1041  Prune.push_back(I);
1042  }
1043  for (unsigned i = 0, n = Prune.size(); i < n; ++i)
1044  IFMap.erase(Prune[i]);
1045 }
1046 
1047 void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1048  IFMapType::iterator F = IFMap.find(VR);
1049  assert(F != IFMap.end());
1050  IFListType &LL = F->second;
1051 
1052  // First, examine the IF candidates for register VR whose removable-regis-
1053  // ter sets are empty. This means that a given candidate will not help eli-
1054  // minate any registers, but since "insert" is not a constant-extendable
1055  // instruction, using such a candidate may reduce code size if the defini-
1056  // tion of VR is constant-extended.
1057  // If there exists a candidate with a non-empty set, the ones with empty
1058  // sets will not be used and can be removed.
1059  MachineInstr *DefVR = MRI->getVRegDef(VR);
1060  bool DefEx = HII->isConstExtended(*DefVR);
1061  bool HasNE = false;
1062  for (const auto &I : LL) {
1063  if (I.second.empty())
1064  continue;
1065  HasNE = true;
1066  break;
1067  }
1068  if (!DefEx || HasNE) {
1069  // The definition of VR is not constant-extended, or there is a candidate
1070  // with a non-empty set. Remove all candidates with empty sets.
1071  auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1072  return IR.second.empty();
1073  };
1074  llvm::erase_if(LL, IsEmpty);
1075  } else {
1076  // The definition of VR is constant-extended, and all candidates have
1077  // empty removable-register sets. Pick the maximum candidate, and remove
1078  // all others. The "maximum" does not have any special meaning here, it
1079  // is only so that the candidate that will remain on the list is selec-
1080  // ted deterministically.
1081  IFRecord MaxIF = LL[0].first;
1082  for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1083  // If LL[MaxI] < LL[i], then MaxI = i.
1084  const IFRecord &IF = LL[i].first;
1085  unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1086  unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1087  if (M0 > R0)
1088  continue;
1089  if (M0 == R0) {
1090  if (M1 > R1)
1091  continue;
1092  if (M1 == R1) {
1093  if (MaxIF.Wdh > IF.Wdh)
1094  continue;
1095  if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1096  continue;
1097  }
1098  }
1099  // MaxIF < IF.
1100  MaxIF = IF;
1101  }
1102  // Remove everything except the maximum candidate. All register sets
1103  // are empty, so no need to preserve anything.
1104  LL.clear();
1105  LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1106  }
1107 
1108  // Now, remove those whose sets of potentially removable registers are
1109  // contained in another IF candidate for VR. For example, given these
1110  // candidates for %45,
1111  // %45:
1112  // (%44,%41,#9,#8), { %42 }
1113  // (%43,%41,#9,#8), { %42 %44 }
1114  // remove the first one, since it is contained in the second one.
1115  for (unsigned i = 0, n = LL.size(); i < n; ) {
1116  const RegisterSet &RMi = LL[i].second;
1117  unsigned j = 0;
1118  while (j < n) {
1119  if (j != i && LL[j].second.includes(RMi))
1120  break;
1121  j++;
1122  }
1123  if (j == n) { // RMi not contained in anything else.
1124  i++;
1125  continue;
1126  }
1127  LL.erase(LL.begin()+i);
1128  n = LL.size();
1129  }
1130 }
1131 
1132 void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1133  PairMapType &M) {
1134  IFMapType::iterator F = IFMap.find(VR);
1135  assert(F != IFMap.end());
1136  IFListType &LL = F->second;
1137  unsigned Cutoff = VRegDistCutoff;
1138  const MachineInstr *DefV = MRI->getVRegDef(VR);
1139 
1140  for (unsigned i = LL.size(); i > 0; --i) {
1141  unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1142  const MachineInstr *DefS = MRI->getVRegDef(SR);
1143  const MachineInstr *DefI = MRI->getVRegDef(IR);
1144  unsigned DSV = distance(DefS, DefV, RPO, M);
1145  if (DSV < Cutoff) {
1146  unsigned DIV = distance(DefI, DefV, RPO, M);
1147  if (DIV < Cutoff)
1148  continue;
1149  }
1150  LL.erase(LL.begin()+(i-1));
1151  }
1152 }
1153 
1154 void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1155  IFMapType::iterator F = IFMap.find(VR);
1156  assert(F != IFMap.end());
1157  IFListType &LL = F->second;
1158 
1159  auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1160  return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1161  };
1162  llvm::erase_if(LL, IsCopy);
1163 }
1164 
1165 void HexagonGenInsert::pruneCandidates() {
1166  // Remove candidates that are not beneficial, regardless of the final
1167  // selection method.
1168  // First, remove candidates whose potentially removable set is a subset
1169  // of another candidate's set.
1170  for (const auto &I : IFMap)
1171  pruneCoveredSets(I.first);
1172 
1173  UnsignedMap RPO;
1174 
1176 
1177  RPOTType RPOT(MFN);
1178  unsigned RPON = 0;
1179  for (const auto &I : RPOT)
1180  RPO[I->getNumber()] = RPON++;
1181 
1182  PairMapType Memo; // Memoization map for distance calculation.
1183  // Remove candidates that would use registers defined too far away.
1184  for (const auto &I : IFMap)
1185  pruneUsesTooFar(I.first, RPO, Memo);
1186 
1187  pruneEmptyLists();
1188 
1189  for (const auto &I : IFMap)
1190  pruneRegCopies(I.first);
1191 }
1192 
1193 namespace {
1194 
1195  // Class for comparing IF candidates for registers that have multiple of
1196  // them. The smaller the candidate, according to this ordering, the better.
1197  // First, compare the number of zeros in the associated potentially remova-
1198  // ble register sets. "Zero" indicates that the register is very likely to
1199  // become dead after this transformation.
1200  // Second, compare "averages", i.e. use-count per size. The lower wins.
1201  // After that, it does not really matter which one is smaller. Resolve
1202  // the tie in some deterministic way.
1203  struct IFOrdering {
1204  IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1205  : UseC(UC), BaseOrd(BO) {}
1206 
1207  bool operator() (const IFRecordWithRegSet &A,
1208  const IFRecordWithRegSet &B) const;
1209 
1210  private:
1211  void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1212  unsigned &Sum) const;
1213 
1214  const UnsignedMap &UseC;
1215  const RegisterOrdering &BaseOrd;
1216  };
1217 
1218 } // end anonymous namespace
1219 
1220 bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1221  const IFRecordWithRegSet &B) const {
1222  unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1223  unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1224  stats(A.second, SizeA, ZeroA, SumA);
1225  stats(B.second, SizeB, ZeroB, SumB);
1226 
1227  // We will pick the minimum element. The more zeros, the better.
1228  if (ZeroA != ZeroB)
1229  return ZeroA > ZeroB;
1230  // Compare SumA/SizeA with SumB/SizeB, lower is better.
1231  uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1232  if (AvgA != AvgB)
1233  return AvgA < AvgB;
1234 
1235  // The sets compare identical so far. Resort to comparing the IF records.
1236  // The actual values don't matter, this is only for determinism.
1237  unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1238  if (OSA != OSB)
1239  return OSA < OSB;
1240  unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1241  if (OIA != OIB)
1242  return OIA < OIB;
1243  if (A.first.Wdh != B.first.Wdh)
1244  return A.first.Wdh < B.first.Wdh;
1245  return A.first.Off < B.first.Off;
1246 }
1247 
1248 void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1249  unsigned &Sum) const {
1250  for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1251  UnsignedMap::const_iterator F = UseC.find(R);
1252  assert(F != UseC.end());
1253  unsigned UC = F->second;
1254  if (UC == 0)
1255  Zero++;
1256  Sum += UC;
1257  Size++;
1258  }
1259 }
1260 
1261 void HexagonGenInsert::selectCandidates() {
1262  // Some registers may have multiple valid candidates. Pick the best one
1263  // (or decide not to use any).
1264 
1265  // Compute the "removability" measure of R:
1266  // For each potentially removable register R, record the number of regis-
1267  // ters with IF candidates, where R appears in at least one set.
1268  RegisterSet AllRMs;
1269  UnsignedMap UseC, RemC;
1270  IFMapType::iterator End = IFMap.end();
1271 
1272  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1273  const IFListType &LL = I->second;
1274  RegisterSet TT;
1275  for (const auto &J : LL)
1276  TT.insert(J.second);
1277  for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1278  RemC[R]++;
1279  AllRMs.insert(TT);
1280  }
1281 
1282  for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1283  using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1284  using InstrSet = SmallSet<const MachineInstr *, 16>;
1285 
1286  InstrSet UIs;
1287  // Count as the number of instructions in which R is used, not the
1288  // number of operands.
1289  use_iterator E = MRI->use_nodbg_end();
1290  for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1291  UIs.insert(I->getParent());
1292  unsigned C = UIs.size();
1293  // Calculate a measure, which is the number of instructions using R,
1294  // minus the "removability" count computed earlier.
1295  unsigned D = RemC[R];
1296  UseC[R] = (C > D) ? C-D : 0; // doz
1297  }
1298 
1299  bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1300  if (!SelectAll0 && !SelectHas0)
1301  SelectAll0 = true;
1302 
1303  // The smaller the number UseC for a given register R, the "less used"
1304  // R is aside from the opportunities for removal offered by generating
1305  // "insert" instructions.
1306  // Iterate over the IF map, and for those registers that have multiple
1307  // candidates, pick the minimum one according to IFOrdering.
1308  IFOrdering IFO(UseC, BaseOrd);
1309  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1310  IFListType &LL = I->second;
1311  if (LL.empty())
1312  continue;
1313  // Get the minimum element, remember it and clear the list. If the
1314  // element found is adequate, we will put it back on the list, other-
1315  // wise the list will remain empty, and the entry for this register
1316  // will be removed (i.e. this register will not be replaced by insert).
1317  IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
1318  assert(MinI != LL.end());
1319  IFRecordWithRegSet M = *MinI;
1320  LL.clear();
1321 
1322  // We want to make sure that this replacement will have a chance to be
1323  // beneficial, and that means that we want to have indication that some
1324  // register will be removed. The most likely registers to be eliminated
1325  // are the use operands in the definition of I->first. Accept/reject a
1326  // candidate based on how many of its uses it can potentially eliminate.
1327 
1328  RegisterSet Us;
1329  const MachineInstr *DefI = MRI->getVRegDef(I->first);
1330  getInstrUses(DefI, Us);
1331  bool Accept = false;
1332 
1333  if (SelectAll0) {
1334  bool All0 = true;
1335  for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1336  if (UseC[R] == 0)
1337  continue;
1338  All0 = false;
1339  break;
1340  }
1341  Accept = All0;
1342  } else if (SelectHas0) {
1343  bool Has0 = false;
1344  for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1345  if (UseC[R] != 0)
1346  continue;
1347  Has0 = true;
1348  break;
1349  }
1350  Accept = Has0;
1351  }
1352  if (Accept)
1353  LL.push_back(M);
1354  }
1355 
1356  // Remove candidates that add uses of removable registers, unless the
1357  // removable registers are among replacement candidates.
1358  // Recompute the removable registers, since some candidates may have
1359  // been eliminated.
1360  AllRMs.clear();
1361  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1362  const IFListType &LL = I->second;
1363  if (!LL.empty())
1364  AllRMs.insert(LL[0].second);
1365  }
1366  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1367  IFListType &LL = I->second;
1368  if (LL.empty())
1369  continue;
1370  unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1371  if (AllRMs[SR] || AllRMs[IR])
1372  LL.clear();
1373  }
1374 
1375  pruneEmptyLists();
1376 }
1377 
1378 bool HexagonGenInsert::generateInserts() {
1379  // Create a new register for each one from IFMap, and store them in the
1380  // map.
1381  UnsignedMap RegMap;
1382  for (auto &I : IFMap) {
1383  unsigned VR = I.first;
1384  const TargetRegisterClass *RC = MRI->getRegClass(VR);
1385  Register NewVR = MRI->createVirtualRegister(RC);
1386  RegMap[VR] = NewVR;
1387  }
1388 
1389  // We can generate the "insert" instructions using potentially stale re-
1390  // gisters: SrcR and InsR for a given VR may be among other registers that
1391  // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1392  for (auto &I : IFMap) {
1393  MachineInstr *MI = MRI->getVRegDef(I.first);
1394  MachineBasicBlock &B = *MI->getParent();
1395  DebugLoc DL = MI->getDebugLoc();
1396  unsigned NewR = RegMap[I.first];
1397  bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1398  const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1399  : HII->get(Hexagon::S2_insertp);
1400  IFRecord IF = I.second[0].first;
1401  unsigned Wdh = IF.Wdh, Off = IF.Off;
1402  unsigned InsS = 0;
1403  if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1404  InsS = Hexagon::isub_lo;
1405  if (Off >= 32) {
1406  InsS = Hexagon::isub_hi;
1407  Off -= 32;
1408  }
1409  }
1410  // Advance to the proper location for inserting instructions. This could
1411  // be B.end().
1413  if (MI->isPHI())
1414  At = B.getFirstNonPHI();
1415 
1416  BuildMI(B, At, DL, D, NewR)
1417  .addReg(IF.SrcR)
1418  .addReg(IF.InsR, 0, InsS)
1419  .addImm(Wdh)
1420  .addImm(Off);
1421 
1422  MRI->clearKillFlags(IF.SrcR);
1423  MRI->clearKillFlags(IF.InsR);
1424  }
1425 
1426  for (const auto &I : IFMap) {
1427  MachineInstr *DefI = MRI->getVRegDef(I.first);
1428  MRI->replaceRegWith(I.first, RegMap[I.first]);
1429  DefI->eraseFromParent();
1430  }
1431 
1432  return true;
1433 }
1434 
1435 bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1436  bool Changed = false;
1437 
1438  for (auto *DTN : children<MachineDomTreeNode*>(N))
1439  Changed |= removeDeadCode(DTN);
1440 
1441  MachineBasicBlock *B = N->getBlock();
1442  std::vector<MachineInstr*> Instrs;
1443  for (MachineInstr &MI : llvm::reverse(*B))
1444  Instrs.push_back(&MI);
1445 
1446  for (MachineInstr *MI : Instrs) {
1447  unsigned Opc = MI->getOpcode();
1448  // Do not touch lifetime markers. This is why the target-independent DCE
1449  // cannot be used.
1450  if (Opc == TargetOpcode::LIFETIME_START ||
1452  continue;
1453  bool Store = false;
1454  if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
1455  continue;
1456 
1457  bool AllDead = true;
1459  for (const MachineOperand &MO : MI->operands()) {
1460  if (!MO.isReg() || !MO.isDef())
1461  continue;
1462  Register R = MO.getReg();
1463  if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1464  AllDead = false;
1465  break;
1466  }
1467  Regs.push_back(R);
1468  }
1469  if (!AllDead)
1470  continue;
1471 
1472  B->erase(MI);
1473  for (unsigned I = 0, N = Regs.size(); I != N; ++I)
1475  Changed = true;
1476  }
1477 
1478  return Changed;
1479 }
1480 
1481 bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1482  if (skipFunction(MF.getFunction()))
1483  return false;
1484 
1485  bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1486  bool Changed = false;
1487 
1488  // Verify: one, but not both.
1490 
1491  IFMap.clear();
1492  BaseOrd.clear();
1493  CellOrd.clear();
1494 
1495  const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1496  HII = ST.getInstrInfo();
1497  HRI = ST.getRegisterInfo();
1498  MFN = &MF;
1499  MRI = &MF.getRegInfo();
1500  MDT = &getAnalysis<MachineDominatorTree>();
1501 
1502  // Clean up before any further processing, so that dead code does not
1503  // get used in a newly generated "insert" instruction. Have a custom
1504  // version of DCE that preserves lifetime markers. Without it, merging
1505  // of stack objects can fail to recognize and merge disjoint objects
1506  // leading to unnecessary stack growth.
1507  Changed = removeDeadCode(MDT->getRootNode());
1508 
1509  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1510  BitTracker BTLoc(HE, MF);
1511  BTLoc.trace(isDebug());
1512  BTLoc.run();
1513  CellMapShadow MS(BTLoc);
1514  CMS = &MS;
1515 
1516  buildOrderingMF(BaseOrd);
1517  buildOrderingBT(BaseOrd, CellOrd);
1518 
1519  if (isDebug()) {
1520  dbgs() << "Cell ordering:\n";
1521  for (const auto &I : CellOrd) {
1522  unsigned VR = I.first, Pos = I.second;
1523  dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1524  }
1525  }
1526 
1527  // Collect candidates for conversion into the insert forms.
1528  MachineBasicBlock *RootB = MDT->getRoot();
1529  OrderedRegisterList AvailR(CellOrd);
1530 
1531  const char *const TGName = "hexinsert";
1532  const char *const TGDesc = "Generate Insert Instructions";
1533 
1534  {
1535  NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1536  TimingDetail);
1537  collectInBlock(RootB, AvailR);
1538  // Complete the information gathered in IFMap.
1539  computeRemovableRegisters();
1540  }
1541 
1542  if (isDebug()) {
1543  dbgs() << "Candidates after collection:\n";
1544  dump_map();
1545  }
1546 
1547  if (IFMap.empty())
1548  return Changed;
1549 
1550  {
1551  NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1552  pruneCandidates();
1553  }
1554 
1555  if (isDebug()) {
1556  dbgs() << "Candidates after pruning:\n";
1557  dump_map();
1558  }
1559 
1560  if (IFMap.empty())
1561  return Changed;
1562 
1563  {
1564  NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1565  selectCandidates();
1566  }
1567 
1568  if (isDebug()) {
1569  dbgs() << "Candidates after selection:\n";
1570  dump_map();
1571  }
1572 
1573  // Filter out vregs beyond the cutoff.
1574  if (VRegIndexCutoff.getPosition()) {
1575  unsigned Cutoff = VRegIndexCutoff;
1576 
1577  using IterListType = SmallVector<IFMapType::iterator, 16>;
1578 
1579  IterListType Out;
1580  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1581  unsigned Idx = Register::virtReg2Index(I->first);
1582  if (Idx >= Cutoff)
1583  Out.push_back(I);
1584  }
1585  for (unsigned i = 0, n = Out.size(); i < n; ++i)
1586  IFMap.erase(Out[i]);
1587  }
1588  if (IFMap.empty())
1589  return Changed;
1590 
1591  {
1592  NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1593  TimingDetail);
1594  generateInserts();
1595  }
1596 
1597  return true;
1598 }
1599 
1601  return new HexagonGenInsert();
1602 }
1603 
1604 //===----------------------------------------------------------------------===//
1605 // Public Constructor Functions
1606 //===----------------------------------------------------------------------===//
1607 
1608 INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1609  "Hexagon generate \"insert\" instructions", false, false)
1611 INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1612  "Hexagon generate \"insert\" instructions", false, false)
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::BitVector::operator[]
reference operator[](unsigned Idx)
Definition: BitVector.h:437
llvm::MachineRegisterInfo::markUsesInDebugValueAsUndef
void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
Definition: MachineRegisterInfo.cpp:534
OptSelectHas0
static cl::opt< bool > OptSelectHas0("insert-has0", cl::init(false), cl::Hidden)
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:724
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::upper_bound
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1740
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
MaxIFMSize
static cl::opt< unsigned > MaxIFMSize("insert-max-ifmap", cl::init(1024), cl::Hidden, cl::desc("Maximum size of IFMap"))
llvm::ARM::PredBlockMask::TT
@ TT
MaxORLSize
static cl::opt< unsigned > MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, cl::desc("Maximum size of OrderedRegisterList"))
llvm::ISD::LIFETIME_END
@ LIFETIME_END
Definition: ISDOpcodes.h:1225
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::BitTracker::lookup
const RegisterCell & lookup(unsigned Reg) const
Definition: BitTracker.h:357
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
T
llvm::BitTracker::BitValue::RefI
BitRef RefI
Definition: BitTracker.h:192
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1727
StringRef.h
llvm::ISD::LIFETIME_START
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1224
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:328
llvm::BitVector::operator|=
BitVector & operator|=(const BitVector &RHS)
Definition: BitVector.h:558
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip
@ Skip
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:116
llvm::AMDGPUISD::IF
@ IF
Definition: AMDGPUISelLowering.h:358
HexagonSubtarget.h
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1802
intersects
static Optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Definition: DbgEntityHistoryCalculator.cpp:114
RPO
rpo function Deduce function attributes in RPO
Definition: FunctionAttrs.cpp:1974
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::BitTracker::BitValue::Ref
@ Ref
Definition: BitTracker.h:160
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
VRegDistCutoff
static cl::opt< unsigned > VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::desc("Vreg distance cutoff for insert " "generation."))
DenseMap.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
STLExtras.h
BaseType
RegisterSet
SmallSet< unsigned, 4 > RegisterSet
Definition: Thumb2ITBlockPass.cpp:39
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:336
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MachineRegisterInfo::use_nodbg_begin
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:526
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
OptSelectAll0
static cl::opt< bool > OptSelectAll0("insert-all0", cl::init(false), cl::Hidden)
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GraphTraits.h
CommandLine.h
BitTracker.h
llvm::BitTracker::BitValue::is
bool is(unsigned T) const
Definition: BitTracker.h:209
llvm::initializeHexagonGenInsertPass
void initializeHexagonGenInsertPass(PassRegistry &)
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::BitTracker::RegisterCell::width
uint16_t width() const
Definition: BitTracker.h:303
OptTiming
static cl::opt< bool > OptTiming("insert-timing", cl::Hidden, cl::desc("Enable timing of insert generation"))
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
llvm::BitTracker
Definition: BitTracker.h:35
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::HexagonEvaluator
Definition: HexagonBitTracker.h:25
false
Definition: StackSlotColoring.cpp:141
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:369
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
BitVector.h
llvm::MachineRegisterInfo::use_nodbg_end
static use_nodbg_iterator use_nodbg_end()
Definition: MachineRegisterInfo.h:529
DebugLoc.h
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:746
llvm::BitVector
Definition: BitVector.h:75
HexagonInstrInfo.h
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
isDebug
static bool isDebug()
Definition: HexagonGenInsert.cpp:83
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
OptTimingDetail
static cl::opt< bool > OptTimingDetail("insert-timing-detail", cl::Hidden, cl::desc("Enable detailed timing of insert " "generation"))
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:331
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::createHexagonGenInsert
FunctionPass * createHexagonGenInsert()
Definition: HexagonGenInsert.cpp:1600
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1538
llvm::MachineRegisterInfo::clearKillFlags
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Definition: MachineRegisterInfo.cpp:433
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
Timer.h
llvm::cl::opt
Definition: CommandLine.h:1399
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::BitTracker::BitRef::Reg
Register Reg
Definition: BitTracker.h:134
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:163
stats
block placement stats
Definition: MachineBlockPlacement.cpp:3655
llvm::MachineRegisterInfo::use_nodbg_iterator
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
Definition: MachineRegisterInfo.h:525
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1610
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
isConstant
static bool isConstant(const MachineInstr &MI)
Definition: AMDGPUInstructionSelector.cpp:2387
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
MachineFunctionPass.h
HexagonRegisterInfo.h
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1300
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::HexagonInstrInfo
Definition: HexagonInstrInfo.h:38
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonGenInsert.cpp:46
llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition: MachineRegisterInfo.h:574
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1571
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
llvm::isCurrentDebugType
bool isCurrentDebugType(const char *Type)
isCurrentDebugType - Return true if the specified string is the debug type specified on the command l...
Definition: Debug.cpp:52
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1115
HexagonBitTracker.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
OptConst
static cl::opt< bool > OptConst("insert-const", cl::init(false), cl::Hidden)
llvm::DomTreeNodeBase
Base class for the actual dominator tree node.
Definition: LiveIntervalCalc.h:24
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
llvm::BitTracker::RegisterCell
Definition: BitTracker.h:300
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
j
return j(j<< 16)
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:454
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:256
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
uint16_t
VRegIndexCutoff
static cl::opt< unsigned > VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, cl::desc("Vreg# cutoff for insert generation."))
llvm::NamedRegionTimer
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:165
x
TODO unsigned x
Definition: README.txt:10
llvm::BitVector::find_next
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:301
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", "Hexagon generate \"insert\" instructions", false, false) INITIALIZE_PASS_END(HexagonGenInsert
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:291
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:385
llvm::BitTracker::BitRef::Pos
uint16_t Pos
Definition: BitTracker.h:135
llvm::DebugFlag
bool DebugFlag
This boolean is set to true if the '-debug' command line option is specified.
Definition: Debug.cpp:45
PostOrderIterator.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
SmallVector.h
lookup
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:108
MachineInstrBuilder.h
BT
BitTracker BT
Definition: BitTracker.cpp:73
N
#define N
llvm::BitVector::find_first
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:293
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
generate
We currently generate
Definition: README.txt:597
llvm::BitTracker::BitValue::Type
ValueType Type
Definition: BitTracker.h:191
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
MachineOperand.h
llvm::BitVector::anyCommon
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition: BitVector.h:482
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::HexagonRegisterInfo
Definition: HexagonRegisterInfo.h:29
llvm::Register::virtReg2Index
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::X86II::TB
@ TB
Definition: X86BaseInfo.h:805
llvm::cl::desc
Definition: CommandLine.h:412
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:370
raw_ostream.h
llvm::BitTracker::BitValue
Definition: BitTracker.h:155
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:684
llvm::MachineInstrBundleIterator< const MachineInstr >
hexinsert
hexinsert
Definition: HexagonGenInsert.cpp:1611
InitializePasses.h
TargetRegisterInfo.h
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
MachineDominators.h
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38