LLVM  14.0.0git
HexagonGenInsert.cpp
Go to the documentation of this file.
1 //===- HexagonGenInsert.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
35 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/Timer.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <utility>
44 #include <vector>
45 
46 #define DEBUG_TYPE "hexinsert"
47 
48 using namespace llvm;
49 
50 static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U),
51  cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation."));
52 // The distance cutoff is selected based on the precheckin-perf results:
53 // cutoffs 20, 25, 35, and 40 are worse than 30.
54 static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U),
55  cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert "
56  "generation."));
57 
58 // Limit the container sizes for extreme cases where we run out of memory.
59 static cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096),
60  cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList"));
61 static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
62  cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap"));
63 
64 static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden,
65  cl::ZeroOrMore, cl::desc("Enable timing of insert generation"));
66 static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false),
67  cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert "
68  "generation"));
69 
70 static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden,
72 static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden,
74 // Whether to construct constant values via "insert". Could eliminate constant
75 // extenders, but often not practical.
76 static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden,
78 
79 // The preprocessor gets confused when the DEBUG macro is passed larger
80 // chunks of code. Use this function to detect debugging.
81 inline static bool isDebug() {
82 #ifndef NDEBUG
84 #else
85  return false;
86 #endif
87 }
88 
89 namespace {
90 
91  // Set of virtual registers, based on BitVector.
92  struct RegisterSet : private BitVector {
93  RegisterSet() = default;
94  explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
95  RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
96  RegisterSet &operator=(const RegisterSet &RS) {
97  BitVector::operator=(RS);
98  return *this;
99  }
100 
101  using BitVector::clear;
102 
103  unsigned find_first() const {
105  if (First < 0)
106  return 0;
107  return x2v(First);
108  }
109 
110  unsigned find_next(unsigned Prev) const {
111  int Next = BitVector::find_next(v2x(Prev));
112  if (Next < 0)
113  return 0;
114  return x2v(Next);
115  }
116 
117  RegisterSet &insert(unsigned R) {
118  unsigned Idx = v2x(R);
119  ensure(Idx);
120  return static_cast<RegisterSet&>(BitVector::set(Idx));
121  }
122  RegisterSet &remove(unsigned R) {
123  unsigned Idx = v2x(R);
124  if (Idx >= size())
125  return *this;
126  return static_cast<RegisterSet&>(BitVector::reset(Idx));
127  }
128 
129  RegisterSet &insert(const RegisterSet &Rs) {
130  return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
131  }
132  RegisterSet &remove(const RegisterSet &Rs) {
133  return static_cast<RegisterSet&>(BitVector::reset(Rs));
134  }
135 
136  reference operator[](unsigned R) {
137  unsigned Idx = v2x(R);
138  ensure(Idx);
139  return BitVector::operator[](Idx);
140  }
141  bool operator[](unsigned R) const {
142  unsigned Idx = v2x(R);
143  assert(Idx < size());
144  return BitVector::operator[](Idx);
145  }
146  bool has(unsigned R) const {
147  unsigned Idx = v2x(R);
148  if (Idx >= size())
149  return false;
150  return BitVector::test(Idx);
151  }
152 
153  bool empty() const {
154  return !BitVector::any();
155  }
156  bool includes(const RegisterSet &Rs) const {
157  // A.BitVector::test(B) <=> A-B != {}
158  return !Rs.BitVector::test(*this);
159  }
160  bool intersects(const RegisterSet &Rs) const {
161  return BitVector::anyCommon(Rs);
162  }
163 
164  private:
165  void ensure(unsigned Idx) {
166  if (size() <= Idx)
167  resize(std::max(Idx+1, 32U));
168  }
169 
170  static inline unsigned v2x(unsigned v) {
171  return Register::virtReg2Index(v);
172  }
173 
174  static inline unsigned x2v(unsigned x) {
175  return Register::index2VirtReg(x);
176  }
177  };
178 
179  struct PrintRegSet {
180  PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
181  : RS(S), TRI(RI) {}
182 
183  friend raw_ostream &operator<< (raw_ostream &OS,
184  const PrintRegSet &P);
185 
186  private:
187  const RegisterSet &RS;
188  const TargetRegisterInfo *TRI;
189  };
190 
191  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
192  OS << '{';
193  for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
194  OS << ' ' << printReg(R, P.TRI);
195  OS << " }";
196  return OS;
197  }
198 
199  // A convenience class to associate unsigned numbers (such as virtual
200  // registers) with unsigned numbers.
201  struct UnsignedMap : public DenseMap<unsigned,unsigned> {
202  UnsignedMap() = default;
203 
204  private:
206  };
207 
208  // A utility to establish an ordering between virtual registers:
209  // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
210  // This is meant as a cache for the ordering of virtual registers defined
211  // by a potentially expensive comparison function, or obtained by a proce-
212  // dure that should not be repeated each time two registers are compared.
213  struct RegisterOrdering : public UnsignedMap {
214  RegisterOrdering() = default;
215 
216  unsigned operator[](unsigned VR) const {
217  const_iterator F = find(VR);
218  assert(F != end());
219  return F->second;
220  }
221 
222  // Add operator(), so that objects of this class can be used as
223  // comparators in std::sort et al.
224  bool operator() (unsigned VR1, unsigned VR2) const {
225  return operator[](VR1) < operator[](VR2);
226  }
227  };
228 
229  // Ordering of bit values. This class does not have operator[], but
230  // is supplies a comparison operator() for use in std:: algorithms.
231  // The order is as follows:
232  // - 0 < 1 < ref
233  // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
234  // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
235  struct BitValueOrdering {
236  BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
237 
238  bool operator() (const BitTracker::BitValue &V1,
239  const BitTracker::BitValue &V2) const;
240 
241  const RegisterOrdering &BaseOrd;
242  };
243 
244 } // end anonymous namespace
245 
246 bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
247  const BitTracker::BitValue &V2) const {
248  if (V1 == V2)
249  return false;
250  // V1==0 => true, V2==0 => false
251  if (V1.is(0) || V2.is(0))
252  return V1.is(0);
253  // Neither of V1,V2 is 0, and V1!=V2.
254  // V2==1 => false, V1==1 => true
255  if (V2.is(1) || V1.is(1))
256  return !V2.is(1);
257  // Both V1,V2 are refs.
258  unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
259  if (Ind1 != Ind2)
260  return Ind1 < Ind2;
261  // If V1.Pos==V2.Pos
262  assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
263  return V1.RefI.Pos < V2.RefI.Pos;
264 }
265 
266 namespace {
267 
268  // Cache for the BitTracker's cell map. Map lookup has a logarithmic
269  // complexity, this class will memoize the lookup results to reduce
270  // the access time for repeated lookups of the same cell.
271  struct CellMapShadow {
272  CellMapShadow(const BitTracker &T) : BT(T) {}
273 
274  const BitTracker::RegisterCell &lookup(unsigned VR) {
275  unsigned RInd = Register::virtReg2Index(VR);
276  // Grow the vector to at least 32 elements.
277  if (RInd >= CVect.size())
278  CVect.resize(std::max(RInd+16, 32U), nullptr);
279  const BitTracker::RegisterCell *CP = CVect[RInd];
280  if (CP == nullptr)
281  CP = CVect[RInd] = &BT.lookup(VR);
282  return *CP;
283  }
284 
285  const BitTracker &BT;
286 
287  private:
288  using CellVectType = std::vector<const BitTracker::RegisterCell *>;
289 
290  CellVectType CVect;
291  };
292 
293  // Comparator class for lexicographic ordering of virtual registers
294  // according to the corresponding BitTracker::RegisterCell objects.
295  struct RegisterCellLexCompare {
296  RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
297  : BitOrd(BO), CM(M) {}
298 
299  bool operator() (unsigned VR1, unsigned VR2) const;
300 
301  private:
302  const BitValueOrdering &BitOrd;
303  CellMapShadow &CM;
304  };
305 
306  // Comparator class for lexicographic ordering of virtual registers
307  // according to the specified bits of the corresponding BitTracker::
308  // RegisterCell objects.
309  // Specifically, this class will be used to compare bit B of a register
310  // cell for a selected virtual register R with bit N of any register
311  // other than R.
312  struct RegisterCellBitCompareSel {
313  RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
314  const BitValueOrdering &BO, CellMapShadow &M)
315  : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
316 
317  bool operator() (unsigned VR1, unsigned VR2) const;
318 
319  private:
320  const unsigned SelR, SelB;
321  const unsigned BitN;
322  const BitValueOrdering &BitOrd;
323  CellMapShadow &CM;
324  };
325 
326 } // end anonymous namespace
327 
328 bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
329  // Ordering of registers, made up from two given orderings:
330  // - the ordering of the register numbers, and
331  // - the ordering of register cells.
332  // Def. R1 < R2 if:
333  // - cell(R1) < cell(R2), or
334  // - cell(R1) == cell(R2), and index(R1) < index(R2).
335  //
336  // For register cells, the ordering is lexicographic, with index 0 being
337  // the most significant.
338  if (VR1 == VR2)
339  return false;
340 
341  const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
342  uint16_t W1 = RC1.width(), W2 = RC2.width();
343  for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
344  const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
345  if (V1 != V2)
346  return BitOrd(V1, V2);
347  }
348  // Cells are equal up until the common length.
349  if (W1 != W2)
350  return W1 < W2;
351 
352  return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
353 }
354 
355 bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
356  if (VR1 == VR2)
357  return false;
358  const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
359  const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
360  uint16_t W1 = RC1.width(), W2 = RC2.width();
361  uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
362  uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
363  // If Bit1 exceeds the width of VR1, then:
364  // - return false, if at the same time Bit2 exceeds VR2, or
365  // - return true, otherwise.
366  // (I.e. "a bit value that does not exist is less than any bit value
367  // that does exist".)
368  if (W1 <= Bit1)
369  return Bit2 < W2;
370  // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
371  if (W2 <= Bit2)
372  return false;
373 
374  const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
375  if (V1 != V2)
376  return BitOrd(V1, V2);
377  return false;
378 }
379 
380 namespace {
381 
382  class OrderedRegisterList {
383  using ListType = std::vector<unsigned>;
384  const unsigned MaxSize;
385 
386  public:
387  OrderedRegisterList(const RegisterOrdering &RO)
388  : MaxSize(MaxORLSize), Ord(RO) {}
389 
390  void insert(unsigned VR);
391  void remove(unsigned VR);
392 
393  unsigned operator[](unsigned Idx) const {
394  assert(Idx < Seq.size());
395  return Seq[Idx];
396  }
397 
398  unsigned size() const {
399  return Seq.size();
400  }
401 
402  using iterator = ListType::iterator;
403  using const_iterator = ListType::const_iterator;
404 
405  iterator begin() { return Seq.begin(); }
406  iterator end() { return Seq.end(); }
407  const_iterator begin() const { return Seq.begin(); }
408  const_iterator end() const { return Seq.end(); }
409 
410  // Convenience function to convert an iterator to the corresponding index.
411  unsigned idx(iterator It) const { return It-begin(); }
412 
413  private:
414  ListType Seq;
415  const RegisterOrdering &Ord;
416  };
417 
418  struct PrintORL {
419  PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
420  : RL(L), TRI(RI) {}
421 
422  friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
423 
424  private:
425  const OrderedRegisterList &RL;
426  const TargetRegisterInfo *TRI;
427  };
428 
429  raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
430  OS << '(';
431  OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
432  for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
433  if (I != B)
434  OS << ", ";
435  OS << printReg(*I, P.TRI);
436  }
437  OS << ')';
438  return OS;
439  }
440 
441 } // end anonymous namespace
442 
443 void OrderedRegisterList::insert(unsigned VR) {
444  iterator L = llvm::lower_bound(Seq, VR, Ord);
445  if (L == Seq.end())
446  Seq.push_back(VR);
447  else
448  Seq.insert(L, VR);
449 
450  unsigned S = Seq.size();
451  if (S > MaxSize)
452  Seq.resize(MaxSize);
453  assert(Seq.size() <= MaxSize);
454 }
455 
456 void OrderedRegisterList::remove(unsigned VR) {
457  iterator L = llvm::lower_bound(Seq, VR, Ord);
458  if (L != Seq.end())
459  Seq.erase(L);
460 }
461 
462 namespace {
463 
464  // A record of the insert form. The fields correspond to the operands
465  // of the "insert" instruction:
466  // ... = insert(SrcR, InsR, #Wdh, #Off)
467  struct IFRecord {
468  IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
469  : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
470 
471  unsigned SrcR, InsR;
472  uint16_t Wdh, Off;
473  };
474 
475  struct PrintIFR {
476  PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
477  : IFR(R), TRI(RI) {}
478 
479  private:
480  friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
481 
482  const IFRecord &IFR;
483  const TargetRegisterInfo *TRI;
484  };
485 
486  raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
487  unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
488  OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
489  << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
490  return OS;
491  }
492 
493  using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
494 
495 } // end anonymous namespace
496 
497 namespace llvm {
498 
501 
502 } // end namespace llvm
503 
504 namespace {
505 
506  class HexagonGenInsert : public MachineFunctionPass {
507  public:
508  static char ID;
509 
510  HexagonGenInsert() : MachineFunctionPass(ID) {
512  }
513 
514  StringRef getPassName() const override {
515  return "Hexagon generate \"insert\" instructions";
516  }
517 
518  void getAnalysisUsage(AnalysisUsage &AU) const override {
522  }
523 
524  bool runOnMachineFunction(MachineFunction &MF) override;
525 
526  private:
527  using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
528 
529  void buildOrderingMF(RegisterOrdering &RO) const;
530  void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
531  bool isIntClass(const TargetRegisterClass *RC) const;
532  bool isConstant(unsigned VR) const;
533  bool isSmallConstant(unsigned VR) const;
534  bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
535  uint16_t L, uint16_t S) const;
536  bool findSelfReference(unsigned VR) const;
537  bool findNonSelfReference(unsigned VR) const;
538  void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
539  void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
540  unsigned distance(const MachineBasicBlock *FromB,
541  const MachineBasicBlock *ToB, const UnsignedMap &RPO,
542  PairMapType &M) const;
543  unsigned distance(MachineBasicBlock::const_iterator FromI,
544  MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
545  PairMapType &M) const;
546  bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
547  void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
548  void findRemovableRegisters(unsigned VR, IFRecord IF,
549  RegisterSet &RMs) const;
550  void computeRemovableRegisters();
551 
552  void pruneEmptyLists();
553  void pruneCoveredSets(unsigned VR);
554  void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
555  void pruneRegCopies(unsigned VR);
556  void pruneCandidates();
557  void selectCandidates();
558  bool generateInserts();
559 
560  bool removeDeadCode(MachineDomTreeNode *N);
561 
562  // IFRecord coupled with a set of potentially removable registers:
563  using IFListType = std::vector<IFRecordWithRegSet>;
564  using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
565 
566  void dump_map() const;
567 
568  const HexagonInstrInfo *HII = nullptr;
569  const HexagonRegisterInfo *HRI = nullptr;
570 
571  MachineFunction *MFN;
574  CellMapShadow *CMS;
575 
576  RegisterOrdering BaseOrd;
577  RegisterOrdering CellOrd;
578  IFMapType IFMap;
579  };
580 
581 } // end anonymous namespace
582 
583 char HexagonGenInsert::ID = 0;
584 
585 void HexagonGenInsert::dump_map() const {
586  using iterator = IFMapType::const_iterator;
587 
588  for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
589  dbgs() << " " << printReg(I->first, HRI) << ":\n";
590  const IFListType &LL = I->second;
591  for (unsigned i = 0, n = LL.size(); i < n; ++i)
592  dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", "
593  << PrintRegSet(LL[i].second, HRI) << '\n';
594  }
595 }
596 
597 void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
598  unsigned Index = 0;
599 
600  for (const MachineBasicBlock &B : *MFN) {
601  if (!CMS->BT.reached(&B))
602  continue;
603 
604  for (const MachineInstr &MI : B) {
605  for (const MachineOperand &MO : MI.operands()) {
606  if (MO.isReg() && MO.isDef()) {
607  Register R = MO.getReg();
608  assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
609  if (R.isVirtual())
610  RO.insert(std::make_pair(R, Index++));
611  }
612  }
613  }
614  }
615  // Since some virtual registers may have had their def and uses eliminated,
616  // they are no longer referenced in the code, and so they will not appear
617  // in the map.
618 }
619 
620 void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
621  RegisterOrdering &RO) const {
622  // Create a vector of all virtual registers (collect them from the base
623  // ordering RB), and then sort it using the RegisterCell comparator.
624  BitValueOrdering BVO(RB);
625  RegisterCellLexCompare LexCmp(BVO, *CMS);
626 
627  using SortableVectorType = std::vector<unsigned>;
628 
629  SortableVectorType VRs;
630  for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
631  VRs.push_back(I->first);
632  llvm::sort(VRs, LexCmp);
633  // Transfer the results to the outgoing register ordering.
634  for (unsigned i = 0, n = VRs.size(); i < n; ++i)
635  RO.insert(std::make_pair(VRs[i], i));
636 }
637 
638 inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
639  return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
640 }
641 
642 bool HexagonGenInsert::isConstant(unsigned VR) const {
643  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
644  uint16_t W = RC.width();
645  for (uint16_t i = 0; i < W; ++i) {
646  const BitTracker::BitValue &BV = RC[i];
647  if (BV.is(0) || BV.is(1))
648  continue;
649  return false;
650  }
651  return true;
652 }
653 
654 bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
655  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
656  uint16_t W = RC.width();
657  if (W > 64)
658  return false;
659  uint64_t V = 0, B = 1;
660  for (uint16_t i = 0; i < W; ++i) {
661  const BitTracker::BitValue &BV = RC[i];
662  if (BV.is(1))
663  V |= B;
664  else if (!BV.is(0))
665  return false;
666  B <<= 1;
667  }
668 
669  // For 32-bit registers, consider: Rd = #s16.
670  if (W == 32)
671  return isInt<16>(V);
672 
673  // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
674  return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
675 }
676 
677 bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
678  unsigned InsR, uint16_t L, uint16_t S) const {
679  const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
680  const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
681  const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
682  // Only integet (32-/64-bit) register classes.
683  if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
684  return false;
685  // The "source" register must be of the same class as DstR.
686  if (DstRC != SrcRC)
687  return false;
688  if (DstRC == InsRC)
689  return true;
690  // A 64-bit register can only be generated from other 64-bit registers.
691  if (DstRC == &Hexagon::DoubleRegsRegClass)
692  return false;
693  // Otherwise, the L and S cannot span 32-bit word boundary.
694  if (S < 32 && S+L > 32)
695  return false;
696  return true;
697 }
698 
699 bool HexagonGenInsert::findSelfReference(unsigned VR) const {
700  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
701  for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
702  const BitTracker::BitValue &V = RC[i];
703  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
704  return true;
705  }
706  return false;
707 }
708 
709 bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
710  BitTracker::RegisterCell RC = CMS->lookup(VR);
711  for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
712  const BitTracker::BitValue &V = RC[i];
713  if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
714  return true;
715  }
716  return false;
717 }
718 
719 void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
720  RegisterSet &Defs) const {
721  for (const MachineOperand &MO : MI->operands()) {
722  if (!MO.isReg() || !MO.isDef())
723  continue;
724  Register R = MO.getReg();
725  if (!R.isVirtual())
726  continue;
727  Defs.insert(R);
728  }
729 }
730 
731 void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
732  RegisterSet &Uses) const {
733  for (const MachineOperand &MO : MI->operands()) {
734  if (!MO.isReg() || !MO.isUse())
735  continue;
736  Register R = MO.getReg();
737  if (!R.isVirtual())
738  continue;
739  Uses.insert(R);
740  }
741 }
742 
743 unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
744  const MachineBasicBlock *ToB, const UnsignedMap &RPO,
745  PairMapType &M) const {
746  // Forward distance from the end of a block to the beginning of it does
747  // not make sense. This function should not be called with FromB == ToB.
748  assert(FromB != ToB);
749 
750  unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
751  // If we have already computed it, return the cached result.
752  PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
753  if (F != M.end())
754  return F->second;
755  unsigned ToRPO = RPO.lookup(ToN);
756 
757  unsigned MaxD = 0;
758 
759  for (const MachineBasicBlock *PB : ToB->predecessors()) {
760  // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
761  // along that path will be 0, and we don't need to do any calculations
762  // on it.
763  if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
764  continue;
765  unsigned D = PB->size() + distance(FromB, PB, RPO, M);
766  if (D > MaxD)
767  MaxD = D;
768  }
769 
770  // Memoize the result for later lookup.
771  M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
772  return MaxD;
773 }
774 
775 unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
776  MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
777  PairMapType &M) const {
778  const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
779  if (FB == TB)
780  return std::distance(FromI, ToI);
781  unsigned D1 = std::distance(TB->begin(), ToI);
782  unsigned D2 = distance(FB, TB, RPO, M);
783  unsigned D3 = std::distance(FromI, FB->end());
784  return D1+D2+D3;
785 }
786 
787 bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
788  OrderedRegisterList &AVs) {
789  if (isDebug()) {
790  dbgs() << __func__ << ": " << printReg(VR, HRI)
791  << " AVs: " << PrintORL(AVs, HRI) << "\n";
792  }
793  if (AVs.size() == 0)
794  return false;
795 
796  using iterator = OrderedRegisterList::iterator;
797 
798  BitValueOrdering BVO(BaseOrd);
799  const BitTracker::RegisterCell &RC = CMS->lookup(VR);
800  uint16_t W = RC.width();
801 
802  using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
803  using RSListType = std::vector<RSRecord>;
804  // Have a map, with key being the matching prefix length, and the value
805  // being the list of pairs (R,S), where R's prefix matches VR at S.
806  // (DenseMap<uint16_t,RSListType> fails to instantiate.)
807  using LRSMapType = DenseMap<unsigned, RSListType>;
808  LRSMapType LM;
809 
810  // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
811  // and find matching prefixes from AVs with the rotated RC. Such a prefix
812  // would match a string of bits (of length L) in RC starting at S.
813  for (uint16_t S = 0; S < W; ++S) {
814  iterator B = AVs.begin(), E = AVs.end();
815  // The registers in AVs are ordered according to the lexical order of
816  // the corresponding register cells. This means that the range of regis-
817  // ters in AVs that match a prefix of length L+1 will be contained in
818  // the range that matches a prefix of length L. This means that we can
819  // keep narrowing the search space as the prefix length goes up. This
820  // helps reduce the overall complexity of the search.
821  uint16_t L;
822  for (L = 0; L < W-S; ++L) {
823  // Compare against VR's bits starting at S, which emulates rotation
824  // of VR by S.
825  RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
826  iterator NewB = std::lower_bound(B, E, VR, RCB);
827  iterator NewE = std::upper_bound(NewB, E, VR, RCB);
828  // For the registers that are eliminated from the next range, L is
829  // the longest prefix matching VR at position S (their prefixes
830  // differ from VR at S+L). If L>0, record this information for later
831  // use.
832  if (L > 0) {
833  for (iterator I = B; I != NewB; ++I)
834  LM[L].push_back(std::make_pair(*I, S));
835  for (iterator I = NewE; I != E; ++I)
836  LM[L].push_back(std::make_pair(*I, S));
837  }
838  B = NewB, E = NewE;
839  if (B == E)
840  break;
841  }
842  // Record the final register range. If this range is non-empty, then
843  // L=W-S.
844  assert(B == E || L == W-S);
845  if (B != E) {
846  for (iterator I = B; I != E; ++I)
847  LM[L].push_back(std::make_pair(*I, S));
848  // If B!=E, then we found a range of registers whose prefixes cover the
849  // rest of VR from position S. There is no need to further advance S.
850  break;
851  }
852  }
853 
854  if (isDebug()) {
855  dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
856  for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) {
857  dbgs() << " L=" << I->first << ':';
858  const RSListType &LL = I->second;
859  for (unsigned i = 0, n = LL.size(); i < n; ++i)
860  dbgs() << " (" << printReg(LL[i].first, HRI) << ",@"
861  << LL[i].second << ')';
862  dbgs() << '\n';
863  }
864  }
865 
866  bool Recorded = false;
867 
868  for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) {
869  unsigned SrcR = *I;
870  int FDi = -1, LDi = -1; // First/last different bit.
871  const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
872  uint16_t AW = AC.width();
873  for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
874  if (RC[i] == AC[i])
875  continue;
876  if (FDi == -1)
877  FDi = i;
878  LDi = i;
879  }
880  if (FDi == -1)
881  continue; // TODO (future): Record identical registers.
882  // Look for a register whose prefix could patch the range [FD..LD]
883  // where VR and SrcR differ.
884  uint16_t FD = FDi, LD = LDi; // Switch to unsigned type.
885  uint16_t MinL = LD-FD+1;
886  for (uint16_t L = MinL; L < W; ++L) {
887  LRSMapType::iterator F = LM.find(L);
888  if (F == LM.end())
889  continue;
890  RSListType &LL = F->second;
891  for (unsigned i = 0, n = LL.size(); i < n; ++i) {
892  uint16_t S = LL[i].second;
893  // MinL is the minimum length of the prefix. Any length above MinL
894  // allows some flexibility as to where the prefix can start:
895  // given the extra length EL=L-MinL, the prefix must start between
896  // max(0,FD-EL) and FD.
897  if (S > FD) // Starts too late.
898  continue;
899  uint16_t EL = L-MinL;
900  uint16_t LowS = (EL < FD) ? FD-EL : 0;
901  if (S < LowS) // Starts too early.
902  continue;
903  unsigned InsR = LL[i].first;
904  if (!isValidInsertForm(VR, SrcR, InsR, L, S))
905  continue;
906  if (isDebug()) {
907  dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
908  << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
909  << S << ")\n";
910  }
911  IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
912  IFMap[VR].push_back(RR);
913  Recorded = true;
914  }
915  }
916  }
917 
918  return Recorded;
919 }
920 
921 void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
922  OrderedRegisterList &AVs) {
923  if (isDebug())
924  dbgs() << "visiting block " << printMBBReference(*B) << "\n";
925 
926  // First, check if this block is reachable at all. If not, the bit tracker
927  // will not have any information about registers in it.
928  if (!CMS->BT.reached(B))
929  return;
930 
931  bool DoConst = OptConst;
932  // Keep a separate set of registers defined in this block, so that we
933  // can remove them from the list of available registers once all DT
934  // successors have been processed.
935  RegisterSet BlockDefs, InsDefs;
936  for (MachineInstr &MI : *B) {
937  InsDefs.clear();
938  getInstrDefs(&MI, InsDefs);
939  // Leave those alone. They are more transparent than "insert".
940  bool Skip = MI.isCopy() || MI.isRegSequence();
941 
942  if (!Skip) {
943  // Visit all defined registers, and attempt to find the corresponding
944  // "insert" representations.
945  for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
946  // Do not collect registers that are known to be compile-time cons-
947  // tants, unless requested.
948  if (!DoConst && isConstant(VR))
949  continue;
950  // If VR's cell contains a reference to VR, then VR cannot be defined
951  // via "insert". If VR is a constant that can be generated in a single
952  // instruction (without constant extenders), generating it via insert
953  // makes no sense.
954  if (findSelfReference(VR) || isSmallConstant(VR))
955  continue;
956 
957  findRecordInsertForms(VR, AVs);
958  // Stop if the map size is too large.
959  if (IFMap.size() > MaxIFMSize)
960  return;
961  }
962  }
963 
964  // Insert the defined registers into the list of available registers
965  // after they have been processed.
966  for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
967  AVs.insert(VR);
968  BlockDefs.insert(InsDefs);
969  }
970 
971  for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
972  MachineBasicBlock *SB = DTN->getBlock();
973  collectInBlock(SB, AVs);
974  }
975 
976  for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
977  AVs.remove(VR);
978 }
979 
980 void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
981  RegisterSet &RMs) const {
982  // For a given register VR and a insert form, find the registers that are
983  // used by the current definition of VR, and which would no longer be
984  // needed for it after the definition of VR is replaced with the insert
985  // form. These are the registers that could potentially become dead.
986  RegisterSet Regs[2];
987 
988  unsigned S = 0; // Register set selector.
989  Regs[S].insert(VR);
990 
991  while (!Regs[S].empty()) {
992  // Breadth-first search.
993  unsigned OtherS = 1-S;
994  Regs[OtherS].clear();
995  for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
996  Regs[S].remove(R);
997  if (R == IF.SrcR || R == IF.InsR)
998  continue;
999  // Check if a given register has bits that are references to any other
1000  // registers. This is to detect situations where the instruction that
1001  // defines register R takes register Q as an operand, but R itself does
1002  // not contain any bits from Q. Loads are examples of how this could
1003  // happen:
1004  // R = load Q
1005  // In this case (assuming we do not have any knowledge about the loaded
1006  // value), we must not treat R as a "conveyance" of the bits from Q.
1007  // (The information in BT about R's bits would have them as constants,
1008  // in case of zero-extending loads, or refs to R.)
1009  if (!findNonSelfReference(R))
1010  continue;
1011  RMs.insert(R);
1012  const MachineInstr *DefI = MRI->getVRegDef(R);
1013  assert(DefI);
1014  // Do not iterate past PHI nodes to avoid infinite loops. This can
1015  // make the final set a bit less accurate, but the removable register
1016  // sets are an approximation anyway.
1017  if (DefI->isPHI())
1018  continue;
1019  getInstrUses(DefI, Regs[OtherS]);
1020  }
1021  S = OtherS;
1022  }
1023  // The register VR is added to the list as a side-effect of the algorithm,
1024  // but it is not "potentially removable". A potentially removable register
1025  // is one that may become unused (dead) after conversion to the insert form
1026  // IF, and obviously VR (or its replacement) will not become dead by apply-
1027  // ing IF.
1028  RMs.remove(VR);
1029 }
1030 
1031 void HexagonGenInsert::computeRemovableRegisters() {
1032  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1033  IFListType &LL = I->second;
1034  for (unsigned i = 0, n = LL.size(); i < n; ++i)
1035  findRemovableRegisters(I->first, LL[i].first, LL[i].second);
1036  }
1037 }
1038 
1039 void HexagonGenInsert::pruneEmptyLists() {
1040  // Remove all entries from the map, where the register has no insert forms
1041  // associated with it.
1042  using IterListType = SmallVector<IFMapType::iterator, 16>;
1043  IterListType Prune;
1044  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1045  if (I->second.empty())
1046  Prune.push_back(I);
1047  }
1048  for (unsigned i = 0, n = Prune.size(); i < n; ++i)
1049  IFMap.erase(Prune[i]);
1050 }
1051 
1052 void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1053  IFMapType::iterator F = IFMap.find(VR);
1054  assert(F != IFMap.end());
1055  IFListType &LL = F->second;
1056 
1057  // First, examine the IF candidates for register VR whose removable-regis-
1058  // ter sets are empty. This means that a given candidate will not help eli-
1059  // minate any registers, but since "insert" is not a constant-extendable
1060  // instruction, using such a candidate may reduce code size if the defini-
1061  // tion of VR is constant-extended.
1062  // If there exists a candidate with a non-empty set, the ones with empty
1063  // sets will not be used and can be removed.
1064  MachineInstr *DefVR = MRI->getVRegDef(VR);
1065  bool DefEx = HII->isConstExtended(*DefVR);
1066  bool HasNE = false;
1067  for (unsigned i = 0, n = LL.size(); i < n; ++i) {
1068  if (LL[i].second.empty())
1069  continue;
1070  HasNE = true;
1071  break;
1072  }
1073  if (!DefEx || HasNE) {
1074  // The definition of VR is not constant-extended, or there is a candidate
1075  // with a non-empty set. Remove all candidates with empty sets.
1076  auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1077  return IR.second.empty();
1078  };
1079  llvm::erase_if(LL, IsEmpty);
1080  } else {
1081  // The definition of VR is constant-extended, and all candidates have
1082  // empty removable-register sets. Pick the maximum candidate, and remove
1083  // all others. The "maximum" does not have any special meaning here, it
1084  // is only so that the candidate that will remain on the list is selec-
1085  // ted deterministically.
1086  IFRecord MaxIF = LL[0].first;
1087  for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1088  // If LL[MaxI] < LL[i], then MaxI = i.
1089  const IFRecord &IF = LL[i].first;
1090  unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1091  unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1092  if (M0 > R0)
1093  continue;
1094  if (M0 == R0) {
1095  if (M1 > R1)
1096  continue;
1097  if (M1 == R1) {
1098  if (MaxIF.Wdh > IF.Wdh)
1099  continue;
1100  if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1101  continue;
1102  }
1103  }
1104  // MaxIF < IF.
1105  MaxIF = IF;
1106  }
1107  // Remove everything except the maximum candidate. All register sets
1108  // are empty, so no need to preserve anything.
1109  LL.clear();
1110  LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1111  }
1112 
1113  // Now, remove those whose sets of potentially removable registers are
1114  // contained in another IF candidate for VR. For example, given these
1115  // candidates for %45,
1116  // %45:
1117  // (%44,%41,#9,#8), { %42 }
1118  // (%43,%41,#9,#8), { %42 %44 }
1119  // remove the first one, since it is contained in the second one.
1120  for (unsigned i = 0, n = LL.size(); i < n; ) {
1121  const RegisterSet &RMi = LL[i].second;
1122  unsigned j = 0;
1123  while (j < n) {
1124  if (j != i && LL[j].second.includes(RMi))
1125  break;
1126  j++;
1127  }
1128  if (j == n) { // RMi not contained in anything else.
1129  i++;
1130  continue;
1131  }
1132  LL.erase(LL.begin()+i);
1133  n = LL.size();
1134  }
1135 }
1136 
1137 void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1138  PairMapType &M) {
1139  IFMapType::iterator F = IFMap.find(VR);
1140  assert(F != IFMap.end());
1141  IFListType &LL = F->second;
1142  unsigned Cutoff = VRegDistCutoff;
1143  const MachineInstr *DefV = MRI->getVRegDef(VR);
1144 
1145  for (unsigned i = LL.size(); i > 0; --i) {
1146  unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1147  const MachineInstr *DefS = MRI->getVRegDef(SR);
1148  const MachineInstr *DefI = MRI->getVRegDef(IR);
1149  unsigned DSV = distance(DefS, DefV, RPO, M);
1150  if (DSV < Cutoff) {
1151  unsigned DIV = distance(DefI, DefV, RPO, M);
1152  if (DIV < Cutoff)
1153  continue;
1154  }
1155  LL.erase(LL.begin()+(i-1));
1156  }
1157 }
1158 
1159 void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1160  IFMapType::iterator F = IFMap.find(VR);
1161  assert(F != IFMap.end());
1162  IFListType &LL = F->second;
1163 
1164  auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1165  return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1166  };
1167  llvm::erase_if(LL, IsCopy);
1168 }
1169 
1170 void HexagonGenInsert::pruneCandidates() {
1171  // Remove candidates that are not beneficial, regardless of the final
1172  // selection method.
1173  // First, remove candidates whose potentially removable set is a subset
1174  // of another candidate's set.
1175  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
1176  pruneCoveredSets(I->first);
1177 
1178  UnsignedMap RPO;
1179 
1181 
1182  RPOTType RPOT(MFN);
1183  unsigned RPON = 0;
1184  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
1185  RPO[(*I)->getNumber()] = RPON++;
1186 
1187  PairMapType Memo; // Memoization map for distance calculation.
1188  // Remove candidates that would use registers defined too far away.
1189  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
1190  pruneUsesTooFar(I->first, RPO, Memo);
1191 
1192  pruneEmptyLists();
1193 
1194  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I)
1195  pruneRegCopies(I->first);
1196 }
1197 
1198 namespace {
1199 
1200  // Class for comparing IF candidates for registers that have multiple of
1201  // them. The smaller the candidate, according to this ordering, the better.
1202  // First, compare the number of zeros in the associated potentially remova-
1203  // ble register sets. "Zero" indicates that the register is very likely to
1204  // become dead after this transformation.
1205  // Second, compare "averages", i.e. use-count per size. The lower wins.
1206  // After that, it does not really matter which one is smaller. Resolve
1207  // the tie in some deterministic way.
1208  struct IFOrdering {
1209  IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1210  : UseC(UC), BaseOrd(BO) {}
1211 
1212  bool operator() (const IFRecordWithRegSet &A,
1213  const IFRecordWithRegSet &B) const;
1214 
1215  private:
1216  void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1217  unsigned &Sum) const;
1218 
1219  const UnsignedMap &UseC;
1220  const RegisterOrdering &BaseOrd;
1221  };
1222 
1223 } // end anonymous namespace
1224 
1225 bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1226  const IFRecordWithRegSet &B) const {
1227  unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1228  unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1229  stats(A.second, SizeA, ZeroA, SumA);
1230  stats(B.second, SizeB, ZeroB, SumB);
1231 
1232  // We will pick the minimum element. The more zeros, the better.
1233  if (ZeroA != ZeroB)
1234  return ZeroA > ZeroB;
1235  // Compare SumA/SizeA with SumB/SizeB, lower is better.
1236  uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1237  if (AvgA != AvgB)
1238  return AvgA < AvgB;
1239 
1240  // The sets compare identical so far. Resort to comparing the IF records.
1241  // The actual values don't matter, this is only for determinism.
1242  unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1243  if (OSA != OSB)
1244  return OSA < OSB;
1245  unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1246  if (OIA != OIB)
1247  return OIA < OIB;
1248  if (A.first.Wdh != B.first.Wdh)
1249  return A.first.Wdh < B.first.Wdh;
1250  return A.first.Off < B.first.Off;
1251 }
1252 
1253 void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1254  unsigned &Sum) const {
1255  for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1256  UnsignedMap::const_iterator F = UseC.find(R);
1257  assert(F != UseC.end());
1258  unsigned UC = F->second;
1259  if (UC == 0)
1260  Zero++;
1261  Sum += UC;
1262  Size++;
1263  }
1264 }
1265 
1266 void HexagonGenInsert::selectCandidates() {
1267  // Some registers may have multiple valid candidates. Pick the best one
1268  // (or decide not to use any).
1269 
1270  // Compute the "removability" measure of R:
1271  // For each potentially removable register R, record the number of regis-
1272  // ters with IF candidates, where R appears in at least one set.
1273  RegisterSet AllRMs;
1274  UnsignedMap UseC, RemC;
1275  IFMapType::iterator End = IFMap.end();
1276 
1277  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1278  const IFListType &LL = I->second;
1279  RegisterSet TT;
1280  for (unsigned i = 0, n = LL.size(); i < n; ++i)
1281  TT.insert(LL[i].second);
1282  for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1283  RemC[R]++;
1284  AllRMs.insert(TT);
1285  }
1286 
1287  for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1288  using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1289  using InstrSet = SmallSet<const MachineInstr *, 16>;
1290 
1291  InstrSet UIs;
1292  // Count as the number of instructions in which R is used, not the
1293  // number of operands.
1294  use_iterator E = MRI->use_nodbg_end();
1295  for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1296  UIs.insert(I->getParent());
1297  unsigned C = UIs.size();
1298  // Calculate a measure, which is the number of instructions using R,
1299  // minus the "removability" count computed earlier.
1300  unsigned D = RemC[R];
1301  UseC[R] = (C > D) ? C-D : 0; // doz
1302  }
1303 
1304  bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1305  if (!SelectAll0 && !SelectHas0)
1306  SelectAll0 = true;
1307 
1308  // The smaller the number UseC for a given register R, the "less used"
1309  // R is aside from the opportunities for removal offered by generating
1310  // "insert" instructions.
1311  // Iterate over the IF map, and for those registers that have multiple
1312  // candidates, pick the minimum one according to IFOrdering.
1313  IFOrdering IFO(UseC, BaseOrd);
1314  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1315  IFListType &LL = I->second;
1316  if (LL.empty())
1317  continue;
1318  // Get the minimum element, remember it and clear the list. If the
1319  // element found is adequate, we will put it back on the list, other-
1320  // wise the list will remain empty, and the entry for this register
1321  // will be removed (i.e. this register will not be replaced by insert).
1322  IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
1323  assert(MinI != LL.end());
1324  IFRecordWithRegSet M = *MinI;
1325  LL.clear();
1326 
1327  // We want to make sure that this replacement will have a chance to be
1328  // beneficial, and that means that we want to have indication that some
1329  // register will be removed. The most likely registers to be eliminated
1330  // are the use operands in the definition of I->first. Accept/reject a
1331  // candidate based on how many of its uses it can potentially eliminate.
1332 
1333  RegisterSet Us;
1334  const MachineInstr *DefI = MRI->getVRegDef(I->first);
1335  getInstrUses(DefI, Us);
1336  bool Accept = false;
1337 
1338  if (SelectAll0) {
1339  bool All0 = true;
1340  for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1341  if (UseC[R] == 0)
1342  continue;
1343  All0 = false;
1344  break;
1345  }
1346  Accept = All0;
1347  } else if (SelectHas0) {
1348  bool Has0 = false;
1349  for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1350  if (UseC[R] != 0)
1351  continue;
1352  Has0 = true;
1353  break;
1354  }
1355  Accept = Has0;
1356  }
1357  if (Accept)
1358  LL.push_back(M);
1359  }
1360 
1361  // Remove candidates that add uses of removable registers, unless the
1362  // removable registers are among replacement candidates.
1363  // Recompute the removable registers, since some candidates may have
1364  // been eliminated.
1365  AllRMs.clear();
1366  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1367  const IFListType &LL = I->second;
1368  if (!LL.empty())
1369  AllRMs.insert(LL[0].second);
1370  }
1371  for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1372  IFListType &LL = I->second;
1373  if (LL.empty())
1374  continue;
1375  unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1376  if (AllRMs[SR] || AllRMs[IR])
1377  LL.clear();
1378  }
1379 
1380  pruneEmptyLists();
1381 }
1382 
1383 bool HexagonGenInsert::generateInserts() {
1384  // Create a new register for each one from IFMap, and store them in the
1385  // map.
1386  UnsignedMap RegMap;
1387  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1388  unsigned VR = I->first;
1389  const TargetRegisterClass *RC = MRI->getRegClass(VR);
1390  Register NewVR = MRI->createVirtualRegister(RC);
1391  RegMap[VR] = NewVR;
1392  }
1393 
1394  // We can generate the "insert" instructions using potentially stale re-
1395  // gisters: SrcR and InsR for a given VR may be among other registers that
1396  // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1397  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1398  MachineInstr *MI = MRI->getVRegDef(I->first);
1399  MachineBasicBlock &B = *MI->getParent();
1400  DebugLoc DL = MI->getDebugLoc();
1401  unsigned NewR = RegMap[I->first];
1402  bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1403  const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1404  : HII->get(Hexagon::S2_insertp);
1405  IFRecord IF = I->second[0].first;
1406  unsigned Wdh = IF.Wdh, Off = IF.Off;
1407  unsigned InsS = 0;
1408  if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1409  InsS = Hexagon::isub_lo;
1410  if (Off >= 32) {
1411  InsS = Hexagon::isub_hi;
1412  Off -= 32;
1413  }
1414  }
1415  // Advance to the proper location for inserting instructions. This could
1416  // be B.end().
1418  if (MI->isPHI())
1419  At = B.getFirstNonPHI();
1420 
1421  BuildMI(B, At, DL, D, NewR)
1422  .addReg(IF.SrcR)
1423  .addReg(IF.InsR, 0, InsS)
1424  .addImm(Wdh)
1425  .addImm(Off);
1426 
1427  MRI->clearKillFlags(IF.SrcR);
1428  MRI->clearKillFlags(IF.InsR);
1429  }
1430 
1431  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1432  MachineInstr *DefI = MRI->getVRegDef(I->first);
1433  MRI->replaceRegWith(I->first, RegMap[I->first]);
1434  DefI->eraseFromParent();
1435  }
1436 
1437  return true;
1438 }
1439 
1440 bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1441  bool Changed = false;
1442 
1443  for (auto *DTN : children<MachineDomTreeNode*>(N))
1444  Changed |= removeDeadCode(DTN);
1445 
1446  MachineBasicBlock *B = N->getBlock();
1447  std::vector<MachineInstr*> Instrs;
1448  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
1449  Instrs.push_back(&*I);
1450 
1451  for (MachineInstr *MI : Instrs) {
1452  unsigned Opc = MI->getOpcode();
1453  // Do not touch lifetime markers. This is why the target-independent DCE
1454  // cannot be used.
1455  if (Opc == TargetOpcode::LIFETIME_START ||
1457  continue;
1458  bool Store = false;
1459  if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
1460  continue;
1461 
1462  bool AllDead = true;
1464  for (const MachineOperand &MO : MI->operands()) {
1465  if (!MO.isReg() || !MO.isDef())
1466  continue;
1467  Register R = MO.getReg();
1468  if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1469  AllDead = false;
1470  break;
1471  }
1472  Regs.push_back(R);
1473  }
1474  if (!AllDead)
1475  continue;
1476 
1477  B->erase(MI);
1478  for (unsigned I = 0, N = Regs.size(); I != N; ++I)
1480  Changed = true;
1481  }
1482 
1483  return Changed;
1484 }
1485 
1486 bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1487  if (skipFunction(MF.getFunction()))
1488  return false;
1489 
1490  bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1491  bool Changed = false;
1492 
1493  // Verify: one, but not both.
1495 
1496  IFMap.clear();
1497  BaseOrd.clear();
1498  CellOrd.clear();
1499 
1500  const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1501  HII = ST.getInstrInfo();
1502  HRI = ST.getRegisterInfo();
1503  MFN = &MF;
1504  MRI = &MF.getRegInfo();
1505  MDT = &getAnalysis<MachineDominatorTree>();
1506 
1507  // Clean up before any further processing, so that dead code does not
1508  // get used in a newly generated "insert" instruction. Have a custom
1509  // version of DCE that preserves lifetime markers. Without it, merging
1510  // of stack objects can fail to recognize and merge disjoint objects
1511  // leading to unnecessary stack growth.
1512  Changed = removeDeadCode(MDT->getRootNode());
1513 
1514  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1515  BitTracker BTLoc(HE, MF);
1516  BTLoc.trace(isDebug());
1517  BTLoc.run();
1518  CellMapShadow MS(BTLoc);
1519  CMS = &MS;
1520 
1521  buildOrderingMF(BaseOrd);
1522  buildOrderingBT(BaseOrd, CellOrd);
1523 
1524  if (isDebug()) {
1525  dbgs() << "Cell ordering:\n";
1526  for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end();
1527  I != E; ++I) {
1528  unsigned VR = I->first, Pos = I->second;
1529  dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1530  }
1531  }
1532 
1533  // Collect candidates for conversion into the insert forms.
1534  MachineBasicBlock *RootB = MDT->getRoot();
1535  OrderedRegisterList AvailR(CellOrd);
1536 
1537  const char *const TGName = "hexinsert";
1538  const char *const TGDesc = "Generate Insert Instructions";
1539 
1540  {
1541  NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1542  TimingDetail);
1543  collectInBlock(RootB, AvailR);
1544  // Complete the information gathered in IFMap.
1545  computeRemovableRegisters();
1546  }
1547 
1548  if (isDebug()) {
1549  dbgs() << "Candidates after collection:\n";
1550  dump_map();
1551  }
1552 
1553  if (IFMap.empty())
1554  return Changed;
1555 
1556  {
1557  NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1558  pruneCandidates();
1559  }
1560 
1561  if (isDebug()) {
1562  dbgs() << "Candidates after pruning:\n";
1563  dump_map();
1564  }
1565 
1566  if (IFMap.empty())
1567  return Changed;
1568 
1569  {
1570  NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1571  selectCandidates();
1572  }
1573 
1574  if (isDebug()) {
1575  dbgs() << "Candidates after selection:\n";
1576  dump_map();
1577  }
1578 
1579  // Filter out vregs beyond the cutoff.
1580  if (VRegIndexCutoff.getPosition()) {
1581  unsigned Cutoff = VRegIndexCutoff;
1582 
1583  using IterListType = SmallVector<IFMapType::iterator, 16>;
1584 
1585  IterListType Out;
1586  for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1587  unsigned Idx = Register::virtReg2Index(I->first);
1588  if (Idx >= Cutoff)
1589  Out.push_back(I);
1590  }
1591  for (unsigned i = 0, n = Out.size(); i < n; ++i)
1592  IFMap.erase(Out[i]);
1593  }
1594  if (IFMap.empty())
1595  return Changed;
1596 
1597  {
1598  NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1599  TimingDetail);
1600  generateInserts();
1601  }
1602 
1603  return true;
1604 }
1605 
1607  return new HexagonGenInsert();
1608 }
1609 
1610 //===----------------------------------------------------------------------===//
1611 // Public Constructor Functions
1612 //===----------------------------------------------------------------------===//
1613 
1614 INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1615  "Hexagon generate \"insert\" instructions", false, false)
1617 INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1618  "Hexagon generate \"insert\" instructions", false, false)
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::BitVector::operator[]
reference operator[](unsigned Idx)
Definition: BitVector.h:436
llvm::MachineRegisterInfo::markUsesInDebugValueAsUndef
void markUsesInDebugValueAsUndef(Register Reg) const
markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the specified register as undefined wh...
Definition: MachineRegisterInfo.cpp:530
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
MathExtras.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::SmallVectorImpl::erase
iterator erase(const_iterator CI)
Definition: SmallVector.h:705
llvm::sys::path::const_iterator::end
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::upper_bound
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1704
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ARM::PredBlockMask::TT
@ TT
OptConst
static cl::opt< bool > OptConst("insert-const", cl::init(false), cl::Hidden, cl::ZeroOrMore)
llvm::ISD::LIFETIME_END
@ LIFETIME_END
Definition: ISDOpcodes.h:1178
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::BitTracker::lookup
const RegisterCell & lookup(unsigned Reg) const
Definition: BitTracker.h:357
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
T
llvm::BitTracker::BitValue::RefI
BitRef RefI
Definition: BitTracker.h:192
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1691
StringRef.h
llvm::ISD::LIFETIME_START
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1177
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:327
llvm::BitVector::operator|=
BitVector & operator|=(const BitVector &RHS)
Definition: BitVector.h:545
llvm::ARM_MB::LD
@ LD
Definition: ARMBaseInfo.h:72
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:208
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::AMDGPUISD::IF
@ IF
Definition: AMDGPUISelLowering.h:357
HexagonSubtarget.h
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1762
intersects
static Optional< ArrayRef< InsnRange >::iterator > intersects(const MachineInstr *StartMI, const MachineInstr *EndMI, const ArrayRef< InsnRange > &Ranges, const InstructionOrdering &Ordering)
Check if the instruction range [StartMI, EndMI] intersects any instruction range in Ranges.
Definition: DbgEntityHistoryCalculator.cpp:115
RPO
rpo function Deduce function attributes in RPO
Definition: FunctionAttrs.cpp:1937
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::BitTracker::BitValue::Ref
@ Ref
Definition: BitTracker.h:160
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
DenseMap.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
OptSelectAll0
static cl::opt< bool > OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, cl::ZeroOrMore)
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
STLExtras.h
BaseType
RegisterSet
SmallSet< unsigned, 4 > RegisterSet
Definition: Thumb2ITBlockPass.cpp:39
llvm::Lo_32
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:353
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1559
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
llvm::MachineRegisterInfo::use_nodbg_begin
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:518
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:589
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
GraphTraits.h
CommandLine.h
BitTracker.h
llvm::BitTracker::BitValue::is
bool is(unsigned T) const
Definition: BitTracker.h:209
llvm::initializeHexagonGenInsertPass
void initializeHexagonGenInsertPass(PassRegistry &)
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:644
MaxIFMSize
static cl::opt< unsigned > MaxIFMSize("insert-max-ifmap", cl::init(1024), cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap"))
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::BitTracker::RegisterCell::width
uint16_t width() const
Definition: BitTracker.h:303
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
t
bitcast float %x to i32 %s=and i32 %t, 2147483647 %d=bitcast i32 %s to float ret float %d } declare float @fabsf(float %n) define float @bar(float %x) nounwind { %d=call float @fabsf(float %x) ret float %d } This IR(from PR6194):target datalayout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple="x86_64-apple-darwin10.0.0" %0=type { double, double } %struct.float3=type { float, float, float } define void @test(%0, %struct.float3 *nocapture %res) nounwind noinline ssp { entry:%tmp18=extractvalue %0 %0, 0 t
Definition: README-SSE.txt:788
llvm::BitTracker
Definition: BitTracker.h:35
llvm::HexagonEvaluator
Definition: HexagonBitTracker.h:25
false
Definition: StackSlotColoring.cpp:142
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::M0
unsigned M0(unsigned Val)
Definition: VE.h:371
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
BitVector.h
llvm::MachineRegisterInfo::use_nodbg_end
static use_nodbg_iterator use_nodbg_end()
Definition: MachineRegisterInfo.h:521
DebugLoc.h
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:746
llvm::BitVector
Definition: BitVector.h:74
HexagonInstrInfo.h
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
isDebug
static bool isDebug()
Definition: HexagonGenInsert.cpp:81
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Hi_32
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:348
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::createHexagonGenInsert
FunctionPass * createHexagonGenInsert()
Definition: HexagonGenInsert.cpp:1606
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:120
PB
PassBuilder PB(Machine, PassOpts->PTO, None, &PIC)
llvm::isInt< 8 >
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:367
llvm::MachineRegisterInfo::clearKillFlags
void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
Definition: MachineRegisterInfo.cpp:429
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
Timer.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:206
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
VRegIndexCutoff
static cl::opt< unsigned > VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation."))
llvm::BitTracker::BitRef::Reg
Register Reg
Definition: BitTracker.h:134
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
uint64_t
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::BitVector::any
bool any() const
any - Returns true if any bit is set.
Definition: BitVector.h:162
stats
block placement stats
Definition: MachineBlockPlacement.cpp:3464
llvm::MachineRegisterInfo::use_nodbg_iterator
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
Definition: MachineRegisterInfo.h:517
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1601
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
isConstant
static bool isConstant(const MachineInstr &MI)
Definition: AMDGPUInstructionSelector.cpp:2312
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
MachineFunctionPass.h
OptTiming
static cl::opt< bool > OptTiming("insert-timing", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable timing of insert generation"))
HexagonRegisterInfo.h
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:226
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1255
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::HexagonInstrInfo
Definition: HexagonInstrInfo.h:38
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonGenInsert.cpp:46
llvm::MachineRegisterInfo::use_nodbg_empty
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
Definition: MachineRegisterInfo.h:566
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1562
OptSelectHas0
static cl::opt< bool > OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, cl::ZeroOrMore)
llvm::sys::fs::remove
std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
llvm::isCurrentDebugType
bool isCurrentDebugType(const char *Type)
isCurrentDebugType - Return true if the specified string is the debug type specified on the command l...
Definition: Debug.cpp:52
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1056
HexagonBitTracker.h
OptTimingDetail
static cl::opt< bool > OptTimingDetail("insert-timing-detail", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " "generation"))
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::DomTreeNodeBase
Base class for the actual dominator tree node.
Definition: LiveIntervalCalc.h:24
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
llvm::BitTracker::RegisterCell
Definition: BitTracker.h:300
llvm::X86II::TB
@ TB
Definition: X86BaseInfo.h:805
llvm::HexagonISD::CP
@ CP
Definition: HexagonISelLowering.h:53
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
j
return j(j<< 16)
MaxORLSize
static cl::opt< unsigned > MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList"))
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::BitVector::test
bool test(unsigned Idx) const
Definition: BitVector.h:447
VRegDistCutoff
static cl::opt< unsigned > VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " "generation."))
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:600
uint16_t
llvm::NamedRegionTimer
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:166
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1522
x
TODO unsigned x
Definition: README.txt:10
llvm::BitVector::find_next
int find_next(unsigned Prev) const
find_next - Returns the index of the next set bit following the "Prev" bit.
Definition: BitVector.h:300
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", "Hexagon generate \"insert\" instructions", false, false) INITIALIZE_PASS_END(HexagonGenInsert
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:290
llvm::BitVector::reset
BitVector & reset()
Definition: BitVector.h:384
llvm::BitTracker::BitRef::Pos
uint16_t Pos
Definition: BitTracker.h:135
llvm::DebugFlag
bool DebugFlag
This boolean is set to true if the '-debug' command line option is specified.
Definition: Debug.cpp:45
PostOrderIterator.h
SmallVector.h
lookup
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:108
MachineInstrBuilder.h
BT
BitTracker BT
Definition: BitTracker.cpp:73
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
N
#define N
llvm::BitVector::find_first
int find_first() const
find_first - Returns the index of the first set bit, -1 if none of the bits are set.
Definition: BitVector.h:292
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
generate
We currently generate
Definition: README.txt:597
llvm::BitTracker::BitValue::Type
ValueType Type
Definition: BitTracker.h:191
MachineOperand.h
llvm::BitVector::anyCommon
bool anyCommon(const BitVector &RHS) const
Test if any common bits are set.
Definition: BitVector.h:469
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::HexagonRegisterInfo
Definition: HexagonRegisterInfo.h:29
llvm::Register::virtReg2Index
static unsigned virtReg2Index(Register Reg)
Convert a virtual register number to a 0-based index.
Definition: Register.h:77
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
llvm::M1
unsigned M1(unsigned Val)
Definition: VE.h:372
raw_ostream.h
llvm::BitTracker::BitValue
Definition: BitTracker.h:155
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:46
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:680
llvm::MachineInstrBundleIterator< const MachineInstr >
hexinsert
hexinsert
Definition: HexagonGenInsert.cpp:1617
InitializePasses.h
TargetRegisterInfo.h
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
MachineDominators.h
SmallSet.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38