LLVM  9.0.0svn
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1 //===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass identifies loops where we can generate the Hexagon hardware
10 // loop instruction. The hardware loop can perform loop branches with a
11 // zero-cycle overhead.
12 //
13 // The pattern that defines the induction variable can changed depending on
14 // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15 // normalizes induction variables, and the Loop Strength Reduction pass
16 // run by 'llc' may also make changes to the induction variable.
17 // The pattern detected by this phase is due to running Strength Reduction.
18 //
19 // Criteria for hardware loops:
20 // - Countable loops (w/ ind. var for a trip count)
21 // - Assumes loops are normalized by IndVarSimplify
22 // - Try inner-most loops first
23 // - No function calls in loops.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "HexagonInstrInfo.h"
28 #include "HexagonSubtarget.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/StringRef.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DebugLoc.h"
47 #include "llvm/Pass.h"
49 #include "llvm/Support/Debug.h"
53 #include <cassert>
54 #include <cstdint>
55 #include <cstdlib>
56 #include <iterator>
57 #include <map>
58 #include <set>
59 #include <string>
60 #include <utility>
61 #include <vector>
62 
63 using namespace llvm;
64 
65 #define DEBUG_TYPE "hwloops"
66 
67 #ifndef NDEBUG
68 static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
69 
70 // Option to create preheader only for a specific function.
71 static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
72  cl::init(""));
73 #endif
74 
75 // Option to create a preheader if one doesn't exist.
76 static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
77  cl::Hidden, cl::init(true),
78  cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
79 
80 // Turn it off by default. If a preheader block is not created here, the
81 // software pipeliner may be unable to find a block suitable to serve as
82 // a preheader. In that case SWP will not run.
83 static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
84  cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
85  "instructions"));
86 
87 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
88 
89 namespace llvm {
90 
93 
94 } // end namespace llvm
95 
96 namespace {
97 
98  class CountValue;
99 
100  struct HexagonHardwareLoops : public MachineFunctionPass {
101  MachineLoopInfo *MLI;
104  const HexagonInstrInfo *TII;
105  const HexagonRegisterInfo *TRI;
106 #ifndef NDEBUG
107  static int Counter;
108 #endif
109 
110  public:
111  static char ID;
112 
113  HexagonHardwareLoops() : MachineFunctionPass(ID) {}
114 
115  bool runOnMachineFunction(MachineFunction &MF) override;
116 
117  StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
118 
119  void getAnalysisUsage(AnalysisUsage &AU) const override {
123  }
124 
125  private:
126  using LoopFeederMap = std::map<unsigned, MachineInstr *>;
127 
128  /// Kinds of comparisons in the compare instructions.
129  struct Comparison {
130  enum Kind {
131  EQ = 0x01,
132  NE = 0x02,
133  L = 0x04,
134  G = 0x08,
135  U = 0x40,
136  LTs = L,
137  LEs = L | EQ,
138  GTs = G,
139  GEs = G | EQ,
140  LTu = L | U,
141  LEu = L | EQ | U,
142  GTu = G | U,
143  GEu = G | EQ | U
144  };
145 
146  static Kind getSwappedComparison(Kind Cmp) {
147  assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
148  if ((Cmp & L) || (Cmp & G))
149  return (Kind)(Cmp ^ (L|G));
150  return Cmp;
151  }
152 
153  static Kind getNegatedComparison(Kind Cmp) {
154  if ((Cmp & L) || (Cmp & G))
155  return (Kind)((Cmp ^ (L | G)) ^ EQ);
156  if ((Cmp & NE) || (Cmp & EQ))
157  return (Kind)(Cmp ^ (EQ | NE));
158  return (Kind)0;
159  }
160 
161  static bool isSigned(Kind Cmp) {
162  return (Cmp & (L | G) && !(Cmp & U));
163  }
164 
165  static bool isUnsigned(Kind Cmp) {
166  return (Cmp & U);
167  }
168  };
169 
170  /// Find the register that contains the loop controlling
171  /// induction variable.
172  /// If successful, it will return true and set the \p Reg, \p IVBump
173  /// and \p IVOp arguments. Otherwise it will return false.
174  /// The returned induction register is the register R that follows the
175  /// following induction pattern:
176  /// loop:
177  /// R = phi ..., [ R.next, LatchBlock ]
178  /// R.next = R + #bump
179  /// if (R.next < #N) goto loop
180  /// IVBump is the immediate value added to R, and IVOp is the instruction
181  /// "R.next = R + #bump".
182  bool findInductionRegister(MachineLoop *L, unsigned &Reg,
183  int64_t &IVBump, MachineInstr *&IVOp) const;
184 
185  /// Return the comparison kind for the specified opcode.
186  Comparison::Kind getComparisonKind(unsigned CondOpc,
187  MachineOperand *InitialValue,
188  const MachineOperand *Endvalue,
189  int64_t IVBump) const;
190 
191  /// Analyze the statements in a loop to determine if the loop
192  /// has a computable trip count and, if so, return a value that represents
193  /// the trip count expression.
194  CountValue *getLoopTripCount(MachineLoop *L,
196 
197  /// Return the expression that represents the number of times
198  /// a loop iterates. The function takes the operands that represent the
199  /// loop start value, loop end value, and induction value. Based upon
200  /// these operands, the function attempts to compute the trip count.
201  /// If the trip count is not directly available (as an immediate value,
202  /// or a register), the function will attempt to insert computation of it
203  /// to the loop's preheader.
204  CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
205  const MachineOperand *End, unsigned IVReg,
206  int64_t IVBump, Comparison::Kind Cmp) const;
207 
208  /// Return true if the instruction is not valid within a hardware
209  /// loop.
210  bool isInvalidLoopOperation(const MachineInstr *MI,
211  bool IsInnerHWLoop) const;
212 
213  /// Return true if the loop contains an instruction that inhibits
214  /// using the hardware loop.
215  bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
216 
217  /// Given a loop, check if we can convert it to a hardware loop.
218  /// If so, then perform the conversion and return true.
219  bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
220 
221  /// Return true if the instruction is now dead.
222  bool isDead(const MachineInstr *MI,
223  SmallVectorImpl<MachineInstr *> &DeadPhis) const;
224 
225  /// Remove the instruction if it is now dead.
226  void removeIfDead(MachineInstr *MI);
227 
228  /// Make sure that the "bump" instruction executes before the
229  /// compare. We need that for the IV fixup, so that the compare
230  /// instruction would not use a bumped value that has not yet been
231  /// defined. If the instructions are out of order, try to reorder them.
232  bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
233 
234  /// Return true if MO and MI pair is visited only once. If visited
235  /// more than once, this indicates there is recursion. In such a case,
236  /// return false.
237  bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
238  const MachineOperand *MO,
239  LoopFeederMap &LoopFeederPhi) const;
240 
241  /// Return true if the Phi may generate a value that may underflow,
242  /// or may wrap.
243  bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
245  LoopFeederMap &LoopFeederPhi) const;
246 
247  /// Return true if the induction variable may underflow an unsigned
248  /// value in the first iteration.
249  bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
250  const MachineOperand *EndVal,
252  LoopFeederMap &LoopFeederPhi) const;
253 
254  /// Check if the given operand has a compile-time known constant
255  /// value. Return true if yes, and false otherwise. When returning true, set
256  /// Val to the corresponding constant value.
257  bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
258 
259  /// Check if the operand has a compile-time known constant value.
260  bool isImmediate(const MachineOperand &MO) const {
261  int64_t V;
262  return checkForImmediate(MO, V);
263  }
264 
265  /// Return the immediate for the specified operand.
266  int64_t getImmediate(const MachineOperand &MO) const {
267  int64_t V;
268  if (!checkForImmediate(MO, V))
269  llvm_unreachable("Invalid operand");
270  return V;
271  }
272 
273  /// Reset the given machine operand to now refer to a new immediate
274  /// value. Assumes that the operand was already referencing an immediate
275  /// value, either directly, or via a register.
276  void setImmediate(MachineOperand &MO, int64_t Val);
277 
278  /// Fix the data flow of the induction variable.
279  /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
280  /// |
281  /// +-> back to phi
282  /// where "bump" is the increment of the induction variable:
283  /// iv = iv + #const.
284  /// Due to some prior code transformations, the actual flow may look
285  /// like this:
286  /// phi -+-> bump ---> back to phi
287  /// |
288  /// +-> comparison-in-latch (against upper_bound-bump),
289  /// i.e. the comparison that controls the loop execution may be using
290  /// the value of the induction variable from before the increment.
291  ///
292  /// Return true if the loop's flow is the desired one (i.e. it's
293  /// either been fixed, or no fixing was necessary).
294  /// Otherwise, return false. This can happen if the induction variable
295  /// couldn't be identified, or if the value in the latch's comparison
296  /// cannot be adjusted to reflect the post-bump value.
297  bool fixupInductionVariable(MachineLoop *L);
298 
299  /// Given a loop, if it does not have a preheader, create one.
300  /// Return the block that is the preheader.
301  MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
302  };
303 
304  char HexagonHardwareLoops::ID = 0;
305 #ifndef NDEBUG
306  int HexagonHardwareLoops::Counter = 0;
307 #endif
308 
309  /// Abstraction for a trip count of a loop. A smaller version
310  /// of the MachineOperand class without the concerns of changing the
311  /// operand representation.
312  class CountValue {
313  public:
314  enum CountValueType {
315  CV_Register,
316  CV_Immediate
317  };
318 
319  private:
320  CountValueType Kind;
321  union Values {
322  struct {
323  unsigned Reg;
324  unsigned Sub;
325  } R;
326  unsigned ImmVal;
327  } Contents;
328 
329  public:
330  explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
331  Kind = t;
332  if (Kind == CV_Register) {
333  Contents.R.Reg = v;
334  Contents.R.Sub = u;
335  } else {
336  Contents.ImmVal = v;
337  }
338  }
339 
340  bool isReg() const { return Kind == CV_Register; }
341  bool isImm() const { return Kind == CV_Immediate; }
342 
343  unsigned getReg() const {
344  assert(isReg() && "Wrong CountValue accessor");
345  return Contents.R.Reg;
346  }
347 
348  unsigned getSubReg() const {
349  assert(isReg() && "Wrong CountValue accessor");
350  return Contents.R.Sub;
351  }
352 
353  unsigned getImm() const {
354  assert(isImm() && "Wrong CountValue accessor");
355  return Contents.ImmVal;
356  }
357 
358  void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
359  if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
360  if (isImm()) { OS << Contents.ImmVal; }
361  }
362  };
363 
364 } // end anonymous namespace
365 
366 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
367  "Hexagon Hardware Loops", false, false)
370 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
371  "Hexagon Hardware Loops", false, false)
372 
374  return new HexagonHardwareLoops();
375 }
376 
377 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
378  LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
379  if (skipFunction(MF.getFunction()))
380  return false;
381 
382  bool Changed = false;
383 
384  MLI = &getAnalysis<MachineLoopInfo>();
385  MRI = &MF.getRegInfo();
386  MDT = &getAnalysis<MachineDominatorTree>();
388  TII = HST.getInstrInfo();
389  TRI = HST.getRegisterInfo();
390 
391  for (auto &L : *MLI)
392  if (!L->getParentLoop()) {
393  bool L0Used = false;
394  bool L1Used = false;
395  Changed |= convertToHardwareLoop(L, L0Used, L1Used);
396  }
397 
398  return Changed;
399 }
400 
401 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
402  unsigned &Reg,
403  int64_t &IVBump,
404  MachineInstr *&IVOp
405  ) const {
406  MachineBasicBlock *Header = L->getHeader();
407  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
408  MachineBasicBlock *Latch = L->getLoopLatch();
409  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
410  if (!Header || !Preheader || !Latch || !ExitingBlock)
411  return false;
412 
413  // This pair represents an induction register together with an immediate
414  // value that will be added to it in each loop iteration.
415  using RegisterBump = std::pair<unsigned, int64_t>;
416 
417  // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
418  // from an induction operation
419  // R.next = R + bump
420  // where bump is an immediate value.
421  using InductionMap = std::map<unsigned, RegisterBump>;
422 
423  InductionMap IndMap;
424 
425  using instr_iterator = MachineBasicBlock::instr_iterator;
426 
427  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
428  I != E && I->isPHI(); ++I) {
429  MachineInstr *Phi = &*I;
430 
431  // Have a PHI instruction. Get the operand that corresponds to the
432  // latch block, and see if is a result of an addition of form "reg+imm",
433  // where the "reg" is defined by the PHI node we are looking at.
434  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
435  if (Phi->getOperand(i+1).getMBB() != Latch)
436  continue;
437 
438  unsigned PhiOpReg = Phi->getOperand(i).getReg();
439  MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
440 
441  if (DI->getDesc().isAdd()) {
442  // If the register operand to the add is the PHI we're looking at, this
443  // meets the induction pattern.
444  unsigned IndReg = DI->getOperand(1).getReg();
445  MachineOperand &Opnd2 = DI->getOperand(2);
446  int64_t V;
447  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
448  unsigned UpdReg = DI->getOperand(0).getReg();
449  IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
450  }
451  }
452  } // for (i)
453  } // for (instr)
454 
456  MachineBasicBlock *TB = nullptr, *FB = nullptr;
457  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
458  if (NotAnalyzed)
459  return false;
460 
461  unsigned PredR, PredPos, PredRegFlags;
462  if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
463  return false;
464 
465  MachineInstr *PredI = MRI->getVRegDef(PredR);
466  if (!PredI->isCompare())
467  return false;
468 
469  unsigned CmpReg1 = 0, CmpReg2 = 0;
470  int CmpImm = 0, CmpMask = 0;
471  bool CmpAnalyzed =
472  TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
473  // Fail if the compare was not analyzed, or it's not comparing a register
474  // with an immediate value. Not checking the mask here, since we handle
475  // the individual compare opcodes (including A4_cmpb*) later on.
476  if (!CmpAnalyzed)
477  return false;
478 
479  // Exactly one of the input registers to the comparison should be among
480  // the induction registers.
481  InductionMap::iterator IndMapEnd = IndMap.end();
482  InductionMap::iterator F = IndMapEnd;
483  if (CmpReg1 != 0) {
484  InductionMap::iterator F1 = IndMap.find(CmpReg1);
485  if (F1 != IndMapEnd)
486  F = F1;
487  }
488  if (CmpReg2 != 0) {
489  InductionMap::iterator F2 = IndMap.find(CmpReg2);
490  if (F2 != IndMapEnd) {
491  if (F != IndMapEnd)
492  return false;
493  F = F2;
494  }
495  }
496  if (F == IndMapEnd)
497  return false;
498 
499  Reg = F->second.first;
500  IVBump = F->second.second;
501  IVOp = MRI->getVRegDef(F->first);
502  return true;
503 }
504 
505 // Return the comparison kind for the specified opcode.
506 HexagonHardwareLoops::Comparison::Kind
507 HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
508  MachineOperand *InitialValue,
509  const MachineOperand *EndValue,
510  int64_t IVBump) const {
512  switch (CondOpc) {
513  case Hexagon::C2_cmpeq:
514  case Hexagon::C2_cmpeqi:
515  case Hexagon::C2_cmpeqp:
516  Cmp = Comparison::EQ;
517  break;
518  case Hexagon::C4_cmpneq:
519  case Hexagon::C4_cmpneqi:
520  Cmp = Comparison::NE;
521  break;
522  case Hexagon::C2_cmplt:
523  Cmp = Comparison::LTs;
524  break;
525  case Hexagon::C2_cmpltu:
526  Cmp = Comparison::LTu;
527  break;
528  case Hexagon::C4_cmplte:
529  case Hexagon::C4_cmpltei:
530  Cmp = Comparison::LEs;
531  break;
532  case Hexagon::C4_cmplteu:
533  case Hexagon::C4_cmplteui:
534  Cmp = Comparison::LEu;
535  break;
536  case Hexagon::C2_cmpgt:
537  case Hexagon::C2_cmpgti:
538  case Hexagon::C2_cmpgtp:
539  Cmp = Comparison::GTs;
540  break;
541  case Hexagon::C2_cmpgtu:
542  case Hexagon::C2_cmpgtui:
543  case Hexagon::C2_cmpgtup:
544  Cmp = Comparison::GTu;
545  break;
546  case Hexagon::C2_cmpgei:
547  Cmp = Comparison::GEs;
548  break;
549  case Hexagon::C2_cmpgeui:
550  Cmp = Comparison::GEs;
551  break;
552  default:
553  return (Comparison::Kind)0;
554  }
555  return Cmp;
556 }
557 
558 /// Analyze the statements in a loop to determine if the loop has
559 /// a computable trip count and, if so, return a value that represents
560 /// the trip count expression.
561 ///
562 /// This function iterates over the phi nodes in the loop to check for
563 /// induction variable patterns that are used in the calculation for
564 /// the number of time the loop is executed.
565 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
567  MachineBasicBlock *TopMBB = L->getTopBlock();
569  assert(PI != TopMBB->pred_end() &&
570  "Loop must have more than one incoming edge!");
571  MachineBasicBlock *Backedge = *PI++;
572  if (PI == TopMBB->pred_end()) // dead loop?
573  return nullptr;
574  MachineBasicBlock *Incoming = *PI++;
575  if (PI != TopMBB->pred_end()) // multiple backedges?
576  return nullptr;
577 
578  // Make sure there is one incoming and one backedge and determine which
579  // is which.
580  if (L->contains(Incoming)) {
581  if (L->contains(Backedge))
582  return nullptr;
583  std::swap(Incoming, Backedge);
584  } else if (!L->contains(Backedge))
585  return nullptr;
586 
587  // Look for the cmp instruction to determine if we can get a useful trip
588  // count. The trip count can be either a register or an immediate. The
589  // location of the value depends upon the type (reg or imm).
590  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
591  if (!ExitingBlock)
592  return nullptr;
593 
594  unsigned IVReg = 0;
595  int64_t IVBump = 0;
596  MachineInstr *IVOp;
597  bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
598  if (!FoundIV)
599  return nullptr;
600 
601  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
602 
603  MachineOperand *InitialValue = nullptr;
604  MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
605  MachineBasicBlock *Latch = L->getLoopLatch();
606  for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
607  MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
608  if (MBB == Preheader)
609  InitialValue = &IV_Phi->getOperand(i);
610  else if (MBB == Latch)
611  IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
612  }
613  if (!InitialValue)
614  return nullptr;
615 
617  MachineBasicBlock *TB = nullptr, *FB = nullptr;
618  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
619  if (NotAnalyzed)
620  return nullptr;
621 
622  MachineBasicBlock *Header = L->getHeader();
623  // TB must be non-null. If FB is also non-null, one of them must be
624  // the header. Otherwise, branch to TB could be exiting the loop, and
625  // the fall through can go to the header.
626  assert (TB && "Exit block without a branch?");
627  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
628  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
630  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
631  if (NotAnalyzed)
632  return nullptr;
633  if (TB == Latch)
634  TB = (LTB == Header) ? LTB : LFB;
635  else
636  FB = (LTB == Header) ? LTB: LFB;
637  }
638  assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
639  if (!TB || (FB && TB != Header && FB != Header))
640  return nullptr;
641 
642  // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
643  // to put imm(0), followed by P in the vector Cond.
644  // If TB is not the header, it means that the "not-taken" path must lead
645  // to the header.
646  bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
647  unsigned PredReg, PredPos, PredRegFlags;
648  if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
649  return nullptr;
650  MachineInstr *CondI = MRI->getVRegDef(PredReg);
651  unsigned CondOpc = CondI->getOpcode();
652 
653  unsigned CmpReg1 = 0, CmpReg2 = 0;
654  int Mask = 0, ImmValue = 0;
655  bool AnalyzedCmp =
656  TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
657  if (!AnalyzedCmp)
658  return nullptr;
659 
660  // The comparison operator type determines how we compute the loop
661  // trip count.
662  OldInsts.push_back(CondI);
663  OldInsts.push_back(IVOp);
664 
665  // Sadly, the following code gets information based on the position
666  // of the operands in the compare instruction. This has to be done
667  // this way, because the comparisons check for a specific relationship
668  // between the operands (e.g. is-less-than), rather than to find out
669  // what relationship the operands are in (as on PPC).
670  Comparison::Kind Cmp;
671  bool isSwapped = false;
672  const MachineOperand &Op1 = CondI->getOperand(1);
673  const MachineOperand &Op2 = CondI->getOperand(2);
674  const MachineOperand *EndValue = nullptr;
675 
676  if (Op1.isReg()) {
677  if (Op2.isImm() || Op1.getReg() == IVReg)
678  EndValue = &Op2;
679  else {
680  EndValue = &Op1;
681  isSwapped = true;
682  }
683  }
684 
685  if (!EndValue)
686  return nullptr;
687 
688  Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
689  if (!Cmp)
690  return nullptr;
691  if (Negated)
692  Cmp = Comparison::getNegatedComparison(Cmp);
693  if (isSwapped)
694  Cmp = Comparison::getSwappedComparison(Cmp);
695 
696  if (InitialValue->isReg()) {
697  unsigned R = InitialValue->getReg();
698  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
699  if (!MDT->properlyDominates(DefBB, Header)) {
700  int64_t V;
701  if (!checkForImmediate(*InitialValue, V))
702  return nullptr;
703  }
704  OldInsts.push_back(MRI->getVRegDef(R));
705  }
706  if (EndValue->isReg()) {
707  unsigned R = EndValue->getReg();
708  MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
709  if (!MDT->properlyDominates(DefBB, Header)) {
710  int64_t V;
711  if (!checkForImmediate(*EndValue, V))
712  return nullptr;
713  }
714  OldInsts.push_back(MRI->getVRegDef(R));
715  }
716 
717  return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
718 }
719 
720 /// Helper function that returns the expression that represents the
721 /// number of times a loop iterates. The function takes the operands that
722 /// represent the loop start value, loop end value, and induction value.
723 /// Based upon these operands, the function attempts to compute the trip count.
724 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
725  const MachineOperand *Start,
726  const MachineOperand *End,
727  unsigned IVReg,
728  int64_t IVBump,
729  Comparison::Kind Cmp) const {
730  // Cannot handle comparison EQ, i.e. while (A == B).
731  if (Cmp == Comparison::EQ)
732  return nullptr;
733 
734  // Check if either the start or end values are an assignment of an immediate.
735  // If so, use the immediate value rather than the register.
736  if (Start->isReg()) {
737  const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
738  if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
739  StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
740  Start = &StartValInstr->getOperand(1);
741  }
742  if (End->isReg()) {
743  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
744  if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
745  EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
746  End = &EndValInstr->getOperand(1);
747  }
748 
749  if (!Start->isReg() && !Start->isImm())
750  return nullptr;
751  if (!End->isReg() && !End->isImm())
752  return nullptr;
753 
754  bool CmpLess = Cmp & Comparison::L;
755  bool CmpGreater = Cmp & Comparison::G;
756  bool CmpHasEqual = Cmp & Comparison::EQ;
757 
758  // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
759  if (CmpLess && IVBump < 0)
760  // Loop going while iv is "less" with the iv value going down. Must wrap.
761  return nullptr;
762 
763  if (CmpGreater && IVBump > 0)
764  // Loop going while iv is "greater" with the iv value going up. Must wrap.
765  return nullptr;
766 
767  // Phis that may feed into the loop.
768  LoopFeederMap LoopFeederPhi;
769 
770  // Check if the initial value may be zero and can be decremented in the first
771  // iteration. If the value is zero, the endloop instruction will not decrement
772  // the loop counter, so we shouldn't generate a hardware loop in this case.
773  if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
774  LoopFeederPhi))
775  return nullptr;
776 
777  if (Start->isImm() && End->isImm()) {
778  // Both, start and end are immediates.
779  int64_t StartV = Start->getImm();
780  int64_t EndV = End->getImm();
781  int64_t Dist = EndV - StartV;
782  if (Dist == 0)
783  return nullptr;
784 
785  bool Exact = (Dist % IVBump) == 0;
786 
787  if (Cmp == Comparison::NE) {
788  if (!Exact)
789  return nullptr;
790  if ((Dist < 0) ^ (IVBump < 0))
791  return nullptr;
792  }
793 
794  // For comparisons that include the final value (i.e. include equality
795  // with the final value), we need to increase the distance by 1.
796  if (CmpHasEqual)
797  Dist = Dist > 0 ? Dist+1 : Dist-1;
798 
799  // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
800  // CmpGreater should imply Dist < 0. These conditions could actually
801  // fail, for example, in unreachable code (which may still appear to be
802  // reachable in the CFG).
803  if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
804  return nullptr;
805 
806  // "Normalized" distance, i.e. with the bump set to +-1.
807  int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
808  : (-Dist + (-IVBump - 1)) / (-IVBump);
809  assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
810 
811  uint64_t Count = Dist1;
812 
813  if (Count > 0xFFFFFFFFULL)
814  return nullptr;
815 
816  return new CountValue(CountValue::CV_Immediate, Count);
817  }
818 
819  // A general case: Start and End are some values, but the actual
820  // iteration count may not be available. If it is not, insert
821  // a computation of it into the preheader.
822 
823  // If the induction variable bump is not a power of 2, quit.
824  // Othwerise we'd need a general integer division.
825  if (!isPowerOf2_64(std::abs(IVBump)))
826  return nullptr;
827 
829  assert (PH && "Should have a preheader by now");
831  DebugLoc DL;
832  if (InsertPos != PH->end())
833  DL = InsertPos->getDebugLoc();
834 
835  // If Start is an immediate and End is a register, the trip count
836  // will be "reg - imm". Hexagon's "subtract immediate" instruction
837  // is actually "reg + -imm".
838 
839  // If the loop IV is going downwards, i.e. if the bump is negative,
840  // then the iteration count (computed as End-Start) will need to be
841  // negated. To avoid the negation, just swap Start and End.
842  if (IVBump < 0) {
843  std::swap(Start, End);
844  IVBump = -IVBump;
845  }
846  // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
847  // Signedness, and "including equality" are preserved.
848 
849  bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
850  bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
851 
852  int64_t StartV = 0, EndV = 0;
853  if (Start->isImm())
854  StartV = Start->getImm();
855  if (End->isImm())
856  EndV = End->getImm();
857 
858  int64_t AdjV = 0;
859  // To compute the iteration count, we would need this computation:
860  // Count = (End - Start + (IVBump-1)) / IVBump
861  // or, when CmpHasEqual:
862  // Count = (End - Start + (IVBump-1)+1) / IVBump
863  // The "IVBump-1" part is the adjustment (AdjV). We can avoid
864  // generating an instruction specifically to add it if we can adjust
865  // the immediate values for Start or End.
866 
867  if (CmpHasEqual) {
868  // Need to add 1 to the total iteration count.
869  if (Start->isImm())
870  StartV--;
871  else if (End->isImm())
872  EndV++;
873  else
874  AdjV += 1;
875  }
876 
877  if (Cmp != Comparison::NE) {
878  if (Start->isImm())
879  StartV -= (IVBump-1);
880  else if (End->isImm())
881  EndV += (IVBump-1);
882  else
883  AdjV += (IVBump-1);
884  }
885 
886  unsigned R = 0, SR = 0;
887  if (Start->isReg()) {
888  R = Start->getReg();
889  SR = Start->getSubReg();
890  } else {
891  R = End->getReg();
892  SR = End->getSubReg();
893  }
894  const TargetRegisterClass *RC = MRI->getRegClass(R);
895  // Hardware loops cannot handle 64-bit registers. If it's a double
896  // register, it has to have a subregister.
897  if (!SR && RC == &Hexagon::DoubleRegsRegClass)
898  return nullptr;
899  const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
900 
901  // Compute DistR (register with the distance between Start and End).
902  unsigned DistR, DistSR;
903 
904  // Avoid special case, where the start value is an imm(0).
905  if (Start->isImm() && StartV == 0) {
906  DistR = End->getReg();
907  DistSR = End->getSubReg();
908  } else {
909  const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
910  (RegToImm ? TII->get(Hexagon::A2_subri) :
911  TII->get(Hexagon::A2_addi));
912  if (RegToReg || RegToImm) {
913  unsigned SubR = MRI->createVirtualRegister(IntRC);
914  MachineInstrBuilder SubIB =
915  BuildMI(*PH, InsertPos, DL, SubD, SubR);
916 
917  if (RegToReg)
918  SubIB.addReg(End->getReg(), 0, End->getSubReg())
919  .addReg(Start->getReg(), 0, Start->getSubReg());
920  else
921  SubIB.addImm(EndV)
922  .addReg(Start->getReg(), 0, Start->getSubReg());
923  DistR = SubR;
924  } else {
925  // If the loop has been unrolled, we should use the original loop count
926  // instead of recalculating the value. This will avoid additional
927  // 'Add' instruction.
928  const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
929  if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
930  EndValInstr->getOperand(1).getSubReg() == 0 &&
931  EndValInstr->getOperand(2).getImm() == StartV) {
932  DistR = EndValInstr->getOperand(1).getReg();
933  } else {
934  unsigned SubR = MRI->createVirtualRegister(IntRC);
935  MachineInstrBuilder SubIB =
936  BuildMI(*PH, InsertPos, DL, SubD, SubR);
937  SubIB.addReg(End->getReg(), 0, End->getSubReg())
938  .addImm(-StartV);
939  DistR = SubR;
940  }
941  }
942  DistSR = 0;
943  }
944 
945  // From DistR, compute AdjR (register with the adjusted distance).
946  unsigned AdjR, AdjSR;
947 
948  if (AdjV == 0) {
949  AdjR = DistR;
950  AdjSR = DistSR;
951  } else {
952  // Generate CountR = ADD DistR, AdjVal
953  unsigned AddR = MRI->createVirtualRegister(IntRC);
954  MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
955  BuildMI(*PH, InsertPos, DL, AddD, AddR)
956  .addReg(DistR, 0, DistSR)
957  .addImm(AdjV);
958 
959  AdjR = AddR;
960  AdjSR = 0;
961  }
962 
963  // From AdjR, compute CountR (register with the final count).
964  unsigned CountR, CountSR;
965 
966  if (IVBump == 1) {
967  CountR = AdjR;
968  CountSR = AdjSR;
969  } else {
970  // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
971  unsigned Shift = Log2_32(IVBump);
972 
973  // Generate NormR = LSR DistR, Shift.
974  unsigned LsrR = MRI->createVirtualRegister(IntRC);
975  const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
976  BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
977  .addReg(AdjR, 0, AdjSR)
978  .addImm(Shift);
979 
980  CountR = LsrR;
981  CountSR = 0;
982  }
983 
984  return new CountValue(CountValue::CV_Register, CountR, CountSR);
985 }
986 
987 /// Return true if the operation is invalid within hardware loop.
988 bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
989  bool IsInnerHWLoop) const {
990  // Call is not allowed because the callee may use a hardware loop except for
991  // the case when the call never returns.
992  if (MI->getDesc().isCall())
993  return !TII->doesNotReturn(*MI);
994 
995  // Check if the instruction defines a hardware loop register.
996  using namespace Hexagon;
997 
998  static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
999  static const unsigned Regs1[] = { LC1, SA1 };
1000  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
1001  : makeArrayRef(Regs1, array_lengthof(Regs1));
1002  for (unsigned R : CheckRegs)
1003  if (MI->modifiesRegister(R, TRI))
1004  return true;
1005 
1006  return false;
1007 }
1008 
1009 /// Return true if the loop contains an instruction that inhibits
1010 /// the use of the hardware loop instruction.
1011 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1012  bool IsInnerHWLoop) const {
1013  LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1014  << printMBBReference(**L->block_begin()));
1015  for (MachineBasicBlock *MBB : L->getBlocks()) {
1017  MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
1018  const MachineInstr *MI = &*MII;
1019  if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
1020  LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1021  MI->dump(););
1022  return true;
1023  }
1024  }
1025  }
1026  return false;
1027 }
1028 
1029 /// Returns true if the instruction is dead. This was essentially
1030 /// copied from DeadMachineInstructionElim::isDead, but with special cases
1031 /// for inline asm, physical registers and instructions with side effects
1032 /// removed.
1033 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1034  SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1035  // Examine each operand.
1036  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1037  const MachineOperand &MO = MI->getOperand(i);
1038  if (!MO.isReg() || !MO.isDef())
1039  continue;
1040 
1041  unsigned Reg = MO.getReg();
1042  if (MRI->use_nodbg_empty(Reg))
1043  continue;
1044 
1045  using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1046 
1047  // This instruction has users, but if the only user is the phi node for the
1048  // parent block, and the only use of that phi node is this instruction, then
1049  // this instruction is dead: both it (and the phi node) can be removed.
1050  use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1051  use_nodbg_iterator End = MRI->use_nodbg_end();
1052  if (std::next(I) != End || !I->getParent()->isPHI())
1053  return false;
1054 
1055  MachineInstr *OnePhi = I->getParent();
1056  for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1057  const MachineOperand &OPO = OnePhi->getOperand(j);
1058  if (!OPO.isReg() || !OPO.isDef())
1059  continue;
1060 
1061  unsigned OPReg = OPO.getReg();
1062  use_nodbg_iterator nextJ;
1063  for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1064  J != End; J = nextJ) {
1065  nextJ = std::next(J);
1066  MachineOperand &Use = *J;
1067  MachineInstr *UseMI = Use.getParent();
1068 
1069  // If the phi node has a user that is not MI, bail.
1070  if (MI != UseMI)
1071  return false;
1072  }
1073  }
1074  DeadPhis.push_back(OnePhi);
1075  }
1076 
1077  // If there are no defs with uses, the instruction is dead.
1078  return true;
1079 }
1080 
1081 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1082  // This procedure was essentially copied from DeadMachineInstructionElim.
1083 
1085  if (isDead(MI, DeadPhis)) {
1086  LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1087 
1088  // It is possible that some DBG_VALUE instructions refer to this
1089  // instruction. Examine each def operand for such references;
1090  // if found, mark the DBG_VALUE as undef (but don't delete it).
1091  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1092  const MachineOperand &MO = MI->getOperand(i);
1093  if (!MO.isReg() || !MO.isDef())
1094  continue;
1095  unsigned Reg = MO.getReg();
1098  E = MRI->use_end(); I != E; I = nextI) {
1099  nextI = std::next(I); // I is invalidated by the setReg
1100  MachineOperand &Use = *I;
1101  MachineInstr *UseMI = I->getParent();
1102  if (UseMI == MI)
1103  continue;
1104  if (Use.isDebug())
1105  UseMI->getOperand(0).setReg(0U);
1106  }
1107  }
1108 
1109  MI->eraseFromParent();
1110  for (unsigned i = 0; i < DeadPhis.size(); ++i)
1111  DeadPhis[i]->eraseFromParent();
1112  }
1113 }
1114 
1115 /// Check if the loop is a candidate for converting to a hardware
1116 /// loop. If so, then perform the transformation.
1117 ///
1118 /// This function works on innermost loops first. A loop can be converted
1119 /// if it is a counting loop; either a register value or an immediate.
1120 ///
1121 /// The code makes several assumptions about the representation of the loop
1122 /// in llvm.
1123 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1124  bool &RecL0used,
1125  bool &RecL1used) {
1126  // This is just for sanity.
1127  assert(L->getHeader() && "Loop without a header?");
1128 
1129  bool Changed = false;
1130  bool L0Used = false;
1131  bool L1Used = false;
1132 
1133  // Process nested loops first.
1134  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
1135  Changed |= convertToHardwareLoop(*I, RecL0used, RecL1used);
1136  L0Used |= RecL0used;
1137  L1Used |= RecL1used;
1138  }
1139 
1140  // If a nested loop has been converted, then we can't convert this loop.
1141  if (Changed && L0Used && L1Used)
1142  return Changed;
1143 
1144  unsigned LOOP_i;
1145  unsigned LOOP_r;
1146  unsigned ENDLOOP;
1147 
1148  // Flag used to track loopN instruction:
1149  // 1 - Hardware loop is being generated for the inner most loop.
1150  // 0 - Hardware loop is being generated for the outer loop.
1151  unsigned IsInnerHWLoop = 1;
1152 
1153  if (L0Used) {
1154  LOOP_i = Hexagon::J2_loop1i;
1155  LOOP_r = Hexagon::J2_loop1r;
1156  ENDLOOP = Hexagon::ENDLOOP1;
1157  IsInnerHWLoop = 0;
1158  } else {
1159  LOOP_i = Hexagon::J2_loop0i;
1160  LOOP_r = Hexagon::J2_loop0r;
1161  ENDLOOP = Hexagon::ENDLOOP0;
1162  }
1163 
1164 #ifndef NDEBUG
1165  // Stop trying after reaching the limit (if any).
1166  int Limit = HWLoopLimit;
1167  if (Limit >= 0) {
1168  if (Counter >= HWLoopLimit)
1169  return false;
1170  Counter++;
1171  }
1172 #endif
1173 
1174  // Does the loop contain any invalid instructions?
1175  if (containsInvalidInstruction(L, IsInnerHWLoop))
1176  return false;
1177 
1178  MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1179  // Don't generate hw loop if the loop has more than one exit.
1180  if (!LastMBB)
1181  return false;
1182 
1184  if (LastI == LastMBB->end())
1185  return false;
1186 
1187  // Is the induction variable bump feeding the latch condition?
1188  if (!fixupInductionVariable(L))
1189  return false;
1190 
1191  // Ensure the loop has a preheader: the loop instruction will be
1192  // placed there.
1193  MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1194  if (!Preheader) {
1195  Preheader = createPreheaderForLoop(L);
1196  if (!Preheader)
1197  return false;
1198  }
1199 
1200  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1201 
1203  // Are we able to determine the trip count for the loop?
1204  CountValue *TripCount = getLoopTripCount(L, OldInsts);
1205  if (!TripCount)
1206  return false;
1207 
1208  // Is the trip count available in the preheader?
1209  if (TripCount->isReg()) {
1210  // There will be a use of the register inserted into the preheader,
1211  // so make sure that the register is actually defined at that point.
1212  MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1213  MachineBasicBlock *BBDef = TCDef->getParent();
1214  if (!MDT->dominates(BBDef, Preheader))
1215  return false;
1216  }
1217 
1218  // Determine the loop start.
1219  MachineBasicBlock *TopBlock = L->getTopBlock();
1220  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1221  MachineBasicBlock *LoopStart = nullptr;
1222  if (ExitingBlock != L->getLoopLatch()) {
1223  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1225 
1226  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1227  return false;
1228 
1229  if (L->contains(TB))
1230  LoopStart = TB;
1231  else if (L->contains(FB))
1232  LoopStart = FB;
1233  else
1234  return false;
1235  }
1236  else
1237  LoopStart = TopBlock;
1238 
1239  // Convert the loop to a hardware loop.
1240  LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1241  DebugLoc DL;
1242  if (InsertPos != Preheader->end())
1243  DL = InsertPos->getDebugLoc();
1244 
1245  if (TripCount->isReg()) {
1246  // Create a copy of the loop count register.
1247  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1248  BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1249  .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1250  // Add the Loop instruction to the beginning of the loop.
1251  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1252  .addReg(CountReg);
1253  } else {
1254  assert(TripCount->isImm() && "Expecting immediate value for trip count");
1255  // Add the Loop immediate instruction to the beginning of the loop,
1256  // if the immediate fits in the instructions. Otherwise, we need to
1257  // create a new virtual register.
1258  int64_t CountImm = TripCount->getImm();
1259  if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1260  unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1261  BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1262  .addImm(CountImm);
1263  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1264  .addMBB(LoopStart).addReg(CountReg);
1265  } else
1266  BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1267  .addMBB(LoopStart).addImm(CountImm);
1268  }
1269 
1270  // Make sure the loop start always has a reference in the CFG. We need
1271  // to create a BlockAddress operand to get this mechanism to work both the
1272  // MachineBasicBlock and BasicBlock objects need the flag set.
1273  LoopStart->setHasAddressTaken();
1274  // This line is needed to set the hasAddressTaken flag on the BasicBlock
1275  // object.
1276  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
1277 
1278  // Replace the loop branch with an endloop instruction.
1279  DebugLoc LastIDL = LastI->getDebugLoc();
1280  BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1281 
1282  // The loop ends with either:
1283  // - a conditional branch followed by an unconditional branch, or
1284  // - a conditional branch to the loop start.
1285  if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1286  LastI->getOpcode() == Hexagon::J2_jumpf) {
1287  // Delete one and change/add an uncond. branch to out of the loop.
1288  MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1289  LastI = LastMBB->erase(LastI);
1290  if (!L->contains(BranchTarget)) {
1291  if (LastI != LastMBB->end())
1292  LastI = LastMBB->erase(LastI);
1294  TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1295  }
1296  } else {
1297  // Conditional branch to loop start; just delete it.
1298  LastMBB->erase(LastI);
1299  }
1300  delete TripCount;
1301 
1302  // The induction operation and the comparison may now be
1303  // unneeded. If these are unneeded, then remove them.
1304  for (unsigned i = 0; i < OldInsts.size(); ++i)
1305  removeIfDead(OldInsts[i]);
1306 
1307  ++NumHWLoops;
1308 
1309  // Set RecL1used and RecL0used only after hardware loop has been
1310  // successfully generated. Doing it earlier can cause wrong loop instruction
1311  // to be used.
1312  if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1313  RecL1used = true;
1314  else
1315  RecL0used = true;
1316 
1317  return true;
1318 }
1319 
1320 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1321  MachineInstr *CmpI) {
1322  assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1323 
1324  MachineBasicBlock *BB = BumpI->getParent();
1325  if (CmpI->getParent() != BB)
1326  return false;
1327 
1328  using instr_iterator = MachineBasicBlock::instr_iterator;
1329 
1330  // Check if things are in order to begin with.
1331  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1332  if (&*I == CmpI)
1333  return true;
1334 
1335  // Out of order.
1336  unsigned PredR = CmpI->getOperand(0).getReg();
1337  bool FoundBump = false;
1338  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1339  for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1340  MachineInstr *In = &*I;
1341  for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1342  MachineOperand &MO = In->getOperand(i);
1343  if (MO.isReg() && MO.isUse()) {
1344  if (MO.getReg() == PredR) // Found an intervening use of PredR.
1345  return false;
1346  }
1347  }
1348 
1349  if (In == BumpI) {
1350  BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1351  FoundBump = true;
1352  break;
1353  }
1354  }
1355  assert (FoundBump && "Cannot determine instruction order");
1356  return FoundBump;
1357 }
1358 
1359 /// This function is required to break recursion. Visiting phis in a loop may
1360 /// result in recursion during compilation. We break the recursion by making
1361 /// sure that we visit a MachineOperand and its definition in a
1362 /// MachineInstruction only once. If we attempt to visit more than once, then
1363 /// there is recursion, and will return false.
1364 bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1365  MachineInstr *MI,
1366  const MachineOperand *MO,
1367  LoopFeederMap &LoopFeederPhi) const {
1368  if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1369  LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1370  << printMBBReference(**L->block_begin()));
1371  // Ignore all BBs that form Loop.
1372  for (MachineBasicBlock *MBB : L->getBlocks()) {
1373  if (A == MBB)
1374  return false;
1375  }
1376  MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1377  LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1378  return true;
1379  } else
1380  // Already visited node.
1381  return false;
1382 }
1383 
1384 /// Return true if a Phi may generate a value that can underflow.
1385 /// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1386 bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1387  MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1388  MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1389  assert(Phi->isPHI() && "Expecting a Phi.");
1390  // Walk through each Phi, and its used operands. Make sure that
1391  // if there is recursion in Phi, we won't generate hardware loops.
1392  for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1393  if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1394  if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1395  Phi->getParent(), L, LoopFeederPhi))
1396  return true;
1397  return false;
1398 }
1399 
1400 /// Return true if the induction variable can underflow in the first iteration.
1401 /// An example, is an initial unsigned value that is 0 and is decrement in the
1402 /// first itertion of a do-while loop. In this case, we cannot generate a
1403 /// hardware loop because the endloop instruction does not decrement the loop
1404 /// counter if it is <= 1. We only need to perform this analysis if the
1405 /// initial value is a register.
1406 ///
1407 /// This function assumes the initial value may underfow unless proven
1408 /// otherwise. If the type is signed, then we don't care because signed
1409 /// underflow is undefined. We attempt to prove the initial value is not
1410 /// zero by perfoming a crude analysis of the loop counter. This function
1411 /// checks if the initial value is used in any comparison prior to the loop
1412 /// and, if so, assumes the comparison is a range check. This is inexact,
1413 /// but will catch the simple cases.
1414 bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1415  const MachineOperand *InitVal, const MachineOperand *EndVal,
1416  MachineBasicBlock *MBB, MachineLoop *L,
1417  LoopFeederMap &LoopFeederPhi) const {
1418  // Only check register values since they are unknown.
1419  if (!InitVal->isReg())
1420  return false;
1421 
1422  if (!EndVal->isImm())
1423  return false;
1424 
1425  // A register value that is assigned an immediate is a known value, and it
1426  // won't underflow in the first iteration.
1427  int64_t Imm;
1428  if (checkForImmediate(*InitVal, Imm))
1429  return (EndVal->getImm() == Imm);
1430 
1431  unsigned Reg = InitVal->getReg();
1432 
1433  // We don't know the value of a physical register.
1435  return true;
1436 
1437  MachineInstr *Def = MRI->getVRegDef(Reg);
1438  if (!Def)
1439  return true;
1440 
1441  // If the initial value is a Phi or copy and the operands may not underflow,
1442  // then the definition cannot be underflow either.
1443  if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1444  L, LoopFeederPhi))
1445  return false;
1446  if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1447  EndVal, Def->getParent(),
1448  L, LoopFeederPhi))
1449  return false;
1450 
1451  // Iterate over the uses of the initial value. If the initial value is used
1452  // in a compare, then we assume this is a range check that ensures the loop
1453  // doesn't underflow. This is not an exact test and should be improved.
1455  E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1456  MachineInstr *MI = &*I;
1457  unsigned CmpReg1 = 0, CmpReg2 = 0;
1458  int CmpMask = 0, CmpValue = 0;
1459 
1460  if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1461  continue;
1462 
1463  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1465  if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1466  continue;
1467 
1468  Comparison::Kind Cmp =
1469  getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1470  if (Cmp == 0)
1471  continue;
1472  if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1473  Cmp = Comparison::getNegatedComparison(Cmp);
1474  if (CmpReg2 != 0 && CmpReg2 == Reg)
1475  Cmp = Comparison::getSwappedComparison(Cmp);
1476 
1477  // Signed underflow is undefined.
1478  if (Comparison::isSigned(Cmp))
1479  return false;
1480 
1481  // Check if there is a comparison of the initial value. If the initial value
1482  // is greater than or not equal to another value, then assume this is a
1483  // range check.
1484  if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1485  return false;
1486  }
1487 
1488  // OK - this is a hack that needs to be improved. We really need to analyze
1489  // the instructions performed on the initial value. This works on the simplest
1490  // cases only.
1491  if (!Def->isCopy() && !Def->isPHI())
1492  return false;
1493 
1494  return true;
1495 }
1496 
1497 bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1498  int64_t &Val) const {
1499  if (MO.isImm()) {
1500  Val = MO.getImm();
1501  return true;
1502  }
1503  if (!MO.isReg())
1504  return false;
1505 
1506  // MO is a register. Check whether it is defined as an immediate value,
1507  // and if so, get the value of it in TV. That value will then need to be
1508  // processed to handle potential subregisters in MO.
1509  int64_t TV;
1510 
1511  unsigned R = MO.getReg();
1513  return false;
1514  MachineInstr *DI = MRI->getVRegDef(R);
1515  unsigned DOpc = DI->getOpcode();
1516  switch (DOpc) {
1517  case TargetOpcode::COPY:
1518  case Hexagon::A2_tfrsi:
1519  case Hexagon::A2_tfrpi:
1520  case Hexagon::CONST32:
1521  case Hexagon::CONST64:
1522  // Call recursively to avoid an extra check whether operand(1) is
1523  // indeed an immediate (it could be a global address, for example),
1524  // plus we can handle COPY at the same time.
1525  if (!checkForImmediate(DI->getOperand(1), TV))
1526  return false;
1527  break;
1528  case Hexagon::A2_combineii:
1529  case Hexagon::A4_combineir:
1530  case Hexagon::A4_combineii:
1531  case Hexagon::A4_combineri:
1532  case Hexagon::A2_combinew: {
1533  const MachineOperand &S1 = DI->getOperand(1);
1534  const MachineOperand &S2 = DI->getOperand(2);
1535  int64_t V1, V2;
1536  if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1537  return false;
1538  TV = V2 | (static_cast<uint64_t>(V1) << 32);
1539  break;
1540  }
1541  case TargetOpcode::REG_SEQUENCE: {
1542  const MachineOperand &S1 = DI->getOperand(1);
1543  const MachineOperand &S3 = DI->getOperand(3);
1544  int64_t V1, V3;
1545  if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1546  return false;
1547  unsigned Sub2 = DI->getOperand(2).getImm();
1548  unsigned Sub4 = DI->getOperand(4).getImm();
1549  if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1550  TV = V1 | (V3 << 32);
1551  else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1552  TV = V3 | (V1 << 32);
1553  else
1554  llvm_unreachable("Unexpected form of REG_SEQUENCE");
1555  break;
1556  }
1557 
1558  default:
1559  return false;
1560  }
1561 
1562  // By now, we should have successfully obtained the immediate value defining
1563  // the register referenced in MO. Handle a potential use of a subregister.
1564  switch (MO.getSubReg()) {
1565  case Hexagon::isub_lo:
1566  Val = TV & 0xFFFFFFFFULL;
1567  break;
1568  case Hexagon::isub_hi:
1569  Val = (TV >> 32) & 0xFFFFFFFFULL;
1570  break;
1571  default:
1572  Val = TV;
1573  break;
1574  }
1575  return true;
1576 }
1577 
1578 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1579  if (MO.isImm()) {
1580  MO.setImm(Val);
1581  return;
1582  }
1583 
1584  assert(MO.isReg());
1585  unsigned R = MO.getReg();
1586  MachineInstr *DI = MRI->getVRegDef(R);
1587 
1588  const TargetRegisterClass *RC = MRI->getRegClass(R);
1589  unsigned NewR = MRI->createVirtualRegister(RC);
1590  MachineBasicBlock &B = *DI->getParent();
1591  DebugLoc DL = DI->getDebugLoc();
1592  BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1593  MO.setReg(NewR);
1594 }
1595 
1596 static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
1597  // These two instructions are not extendable.
1598  if (CmpOpc == Hexagon::A4_cmpbeqi)
1599  return isUInt<8>(Imm);
1600  if (CmpOpc == Hexagon::A4_cmpbgti)
1601  return isInt<8>(Imm);
1602  // The rest of the comparison-with-immediate instructions are extendable.
1603  return true;
1604 }
1605 
1606 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1607  MachineBasicBlock *Header = L->getHeader();
1608  MachineBasicBlock *Latch = L->getLoopLatch();
1609  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1610 
1611  if (!(Header && Latch && ExitingBlock))
1612  return false;
1613 
1614  // These data structures follow the same concept as the corresponding
1615  // ones in findInductionRegister (where some comments are).
1616  using RegisterBump = std::pair<unsigned, int64_t>;
1617  using RegisterInduction = std::pair<unsigned, RegisterBump>;
1618  using RegisterInductionSet = std::set<RegisterInduction>;
1619 
1620  // Register candidates for induction variables, with their associated bumps.
1621  RegisterInductionSet IndRegs;
1622 
1623  // Look for induction patterns:
1624  // %1 = PHI ..., [ latch, %2 ]
1625  // %2 = ADD %1, imm
1626  using instr_iterator = MachineBasicBlock::instr_iterator;
1627 
1628  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1629  I != E && I->isPHI(); ++I) {
1630  MachineInstr *Phi = &*I;
1631 
1632  // Have a PHI instruction.
1633  for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1634  if (Phi->getOperand(i+1).getMBB() != Latch)
1635  continue;
1636 
1637  unsigned PhiReg = Phi->getOperand(i).getReg();
1638  MachineInstr *DI = MRI->getVRegDef(PhiReg);
1639 
1640  if (DI->getDesc().isAdd()) {
1641  // If the register operand to the add/sub is the PHI we are looking
1642  // at, this meets the induction pattern.
1643  unsigned IndReg = DI->getOperand(1).getReg();
1644  MachineOperand &Opnd2 = DI->getOperand(2);
1645  int64_t V;
1646  if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1647  unsigned UpdReg = DI->getOperand(0).getReg();
1648  IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1649  }
1650  }
1651  } // for (i)
1652  } // for (instr)
1653 
1654  if (IndRegs.empty())
1655  return false;
1656 
1657  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1659  // AnalyzeBranch returns true if it fails to analyze branch.
1660  bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1661  if (NotAnalyzed || Cond.empty())
1662  return false;
1663 
1664  if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1665  MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1667  bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1668  if (NotAnalyzed)
1669  return false;
1670 
1671  // Since latch is not the exiting block, the latch branch should be an
1672  // unconditional branch to the loop header.
1673  if (TB == Latch)
1674  TB = (LTB == Header) ? LTB : LFB;
1675  else
1676  FB = (LTB == Header) ? LTB : LFB;
1677  }
1678  if (TB != Header) {
1679  if (FB != Header) {
1680  // The latch/exit block does not go back to the header.
1681  return false;
1682  }
1683  // FB is the header (i.e., uncond. jump to branch header)
1684  // In this case, the LoopBody -> TB should not be a back edge otherwise
1685  // it could result in an infinite loop after conversion to hw_loop.
1686  // This case can happen when the Latch has two jumps like this:
1687  // Jmp_c OuterLoopHeader <-- TB
1688  // Jmp InnerLoopHeader <-- FB
1689  if (MDT->dominates(TB, FB))
1690  return false;
1691  }
1692 
1693  // Expecting a predicate register as a condition. It won't be a hardware
1694  // predicate register at this point yet, just a vreg.
1695  // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
1696  // into Cond, followed by the predicate register. For non-negated branches
1697  // it's just the register.
1698  unsigned CSz = Cond.size();
1699  if (CSz != 1 && CSz != 2)
1700  return false;
1701 
1702  if (!Cond[CSz-1].isReg())
1703  return false;
1704 
1705  unsigned P = Cond[CSz-1].getReg();
1706  MachineInstr *PredDef = MRI->getVRegDef(P);
1707 
1708  if (!PredDef->isCompare())
1709  return false;
1710 
1711  SmallSet<unsigned,2> CmpRegs;
1712  MachineOperand *CmpImmOp = nullptr;
1713 
1714  // Go over all operands to the compare and look for immediate and register
1715  // operands. Assume that if the compare has a single register use and a
1716  // single immediate operand, then the register is being compared with the
1717  // immediate value.
1718  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1719  MachineOperand &MO = PredDef->getOperand(i);
1720  if (MO.isReg()) {
1721  // Skip all implicit references. In one case there was:
1722  // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1723  if (MO.isImplicit())
1724  continue;
1725  if (MO.isUse()) {
1726  if (!isImmediate(MO)) {
1727  CmpRegs.insert(MO.getReg());
1728  continue;
1729  }
1730  // Consider the register to be the "immediate" operand.
1731  if (CmpImmOp)
1732  return false;
1733  CmpImmOp = &MO;
1734  }
1735  } else if (MO.isImm()) {
1736  if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1737  return false;
1738  CmpImmOp = &MO;
1739  }
1740  }
1741 
1742  if (CmpRegs.empty())
1743  return false;
1744 
1745  // Check if the compared register follows the order we want. Fix if needed.
1746  for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1747  I != E; ++I) {
1748  // This is a success. If the register used in the comparison is one that
1749  // we have identified as a bumped (updated) induction register, there is
1750  // nothing to do.
1751  if (CmpRegs.count(I->first))
1752  return true;
1753 
1754  // Otherwise, if the register being compared comes out of a PHI node,
1755  // and has been recognized as following the induction pattern, and is
1756  // compared against an immediate, we can fix it.
1757  const RegisterBump &RB = I->second;
1758  if (CmpRegs.count(RB.first)) {
1759  if (!CmpImmOp) {
1760  // If both operands to the compare instruction are registers, see if
1761  // it can be changed to use induction register as one of the operands.
1762  MachineInstr *IndI = nullptr;
1763  MachineInstr *nonIndI = nullptr;
1764  MachineOperand *IndMO = nullptr;
1765  MachineOperand *nonIndMO = nullptr;
1766 
1767  for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1768  MachineOperand &MO = PredDef->getOperand(i);
1769  if (MO.isReg() && MO.getReg() == RB.first) {
1770  LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1771  << ") = " << *(MRI->getVRegDef(I->first)));
1772  if (IndI)
1773  return false;
1774 
1775  IndI = MRI->getVRegDef(I->first);
1776  IndMO = &MO;
1777  } else if (MO.isReg()) {
1778  LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1779  << ") = " << *(MRI->getVRegDef(MO.getReg())));
1780  if (nonIndI)
1781  return false;
1782 
1783  nonIndI = MRI->getVRegDef(MO.getReg());
1784  nonIndMO = &MO;
1785  }
1786  }
1787  if (IndI && nonIndI &&
1788  nonIndI->getOpcode() == Hexagon::A2_addi &&
1789  nonIndI->getOperand(2).isImm() &&
1790  nonIndI->getOperand(2).getImm() == - RB.second) {
1791  bool Order = orderBumpCompare(IndI, PredDef);
1792  if (Order) {
1793  IndMO->setReg(I->first);
1794  nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1795  return true;
1796  }
1797  }
1798  return false;
1799  }
1800 
1801  // It is not valid to do this transformation on an unsigned comparison
1802  // because it may underflow.
1803  Comparison::Kind Cmp =
1804  getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1805  if (!Cmp || Comparison::isUnsigned(Cmp))
1806  return false;
1807 
1808  // If the register is being compared against an immediate, try changing
1809  // the compare instruction to use induction register and adjust the
1810  // immediate operand.
1811  int64_t CmpImm = getImmediate(*CmpImmOp);
1812  int64_t V = RB.second;
1813  // Handle Overflow (64-bit).
1814  if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1815  ((V < 0) && (CmpImm < INT64_MIN - V)))
1816  return false;
1817  CmpImm += V;
1818  // Most comparisons of register against an immediate value allow
1819  // the immediate to be constant-extended. There are some exceptions
1820  // though. Make sure the new combination will work.
1821  if (CmpImmOp->isImm())
1822  if (!isImmValidForOpcode(PredDef->getOpcode(), CmpImm))
1823  return false;
1824 
1825  // Make sure that the compare happens after the bump. Otherwise,
1826  // after the fixup, the compare would use a yet-undefined register.
1827  MachineInstr *BumpI = MRI->getVRegDef(I->first);
1828  bool Order = orderBumpCompare(BumpI, PredDef);
1829  if (!Order)
1830  return false;
1831 
1832  // Finally, fix the compare instruction.
1833  setImmediate(*CmpImmOp, CmpImm);
1834  for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1835  MachineOperand &MO = PredDef->getOperand(i);
1836  if (MO.isReg() && MO.getReg() == RB.first) {
1837  MO.setReg(I->first);
1838  return true;
1839  }
1840  }
1841  }
1842  }
1843 
1844  return false;
1845 }
1846 
1847 /// createPreheaderForLoop - Create a preheader for a given loop.
1848 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1849  MachineLoop *L) {
1850  if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1851  return TmpPH;
1852  if (!HWCreatePreheader)
1853  return nullptr;
1854 
1855  MachineBasicBlock *Header = L->getHeader();
1856  MachineBasicBlock *Latch = L->getLoopLatch();
1857  MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1858  MachineFunction *MF = Header->getParent();
1859  DebugLoc DL;
1860 
1861 #ifndef NDEBUG
1862  if ((!PHFn.empty()) && (PHFn != MF->getName()))
1863  return nullptr;
1864 #endif
1865 
1866  if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1867  return nullptr;
1868 
1869  using instr_iterator = MachineBasicBlock::instr_iterator;
1870 
1871  // Verify that all existing predecessors have analyzable branches
1872  // (or no branches at all).
1873  using MBBVector = std::vector<MachineBasicBlock *>;
1874 
1875  MBBVector Preds(Header->pred_begin(), Header->pred_end());
1877  MachineBasicBlock *TB = nullptr, *FB = nullptr;
1878 
1879  if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1880  return nullptr;
1881 
1882  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1883  MachineBasicBlock *PB = *I;
1884  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1885  if (NotAnalyzed)
1886  return nullptr;
1887  }
1888 
1890  MF->insert(Header->getIterator(), NewPH);
1891 
1892  if (Header->pred_size() > 2) {
1893  // Ensure that the header has only two predecessors: the preheader and
1894  // the loop latch. Any additional predecessors of the header should
1895  // join at the newly created preheader. Inspect all PHI nodes from the
1896  // header and create appropriate corresponding PHI nodes in the preheader.
1897 
1898  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1899  I != E && I->isPHI(); ++I) {
1900  MachineInstr *PN = &*I;
1901 
1902  const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1903  MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1904  NewPH->insert(NewPH->end(), NewPN);
1905 
1906  unsigned PR = PN->getOperand(0).getReg();
1907  const TargetRegisterClass *RC = MRI->getRegClass(PR);
1908  unsigned NewPR = MRI->createVirtualRegister(RC);
1909  NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1910 
1911  // Copy all non-latch operands of a header's PHI node to the newly
1912  // created PHI node in the preheader.
1913  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1914  unsigned PredR = PN->getOperand(i).getReg();
1915  unsigned PredRSub = PN->getOperand(i).getSubReg();
1916  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1917  if (PredB == Latch)
1918  continue;
1919 
1920  MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1921  MO.setSubReg(PredRSub);
1922  NewPN->addOperand(MO);
1923  NewPN->addOperand(MachineOperand::CreateMBB(PredB));
1924  }
1925 
1926  // Remove copied operands from the old PHI node and add the value
1927  // coming from the preheader's PHI.
1928  for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1929  MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1930  if (PredB != Latch) {
1931  PN->RemoveOperand(i+1);
1932  PN->RemoveOperand(i);
1933  }
1934  }
1935  PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1937  }
1938  } else {
1939  assert(Header->pred_size() == 2);
1940 
1941  // The header has only two predecessors, but the non-latch predecessor
1942  // is not a preheader (e.g. it has other successors, etc.)
1943  // In such a case we don't need any extra PHI nodes in the new preheader,
1944  // all we need is to adjust existing PHIs in the header to now refer to
1945  // the new preheader.
1946  for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1947  I != E && I->isPHI(); ++I) {
1948  MachineInstr *PN = &*I;
1949  for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1950  MachineOperand &MO = PN->getOperand(i+1);
1951  if (MO.getMBB() != Latch)
1952  MO.setMBB(NewPH);
1953  }
1954  }
1955  }
1956 
1957  // "Reroute" the CFG edges to link in the new preheader.
1958  // If any of the predecessors falls through to the header, insert a branch
1959  // to the new preheader in that place.
1962 
1963  TB = FB = nullptr;
1964 
1965  for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
1966  MachineBasicBlock *PB = *I;
1967  if (PB != Latch) {
1968  Tmp2.clear();
1969  bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1970  (void)NotAnalyzed; // suppress compiler warning
1971  assert (!NotAnalyzed && "Should be analyzable!");
1972  if (TB != Header && (Tmp2.empty() || FB != Header))
1973  TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1974  PB->ReplaceUsesOfBlockWith(Header, NewPH);
1975  }
1976  }
1977 
1978  // It can happen that the latch block will fall through into the header.
1979  // Insert an unconditional branch to the header.
1980  TB = FB = nullptr;
1981  bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1982  (void)LatchNotAnalyzed; // suppress compiler warning
1983  assert (!LatchNotAnalyzed && "Should be analyzable!");
1984  if (!TB && !FB)
1985  TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1986 
1987  // Finally, the branch from the preheader to the header.
1988  TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1989  NewPH->addSuccessor(Header);
1990 
1991  MachineLoop *ParentLoop = L->getParentLoop();
1992  if (ParentLoop)
1993  ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1994 
1995  // Update the dominator information with the new preheader.
1996  if (MDT) {
1997  if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1998  if (MachineDomTreeNode *DHN = HN->getIDom()) {
1999  MDT->addNewBlock(NewPH, DHN->getBlock());
2000  MDT->changeImmediateDominator(Header, NewPH);
2001  }
2002  }
2003  }
2004 
2005  return NewPH;
2006 }
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
instr_iterator instr_begin()
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void initializeHexagonHardwareLoopsPass(PassRegistry &)
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
void push_back(const T &Elt)
Definition: SmallVector.h:211
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm)
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
constexpr bool isInt< 8 >(int64_t x)
Definition: MathExtras.h:302
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
F(f)
FunctionPass * createHexagonHardwareLoops()
use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const
bool isPHI() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
static use_nodbg_iterator use_nodbg_end()
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to &#39;Old&#39;, change the code and CFG so that it branches to &#39;N...
Hexagon Hardware Loops
LLVM_NODISCARD bool empty() const
Definition: SmallSet.h:155
static use_iterator use_end()
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
#define INT64_MIN
Definition: DataTypes.h:80
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
#define INT64_MAX
Definition: DataTypes.h:77
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
BlockT * getHeader() const
Definition: LoopInfo.h:100
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
std::vector< MachineLoop *>::const_iterator iterator
Definition: LoopInfo.h:139
bool doesNotReturn(const MachineInstr &CallMI) const
MachineBasicBlock * findLoopControlBlock()
Find the block that contains the loop control variable and the loop test.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:405
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:250
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register...
Base class for the actual dominator tree node.
#define EQ(a, b)
Definition: regexec.c:112
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
bool getPredReg(ArrayRef< MachineOperand > Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:342
unsigned const MachineRegisterInfo * MRI
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:693
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
static BlockAddress * get(Function *F, BasicBlock *BB)
Return a BlockAddress for the specified function and basic block.
Definition: Constants.cpp:1443
MachineInstrBuilder & UseMI
DomTreeNodeBase * getIDom() const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:433
void setImm(int64_t immVal)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
self_iterator getIterator()
Definition: ilist_node.h:81
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
std::vector< MachineBasicBlock * >::iterator pred_iterator
bool isCopy() const
bool hasAddressTaken() const
Test whether this block is potentially the target of an indirect branch.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:52
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:248
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
iterator begin() const
Definition: LoopInfo.h:142
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1043
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
int64_t getImm() const
unsigned pred_size() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:538
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
Representation of each machine instruction.
Definition: MachineInstr.h:63
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0)
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB &#39;Other&#39; at the position From, and insert it into this MBB right before &#39;...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1212
void setSubReg(unsigned subReg)
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:257
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator end() const
Definition: LoopInfo.h:143
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node&#39;s...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader...
Instructions::iterator instr_iterator
static use_instr_nodbg_iterator use_instr_nodbg_end()
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops", "Hexagon Hardware Loops", false, false) INITIALIZE_PASS_END(HexagonHardwareLoops
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader " "instructions"))
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
block_iterator block_begin() const
Definition: LoopInfo.h:154
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool isImplicit() const
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164