LLVM 17.0.0git
HexagonHardwareLoops.cpp
Go to the documentation of this file.
1//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass identifies loops where we can generate the Hexagon hardware
10// loop instruction. The hardware loop can perform loop branches with a
11// zero-cycle overhead.
12//
13// The pattern that defines the induction variable can changed depending on
14// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
15// normalizes induction variables, and the Loop Strength Reduction pass
16// run by 'llc' may also make changes to the induction variable.
17// The pattern detected by this phase is due to running Strength Reduction.
18//
19// Criteria for hardware loops:
20// - Countable loops (w/ ind. var for a trip count)
21// - Assumes loops are normalized by IndVarSimplify
22// - Try inner-most loops first
23// - No function calls in loops.
24//
25//===----------------------------------------------------------------------===//
26
27#include "HexagonInstrInfo.h"
28#include "HexagonSubtarget.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DebugLoc.h"
48#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <cstdlib>
57#include <iterator>
58#include <map>
59#include <set>
60#include <string>
61#include <utility>
62#include <vector>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "hwloops"
67
68#ifndef NDEBUG
69static cl::opt<int> HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1));
70
71// Option to create preheader only for a specific function.
72static cl::opt<std::string> PHFn("hexagon-hwloop-phfn", cl::Hidden,
73 cl::init(""));
74#endif
75
76// Option to create a preheader if one doesn't exist.
77static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
78 cl::Hidden, cl::init(true),
79 cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
80
81// Turn it off by default. If a preheader block is not created here, the
82// software pipeliner may be unable to find a block suitable to serve as
83// a preheader. In that case SWP will not run.
84static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::Hidden,
85 cl::desc("Allow speculation of preheader "
86 "instructions"));
87
88STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
89
90namespace llvm {
91
94
95} // end namespace llvm
96
97namespace {
98
99 class CountValue;
100
101 struct HexagonHardwareLoops : public MachineFunctionPass {
102 MachineLoopInfo *MLI;
105 const HexagonInstrInfo *TII;
107#ifndef NDEBUG
108 static int Counter;
109#endif
110
111 public:
112 static char ID;
113
114 HexagonHardwareLoops() : MachineFunctionPass(ID) {}
115
116 bool runOnMachineFunction(MachineFunction &MF) override;
117
118 StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
119
120 void getAnalysisUsage(AnalysisUsage &AU) const override {
124 }
125
126 private:
127 using LoopFeederMap = std::map<Register, MachineInstr *>;
128
129 /// Kinds of comparisons in the compare instructions.
130 struct Comparison {
131 enum Kind {
132 EQ = 0x01,
133 NE = 0x02,
134 L = 0x04,
135 G = 0x08,
136 U = 0x40,
137 LTs = L,
138 LEs = L | EQ,
139 GTs = G,
140 GEs = G | EQ,
141 LTu = L | U,
142 LEu = L | EQ | U,
143 GTu = G | U,
144 GEu = G | EQ | U
145 };
146
147 static Kind getSwappedComparison(Kind Cmp) {
148 assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
149 if ((Cmp & L) || (Cmp & G))
150 return (Kind)(Cmp ^ (L|G));
151 return Cmp;
152 }
153
154 static Kind getNegatedComparison(Kind Cmp) {
155 if ((Cmp & L) || (Cmp & G))
156 return (Kind)((Cmp ^ (L | G)) ^ EQ);
157 if ((Cmp & NE) || (Cmp & EQ))
158 return (Kind)(Cmp ^ (EQ | NE));
159 return (Kind)0;
160 }
161
162 static bool isSigned(Kind Cmp) {
163 return (Cmp & (L | G) && !(Cmp & U));
164 }
165
166 static bool isUnsigned(Kind Cmp) {
167 return (Cmp & U);
168 }
169 };
170
171 /// Find the register that contains the loop controlling
172 /// induction variable.
173 /// If successful, it will return true and set the \p Reg, \p IVBump
174 /// and \p IVOp arguments. Otherwise it will return false.
175 /// The returned induction register is the register R that follows the
176 /// following induction pattern:
177 /// loop:
178 /// R = phi ..., [ R.next, LatchBlock ]
179 /// R.next = R + #bump
180 /// if (R.next < #N) goto loop
181 /// IVBump is the immediate value added to R, and IVOp is the instruction
182 /// "R.next = R + #bump".
183 bool findInductionRegister(MachineLoop *L, Register &Reg,
184 int64_t &IVBump, MachineInstr *&IVOp) const;
185
186 /// Return the comparison kind for the specified opcode.
187 Comparison::Kind getComparisonKind(unsigned CondOpc,
188 MachineOperand *InitialValue,
189 const MachineOperand *Endvalue,
190 int64_t IVBump) const;
191
192 /// Analyze the statements in a loop to determine if the loop
193 /// has a computable trip count and, if so, return a value that represents
194 /// the trip count expression.
195 CountValue *getLoopTripCount(MachineLoop *L,
197
198 /// Return the expression that represents the number of times
199 /// a loop iterates. The function takes the operands that represent the
200 /// loop start value, loop end value, and induction value. Based upon
201 /// these operands, the function attempts to compute the trip count.
202 /// If the trip count is not directly available (as an immediate value,
203 /// or a register), the function will attempt to insert computation of it
204 /// to the loop's preheader.
205 CountValue *computeCount(MachineLoop *Loop, const MachineOperand *Start,
206 const MachineOperand *End, Register IVReg,
207 int64_t IVBump, Comparison::Kind Cmp) const;
208
209 /// Return true if the instruction is not valid within a hardware
210 /// loop.
211 bool isInvalidLoopOperation(const MachineInstr *MI,
212 bool IsInnerHWLoop) const;
213
214 /// Return true if the loop contains an instruction that inhibits
215 /// using the hardware loop.
216 bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
217
218 /// Given a loop, check if we can convert it to a hardware loop.
219 /// If so, then perform the conversion and return true.
220 bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
221
222 /// Return true if the instruction is now dead.
223 bool isDead(const MachineInstr *MI,
224 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
225
226 /// Remove the instruction if it is now dead.
227 void removeIfDead(MachineInstr *MI);
228
229 /// Make sure that the "bump" instruction executes before the
230 /// compare. We need that for the IV fixup, so that the compare
231 /// instruction would not use a bumped value that has not yet been
232 /// defined. If the instructions are out of order, try to reorder them.
233 bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
234
235 /// Return true if MO and MI pair is visited only once. If visited
236 /// more than once, this indicates there is recursion. In such a case,
237 /// return false.
238 bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
239 const MachineOperand *MO,
240 LoopFeederMap &LoopFeederPhi) const;
241
242 /// Return true if the Phi may generate a value that may underflow,
243 /// or may wrap.
244 bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
246 LoopFeederMap &LoopFeederPhi) const;
247
248 /// Return true if the induction variable may underflow an unsigned
249 /// value in the first iteration.
250 bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
251 const MachineOperand *EndVal,
253 LoopFeederMap &LoopFeederPhi) const;
254
255 /// Check if the given operand has a compile-time known constant
256 /// value. Return true if yes, and false otherwise. When returning true, set
257 /// Val to the corresponding constant value.
258 bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
259
260 /// Check if the operand has a compile-time known constant value.
261 bool isImmediate(const MachineOperand &MO) const {
262 int64_t V;
263 return checkForImmediate(MO, V);
264 }
265
266 /// Return the immediate for the specified operand.
267 int64_t getImmediate(const MachineOperand &MO) const {
268 int64_t V;
269 if (!checkForImmediate(MO, V))
270 llvm_unreachable("Invalid operand");
271 return V;
272 }
273
274 /// Reset the given machine operand to now refer to a new immediate
275 /// value. Assumes that the operand was already referencing an immediate
276 /// value, either directly, or via a register.
277 void setImmediate(MachineOperand &MO, int64_t Val);
278
279 /// Fix the data flow of the induction variable.
280 /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
281 /// |
282 /// +-> back to phi
283 /// where "bump" is the increment of the induction variable:
284 /// iv = iv + #const.
285 /// Due to some prior code transformations, the actual flow may look
286 /// like this:
287 /// phi -+-> bump ---> back to phi
288 /// |
289 /// +-> comparison-in-latch (against upper_bound-bump),
290 /// i.e. the comparison that controls the loop execution may be using
291 /// the value of the induction variable from before the increment.
292 ///
293 /// Return true if the loop's flow is the desired one (i.e. it's
294 /// either been fixed, or no fixing was necessary).
295 /// Otherwise, return false. This can happen if the induction variable
296 /// couldn't be identified, or if the value in the latch's comparison
297 /// cannot be adjusted to reflect the post-bump value.
298 bool fixupInductionVariable(MachineLoop *L);
299
300 /// Given a loop, if it does not have a preheader, create one.
301 /// Return the block that is the preheader.
302 MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
303 };
304
305 char HexagonHardwareLoops::ID = 0;
306#ifndef NDEBUG
307 int HexagonHardwareLoops::Counter = 0;
308#endif
309
310 /// Abstraction for a trip count of a loop. A smaller version
311 /// of the MachineOperand class without the concerns of changing the
312 /// operand representation.
313 class CountValue {
314 public:
315 enum CountValueType {
316 CV_Register,
317 CV_Immediate
318 };
319
320 private:
321 CountValueType Kind;
322 union Values {
323 Values() : R{Register(), 0} {}
324 Values(const Values&) = default;
325 struct {
327 unsigned Sub;
328 } R;
329 unsigned ImmVal;
330 } Contents;
331
332 public:
333 explicit CountValue(CountValueType t, Register v, unsigned u = 0) {
334 Kind = t;
335 if (Kind == CV_Register) {
336 Contents.R.Reg = v;
337 Contents.R.Sub = u;
338 } else {
339 Contents.ImmVal = v;
340 }
341 }
342
343 bool isReg() const { return Kind == CV_Register; }
344 bool isImm() const { return Kind == CV_Immediate; }
345
346 Register getReg() const {
347 assert(isReg() && "Wrong CountValue accessor");
348 return Contents.R.Reg;
349 }
350
351 unsigned getSubReg() const {
352 assert(isReg() && "Wrong CountValue accessor");
353 return Contents.R.Sub;
354 }
355
356 unsigned getImm() const {
357 assert(isImm() && "Wrong CountValue accessor");
358 return Contents.ImmVal;
359 }
360
361 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const {
362 if (isReg()) { OS << printReg(Contents.R.Reg, TRI, Contents.R.Sub); }
363 if (isImm()) { OS << Contents.ImmVal; }
364 }
365 };
366
367} // end anonymous namespace
368
369INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
370 "Hexagon Hardware Loops", false, false)
373INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
374 "Hexagon Hardware Loops", false, false)
375
377 return new HexagonHardwareLoops();
378}
379
380bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
381 LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
382 if (skipFunction(MF.getFunction()))
383 return false;
384
385 bool Changed = false;
386
387 MLI = &getAnalysis<MachineLoopInfo>();
388 MRI = &MF.getRegInfo();
389 MDT = &getAnalysis<MachineDominatorTree>();
391 TII = HST.getInstrInfo();
392 TRI = HST.getRegisterInfo();
393
394 for (auto &L : *MLI)
395 if (L->isOutermost()) {
396 bool L0Used = false;
397 bool L1Used = false;
398 Changed |= convertToHardwareLoop(L, L0Used, L1Used);
399 }
400
401 return Changed;
402}
403
404bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
405 Register &Reg,
406 int64_t &IVBump,
407 MachineInstr *&IVOp
408 ) const {
409 MachineBasicBlock *Header = L->getHeader();
410 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
411 MachineBasicBlock *Latch = L->getLoopLatch();
412 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
413 if (!Header || !Preheader || !Latch || !ExitingBlock)
414 return false;
415
416 // This pair represents an induction register together with an immediate
417 // value that will be added to it in each loop iteration.
418 using RegisterBump = std::pair<Register, int64_t>;
419
420 // Mapping: R.next -> (R, bump), where R, R.next and bump are derived
421 // from an induction operation
422 // R.next = R + bump
423 // where bump is an immediate value.
424 using InductionMap = std::map<Register, RegisterBump>;
425
426 InductionMap IndMap;
427
428 using instr_iterator = MachineBasicBlock::instr_iterator;
429
430 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
431 I != E && I->isPHI(); ++I) {
432 MachineInstr *Phi = &*I;
433
434 // Have a PHI instruction. Get the operand that corresponds to the
435 // latch block, and see if is a result of an addition of form "reg+imm",
436 // where the "reg" is defined by the PHI node we are looking at.
437 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
438 if (Phi->getOperand(i+1).getMBB() != Latch)
439 continue;
440
441 Register PhiOpReg = Phi->getOperand(i).getReg();
442 MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
443
444 if (DI->getDesc().isAdd()) {
445 // If the register operand to the add is the PHI we're looking at, this
446 // meets the induction pattern.
447 Register IndReg = DI->getOperand(1).getReg();
448 MachineOperand &Opnd2 = DI->getOperand(2);
449 int64_t V;
450 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
451 Register UpdReg = DI->getOperand(0).getReg();
452 IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
453 }
454 }
455 } // for (i)
456 } // for (instr)
457
459 MachineBasicBlock *TB = nullptr, *FB = nullptr;
460 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
461 if (NotAnalyzed)
462 return false;
463
464 Register PredR;
465 unsigned PredPos, PredRegFlags;
466 if (!TII->getPredReg(Cond, PredR, PredPos, PredRegFlags))
467 return false;
468
469 MachineInstr *PredI = MRI->getVRegDef(PredR);
470 if (!PredI->isCompare())
471 return false;
472
473 Register CmpReg1, CmpReg2;
474 int64_t CmpImm = 0, CmpMask = 0;
475 bool CmpAnalyzed =
476 TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
477 // Fail if the compare was not analyzed, or it's not comparing a register
478 // with an immediate value. Not checking the mask here, since we handle
479 // the individual compare opcodes (including A4_cmpb*) later on.
480 if (!CmpAnalyzed)
481 return false;
482
483 // Exactly one of the input registers to the comparison should be among
484 // the induction registers.
485 InductionMap::iterator IndMapEnd = IndMap.end();
486 InductionMap::iterator F = IndMapEnd;
487 if (CmpReg1 != 0) {
488 InductionMap::iterator F1 = IndMap.find(CmpReg1);
489 if (F1 != IndMapEnd)
490 F = F1;
491 }
492 if (CmpReg2 != 0) {
493 InductionMap::iterator F2 = IndMap.find(CmpReg2);
494 if (F2 != IndMapEnd) {
495 if (F != IndMapEnd)
496 return false;
497 F = F2;
498 }
499 }
500 if (F == IndMapEnd)
501 return false;
502
503 Reg = F->second.first;
504 IVBump = F->second.second;
505 IVOp = MRI->getVRegDef(F->first);
506 return true;
507}
508
509// Return the comparison kind for the specified opcode.
510HexagonHardwareLoops::Comparison::Kind
511HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
512 MachineOperand *InitialValue,
513 const MachineOperand *EndValue,
514 int64_t IVBump) const {
515 Comparison::Kind Cmp = (Comparison::Kind)0;
516 switch (CondOpc) {
517 case Hexagon::C2_cmpeq:
518 case Hexagon::C2_cmpeqi:
519 case Hexagon::C2_cmpeqp:
520 Cmp = Comparison::EQ;
521 break;
522 case Hexagon::C4_cmpneq:
523 case Hexagon::C4_cmpneqi:
524 Cmp = Comparison::NE;
525 break;
526 case Hexagon::C2_cmplt:
527 Cmp = Comparison::LTs;
528 break;
529 case Hexagon::C2_cmpltu:
530 Cmp = Comparison::LTu;
531 break;
532 case Hexagon::C4_cmplte:
533 case Hexagon::C4_cmpltei:
534 Cmp = Comparison::LEs;
535 break;
536 case Hexagon::C4_cmplteu:
537 case Hexagon::C4_cmplteui:
538 Cmp = Comparison::LEu;
539 break;
540 case Hexagon::C2_cmpgt:
541 case Hexagon::C2_cmpgti:
542 case Hexagon::C2_cmpgtp:
543 Cmp = Comparison::GTs;
544 break;
545 case Hexagon::C2_cmpgtu:
546 case Hexagon::C2_cmpgtui:
547 case Hexagon::C2_cmpgtup:
548 Cmp = Comparison::GTu;
549 break;
550 case Hexagon::C2_cmpgei:
551 Cmp = Comparison::GEs;
552 break;
553 case Hexagon::C2_cmpgeui:
554 Cmp = Comparison::GEs;
555 break;
556 default:
557 return (Comparison::Kind)0;
558 }
559 return Cmp;
560}
561
562/// Analyze the statements in a loop to determine if the loop has
563/// a computable trip count and, if so, return a value that represents
564/// the trip count expression.
565///
566/// This function iterates over the phi nodes in the loop to check for
567/// induction variable patterns that are used in the calculation for
568/// the number of time the loop is executed.
569CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
571 MachineBasicBlock *TopMBB = L->getTopBlock();
573 assert(PI != TopMBB->pred_end() &&
574 "Loop must have more than one incoming edge!");
575 MachineBasicBlock *Backedge = *PI++;
576 if (PI == TopMBB->pred_end()) // dead loop?
577 return nullptr;
578 MachineBasicBlock *Incoming = *PI++;
579 if (PI != TopMBB->pred_end()) // multiple backedges?
580 return nullptr;
581
582 // Make sure there is one incoming and one backedge and determine which
583 // is which.
584 if (L->contains(Incoming)) {
585 if (L->contains(Backedge))
586 return nullptr;
587 std::swap(Incoming, Backedge);
588 } else if (!L->contains(Backedge))
589 return nullptr;
590
591 // Look for the cmp instruction to determine if we can get a useful trip
592 // count. The trip count can be either a register or an immediate. The
593 // location of the value depends upon the type (reg or imm).
594 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
595 if (!ExitingBlock)
596 return nullptr;
597
598 Register IVReg = 0;
599 int64_t IVBump = 0;
600 MachineInstr *IVOp;
601 bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
602 if (!FoundIV)
603 return nullptr;
604
605 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
606
607 MachineOperand *InitialValue = nullptr;
608 MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
609 MachineBasicBlock *Latch = L->getLoopLatch();
610 for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
611 MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
612 if (MBB == Preheader)
613 InitialValue = &IV_Phi->getOperand(i);
614 else if (MBB == Latch)
615 IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
616 }
617 if (!InitialValue)
618 return nullptr;
619
621 MachineBasicBlock *TB = nullptr, *FB = nullptr;
622 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
623 if (NotAnalyzed)
624 return nullptr;
625
626 MachineBasicBlock *Header = L->getHeader();
627 // TB must be non-null. If FB is also non-null, one of them must be
628 // the header. Otherwise, branch to TB could be exiting the loop, and
629 // the fall through can go to the header.
630 assert (TB && "Exit block without a branch?");
631 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
632 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
634 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
635 if (NotAnalyzed)
636 return nullptr;
637 if (TB == Latch)
638 TB = (LTB == Header) ? LTB : LFB;
639 else
640 FB = (LTB == Header) ? LTB: LFB;
641 }
642 assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
643 if (!TB || (FB && TB != Header && FB != Header))
644 return nullptr;
645
646 // Branches of form "if (!P) ..." cause HexagonInstrInfo::analyzeBranch
647 // to put imm(0), followed by P in the vector Cond.
648 // If TB is not the header, it means that the "not-taken" path must lead
649 // to the header.
650 bool Negated = TII->predOpcodeHasNot(Cond) ^ (TB != Header);
651 Register PredReg;
652 unsigned PredPos, PredRegFlags;
653 if (!TII->getPredReg(Cond, PredReg, PredPos, PredRegFlags))
654 return nullptr;
655 MachineInstr *CondI = MRI->getVRegDef(PredReg);
656 unsigned CondOpc = CondI->getOpcode();
657
658 Register CmpReg1, CmpReg2;
659 int64_t Mask = 0, ImmValue = 0;
660 bool AnalyzedCmp =
661 TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
662 if (!AnalyzedCmp)
663 return nullptr;
664
665 // The comparison operator type determines how we compute the loop
666 // trip count.
667 OldInsts.push_back(CondI);
668 OldInsts.push_back(IVOp);
669
670 // Sadly, the following code gets information based on the position
671 // of the operands in the compare instruction. This has to be done
672 // this way, because the comparisons check for a specific relationship
673 // between the operands (e.g. is-less-than), rather than to find out
674 // what relationship the operands are in (as on PPC).
675 Comparison::Kind Cmp;
676 bool isSwapped = false;
677 const MachineOperand &Op1 = CondI->getOperand(1);
678 const MachineOperand &Op2 = CondI->getOperand(2);
679 const MachineOperand *EndValue = nullptr;
680
681 if (Op1.isReg()) {
682 if (Op2.isImm() || Op1.getReg() == IVReg)
683 EndValue = &Op2;
684 else {
685 EndValue = &Op1;
686 isSwapped = true;
687 }
688 }
689
690 if (!EndValue)
691 return nullptr;
692
693 Cmp = getComparisonKind(CondOpc, InitialValue, EndValue, IVBump);
694 if (!Cmp)
695 return nullptr;
696 if (Negated)
697 Cmp = Comparison::getNegatedComparison(Cmp);
698 if (isSwapped)
699 Cmp = Comparison::getSwappedComparison(Cmp);
700
701 if (InitialValue->isReg()) {
702 Register R = InitialValue->getReg();
703 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
704 if (!MDT->properlyDominates(DefBB, Header)) {
705 int64_t V;
706 if (!checkForImmediate(*InitialValue, V))
707 return nullptr;
708 }
709 OldInsts.push_back(MRI->getVRegDef(R));
710 }
711 if (EndValue->isReg()) {
712 Register R = EndValue->getReg();
713 MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
714 if (!MDT->properlyDominates(DefBB, Header)) {
715 int64_t V;
716 if (!checkForImmediate(*EndValue, V))
717 return nullptr;
718 }
719 OldInsts.push_back(MRI->getVRegDef(R));
720 }
721
722 return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
723}
724
725/// Helper function that returns the expression that represents the
726/// number of times a loop iterates. The function takes the operands that
727/// represent the loop start value, loop end value, and induction value.
728/// Based upon these operands, the function attempts to compute the trip count.
729CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
730 const MachineOperand *Start,
731 const MachineOperand *End,
732 Register IVReg,
733 int64_t IVBump,
734 Comparison::Kind Cmp) const {
735 // Cannot handle comparison EQ, i.e. while (A == B).
736 if (Cmp == Comparison::EQ)
737 return nullptr;
738
739 // Check if either the start or end values are an assignment of an immediate.
740 // If so, use the immediate value rather than the register.
741 if (Start->isReg()) {
742 const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
743 if (StartValInstr && (StartValInstr->getOpcode() == Hexagon::A2_tfrsi ||
744 StartValInstr->getOpcode() == Hexagon::A2_tfrpi))
745 Start = &StartValInstr->getOperand(1);
746 }
747 if (End->isReg()) {
748 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
749 if (EndValInstr && (EndValInstr->getOpcode() == Hexagon::A2_tfrsi ||
750 EndValInstr->getOpcode() == Hexagon::A2_tfrpi))
751 End = &EndValInstr->getOperand(1);
752 }
753
754 if (!Start->isReg() && !Start->isImm())
755 return nullptr;
756 if (!End->isReg() && !End->isImm())
757 return nullptr;
758
759 bool CmpLess = Cmp & Comparison::L;
760 bool CmpGreater = Cmp & Comparison::G;
761 bool CmpHasEqual = Cmp & Comparison::EQ;
762
763 // Avoid certain wrap-arounds. This doesn't detect all wrap-arounds.
764 if (CmpLess && IVBump < 0)
765 // Loop going while iv is "less" with the iv value going down. Must wrap.
766 return nullptr;
767
768 if (CmpGreater && IVBump > 0)
769 // Loop going while iv is "greater" with the iv value going up. Must wrap.
770 return nullptr;
771
772 // Phis that may feed into the loop.
773 LoopFeederMap LoopFeederPhi;
774
775 // Check if the initial value may be zero and can be decremented in the first
776 // iteration. If the value is zero, the endloop instruction will not decrement
777 // the loop counter, so we shouldn't generate a hardware loop in this case.
778 if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
779 LoopFeederPhi))
780 return nullptr;
781
782 if (Start->isImm() && End->isImm()) {
783 // Both, start and end are immediates.
784 int64_t StartV = Start->getImm();
785 int64_t EndV = End->getImm();
786 int64_t Dist = EndV - StartV;
787 if (Dist == 0)
788 return nullptr;
789
790 bool Exact = (Dist % IVBump) == 0;
791
792 if (Cmp == Comparison::NE) {
793 if (!Exact)
794 return nullptr;
795 if ((Dist < 0) ^ (IVBump < 0))
796 return nullptr;
797 }
798
799 // For comparisons that include the final value (i.e. include equality
800 // with the final value), we need to increase the distance by 1.
801 if (CmpHasEqual)
802 Dist = Dist > 0 ? Dist+1 : Dist-1;
803
804 // For the loop to iterate, CmpLess should imply Dist > 0. Similarly,
805 // CmpGreater should imply Dist < 0. These conditions could actually
806 // fail, for example, in unreachable code (which may still appear to be
807 // reachable in the CFG).
808 if ((CmpLess && Dist < 0) || (CmpGreater && Dist > 0))
809 return nullptr;
810
811 // "Normalized" distance, i.e. with the bump set to +-1.
812 int64_t Dist1 = (IVBump > 0) ? (Dist + (IVBump - 1)) / IVBump
813 : (-Dist + (-IVBump - 1)) / (-IVBump);
814 assert (Dist1 > 0 && "Fishy thing. Both operands have the same sign.");
815
816 uint64_t Count = Dist1;
817
818 if (Count > 0xFFFFFFFFULL)
819 return nullptr;
820
821 return new CountValue(CountValue::CV_Immediate, Count);
822 }
823
824 // A general case: Start and End are some values, but the actual
825 // iteration count may not be available. If it is not, insert
826 // a computation of it into the preheader.
827
828 // If the induction variable bump is not a power of 2, quit.
829 // Othwerise we'd need a general integer division.
830 if (!isPowerOf2_64(std::abs(IVBump)))
831 return nullptr;
832
833 MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
834 assert (PH && "Should have a preheader by now");
836 DebugLoc DL;
837 if (InsertPos != PH->end())
838 DL = InsertPos->getDebugLoc();
839
840 // If Start is an immediate and End is a register, the trip count
841 // will be "reg - imm". Hexagon's "subtract immediate" instruction
842 // is actually "reg + -imm".
843
844 // If the loop IV is going downwards, i.e. if the bump is negative,
845 // then the iteration count (computed as End-Start) will need to be
846 // negated. To avoid the negation, just swap Start and End.
847 if (IVBump < 0) {
848 std::swap(Start, End);
849 IVBump = -IVBump;
850 }
851 // Cmp may now have a wrong direction, e.g. LEs may now be GEs.
852 // Signedness, and "including equality" are preserved.
853
854 bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
855 bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
856
857 int64_t StartV = 0, EndV = 0;
858 if (Start->isImm())
859 StartV = Start->getImm();
860 if (End->isImm())
861 EndV = End->getImm();
862
863 int64_t AdjV = 0;
864 // To compute the iteration count, we would need this computation:
865 // Count = (End - Start + (IVBump-1)) / IVBump
866 // or, when CmpHasEqual:
867 // Count = (End - Start + (IVBump-1)+1) / IVBump
868 // The "IVBump-1" part is the adjustment (AdjV). We can avoid
869 // generating an instruction specifically to add it if we can adjust
870 // the immediate values for Start or End.
871
872 if (CmpHasEqual) {
873 // Need to add 1 to the total iteration count.
874 if (Start->isImm())
875 StartV--;
876 else if (End->isImm())
877 EndV++;
878 else
879 AdjV += 1;
880 }
881
882 if (Cmp != Comparison::NE) {
883 if (Start->isImm())
884 StartV -= (IVBump-1);
885 else if (End->isImm())
886 EndV += (IVBump-1);
887 else
888 AdjV += (IVBump-1);
889 }
890
891 Register R = 0;
892 unsigned SR = 0;
893 if (Start->isReg()) {
894 R = Start->getReg();
895 SR = Start->getSubReg();
896 } else {
897 R = End->getReg();
898 SR = End->getSubReg();
899 }
900 const TargetRegisterClass *RC = MRI->getRegClass(R);
901 // Hardware loops cannot handle 64-bit registers. If it's a double
902 // register, it has to have a subregister.
903 if (!SR && RC == &Hexagon::DoubleRegsRegClass)
904 return nullptr;
905 const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
906
907 // Compute DistR (register with the distance between Start and End).
908 Register DistR;
909 unsigned DistSR;
910
911 // Avoid special case, where the start value is an imm(0).
912 if (Start->isImm() && StartV == 0) {
913 DistR = End->getReg();
914 DistSR = End->getSubReg();
915 } else {
916 const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::A2_sub) :
917 (RegToImm ? TII->get(Hexagon::A2_subri) :
918 TII->get(Hexagon::A2_addi));
919 if (RegToReg || RegToImm) {
920 Register SubR = MRI->createVirtualRegister(IntRC);
921 MachineInstrBuilder SubIB =
922 BuildMI(*PH, InsertPos, DL, SubD, SubR);
923
924 if (RegToReg)
925 SubIB.addReg(End->getReg(), 0, End->getSubReg())
926 .addReg(Start->getReg(), 0, Start->getSubReg());
927 else
928 SubIB.addImm(EndV)
929 .addReg(Start->getReg(), 0, Start->getSubReg());
930 DistR = SubR;
931 } else {
932 // If the loop has been unrolled, we should use the original loop count
933 // instead of recalculating the value. This will avoid additional
934 // 'Add' instruction.
935 const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
936 if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
937 EndValInstr->getOperand(1).getSubReg() == 0 &&
938 EndValInstr->getOperand(2).getImm() == StartV) {
939 DistR = EndValInstr->getOperand(1).getReg();
940 } else {
941 Register SubR = MRI->createVirtualRegister(IntRC);
942 MachineInstrBuilder SubIB =
943 BuildMI(*PH, InsertPos, DL, SubD, SubR);
944 SubIB.addReg(End->getReg(), 0, End->getSubReg())
945 .addImm(-StartV);
946 DistR = SubR;
947 }
948 }
949 DistSR = 0;
950 }
951
952 // From DistR, compute AdjR (register with the adjusted distance).
953 Register AdjR;
954 unsigned AdjSR;
955
956 if (AdjV == 0) {
957 AdjR = DistR;
958 AdjSR = DistSR;
959 } else {
960 // Generate CountR = ADD DistR, AdjVal
961 Register AddR = MRI->createVirtualRegister(IntRC);
962 MCInstrDesc const &AddD = TII->get(Hexagon::A2_addi);
963 BuildMI(*PH, InsertPos, DL, AddD, AddR)
964 .addReg(DistR, 0, DistSR)
965 .addImm(AdjV);
966
967 AdjR = AddR;
968 AdjSR = 0;
969 }
970
971 // From AdjR, compute CountR (register with the final count).
972 Register CountR;
973 unsigned CountSR;
974
975 if (IVBump == 1) {
976 CountR = AdjR;
977 CountSR = AdjSR;
978 } else {
979 // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
980 unsigned Shift = Log2_32(IVBump);
981
982 // Generate NormR = LSR DistR, Shift.
983 Register LsrR = MRI->createVirtualRegister(IntRC);
984 const MCInstrDesc &LsrD = TII->get(Hexagon::S2_lsr_i_r);
985 BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
986 .addReg(AdjR, 0, AdjSR)
987 .addImm(Shift);
988
989 CountR = LsrR;
990 CountSR = 0;
991 }
992
993 return new CountValue(CountValue::CV_Register, CountR, CountSR);
994}
995
996/// Return true if the operation is invalid within hardware loop.
997bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
998 bool IsInnerHWLoop) const {
999 // Call is not allowed because the callee may use a hardware loop except for
1000 // the case when the call never returns.
1001 if (MI->getDesc().isCall())
1002 return !TII->doesNotReturn(*MI);
1003
1004 // Check if the instruction defines a hardware loop register.
1005 using namespace Hexagon;
1006
1007 static const Register Regs01[] = { LC0, SA0, LC1, SA1 };
1008 static const Register Regs1[] = { LC1, SA1 };
1009 auto CheckRegs = IsInnerHWLoop ? ArrayRef(Regs01, std::size(Regs01))
1010 : ArrayRef(Regs1, std::size(Regs1));
1011 for (Register R : CheckRegs)
1012 if (MI->modifiesRegister(R, TRI))
1013 return true;
1014
1015 return false;
1016}
1017
1018/// Return true if the loop contains an instruction that inhibits
1019/// the use of the hardware loop instruction.
1020bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
1021 bool IsInnerHWLoop) const {
1022 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1023 << printMBBReference(**L->block_begin()));
1024 for (MachineBasicBlock *MBB : L->getBlocks()) {
1025 for (const MachineInstr &MI : *MBB) {
1026 if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
1027 LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
1028 MI.dump(););
1029 return true;
1030 }
1031 }
1032 }
1033 return false;
1034}
1035
1036/// Returns true if the instruction is dead. This was essentially
1037/// copied from DeadMachineInstructionElim::isDead, but with special cases
1038/// for inline asm, physical registers and instructions with side effects
1039/// removed.
1040bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
1041 SmallVectorImpl<MachineInstr *> &DeadPhis) const {
1042 // Examine each operand.
1043 for (const MachineOperand &MO : MI->operands()) {
1044 if (!MO.isReg() || !MO.isDef())
1045 continue;
1046
1047 Register Reg = MO.getReg();
1048 if (MRI->use_nodbg_empty(Reg))
1049 continue;
1050
1051 using use_nodbg_iterator = MachineRegisterInfo::use_nodbg_iterator;
1052
1053 // This instruction has users, but if the only user is the phi node for the
1054 // parent block, and the only use of that phi node is this instruction, then
1055 // this instruction is dead: both it (and the phi node) can be removed.
1056 use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
1057 use_nodbg_iterator End = MRI->use_nodbg_end();
1058 if (std::next(I) != End || !I->getParent()->isPHI())
1059 return false;
1060
1061 MachineInstr *OnePhi = I->getParent();
1062 for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
1063 const MachineOperand &OPO = OnePhi->getOperand(j);
1064 if (!OPO.isReg() || !OPO.isDef())
1065 continue;
1066
1067 Register OPReg = OPO.getReg();
1068 use_nodbg_iterator nextJ;
1069 for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
1070 J != End; J = nextJ) {
1071 nextJ = std::next(J);
1072 MachineOperand &Use = *J;
1073 MachineInstr *UseMI = Use.getParent();
1074
1075 // If the phi node has a user that is not MI, bail.
1076 if (MI != UseMI)
1077 return false;
1078 }
1079 }
1080 DeadPhis.push_back(OnePhi);
1081 }
1082
1083 // If there are no defs with uses, the instruction is dead.
1084 return true;
1085}
1086
1087void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
1088 // This procedure was essentially copied from DeadMachineInstructionElim.
1089
1091 if (isDead(MI, DeadPhis)) {
1092 LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
1093
1094 // It is possible that some DBG_VALUE instructions refer to this
1095 // instruction. Examine each def operand for such references;
1096 // if found, mark the DBG_VALUE as undef (but don't delete it).
1097 for (const MachineOperand &MO : MI->operands()) {
1098 if (!MO.isReg() || !MO.isDef())
1099 continue;
1100 Register Reg = MO.getReg();
1101 // We use make_early_inc_range here because setReg below invalidates the
1102 // iterator.
1103 for (MachineOperand &MO :
1104 llvm::make_early_inc_range(MRI->use_operands(Reg))) {
1106 if (UseMI == MI)
1107 continue;
1108 if (MO.isDebug())
1109 MO.setReg(0U);
1110 }
1111 }
1112
1113 MI->eraseFromParent();
1114 for (unsigned i = 0; i < DeadPhis.size(); ++i)
1115 DeadPhis[i]->eraseFromParent();
1116 }
1117}
1118
1119/// Check if the loop is a candidate for converting to a hardware
1120/// loop. If so, then perform the transformation.
1121///
1122/// This function works on innermost loops first. A loop can be converted
1123/// if it is a counting loop; either a register value or an immediate.
1124///
1125/// The code makes several assumptions about the representation of the loop
1126/// in llvm.
1127bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
1128 bool &RecL0used,
1129 bool &RecL1used) {
1130 // This is just to confirm basic correctness.
1131 assert(L->getHeader() && "Loop without a header?");
1132
1133 bool Changed = false;
1134 bool L0Used = false;
1135 bool L1Used = false;
1136
1137 // Process nested loops first.
1138 for (MachineLoop *I : *L) {
1139 Changed |= convertToHardwareLoop(I, RecL0used, RecL1used);
1140 L0Used |= RecL0used;
1141 L1Used |= RecL1used;
1142 }
1143
1144 // If a nested loop has been converted, then we can't convert this loop.
1145 if (Changed && L0Used && L1Used)
1146 return Changed;
1147
1148 unsigned LOOP_i;
1149 unsigned LOOP_r;
1150 unsigned ENDLOOP;
1151
1152 // Flag used to track loopN instruction:
1153 // 1 - Hardware loop is being generated for the inner most loop.
1154 // 0 - Hardware loop is being generated for the outer loop.
1155 unsigned IsInnerHWLoop = 1;
1156
1157 if (L0Used) {
1158 LOOP_i = Hexagon::J2_loop1i;
1159 LOOP_r = Hexagon::J2_loop1r;
1160 ENDLOOP = Hexagon::ENDLOOP1;
1161 IsInnerHWLoop = 0;
1162 } else {
1163 LOOP_i = Hexagon::J2_loop0i;
1164 LOOP_r = Hexagon::J2_loop0r;
1165 ENDLOOP = Hexagon::ENDLOOP0;
1166 }
1167
1168#ifndef NDEBUG
1169 // Stop trying after reaching the limit (if any).
1170 int Limit = HWLoopLimit;
1171 if (Limit >= 0) {
1172 if (Counter >= HWLoopLimit)
1173 return false;
1174 Counter++;
1175 }
1176#endif
1177
1178 // Does the loop contain any invalid instructions?
1179 if (containsInvalidInstruction(L, IsInnerHWLoop))
1180 return false;
1181
1182 MachineBasicBlock *LastMBB = L->findLoopControlBlock();
1183 // Don't generate hw loop if the loop has more than one exit.
1184 if (!LastMBB)
1185 return false;
1186
1188 if (LastI == LastMBB->end())
1189 return false;
1190
1191 // Is the induction variable bump feeding the latch condition?
1192 if (!fixupInductionVariable(L))
1193 return false;
1194
1195 // Ensure the loop has a preheader: the loop instruction will be
1196 // placed there.
1197 MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
1198 if (!Preheader) {
1199 Preheader = createPreheaderForLoop(L);
1200 if (!Preheader)
1201 return false;
1202 }
1203
1204 MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
1205
1207 // Are we able to determine the trip count for the loop?
1208 CountValue *TripCount = getLoopTripCount(L, OldInsts);
1209 if (!TripCount)
1210 return false;
1211
1212 // Is the trip count available in the preheader?
1213 if (TripCount->isReg()) {
1214 // There will be a use of the register inserted into the preheader,
1215 // so make sure that the register is actually defined at that point.
1216 MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
1217 MachineBasicBlock *BBDef = TCDef->getParent();
1218 if (!MDT->dominates(BBDef, Preheader))
1219 return false;
1220 }
1221
1222 // Determine the loop start.
1223 MachineBasicBlock *TopBlock = L->getTopBlock();
1224 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1225 MachineBasicBlock *LoopStart = nullptr;
1226 if (ExitingBlock != L->getLoopLatch()) {
1227 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1229
1230 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
1231 return false;
1232
1233 if (L->contains(TB))
1234 LoopStart = TB;
1235 else if (L->contains(FB))
1236 LoopStart = FB;
1237 else
1238 return false;
1239 }
1240 else
1241 LoopStart = TopBlock;
1242
1243 // Convert the loop to a hardware loop.
1244 LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
1245 DebugLoc DL;
1246 if (InsertPos != Preheader->end())
1247 DL = InsertPos->getDebugLoc();
1248
1249 if (TripCount->isReg()) {
1250 // Create a copy of the loop count register.
1251 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1252 BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
1253 .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
1254 // Add the Loop instruction to the beginning of the loop.
1255 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r)).addMBB(LoopStart)
1256 .addReg(CountReg);
1257 } else {
1258 assert(TripCount->isImm() && "Expecting immediate value for trip count");
1259 // Add the Loop immediate instruction to the beginning of the loop,
1260 // if the immediate fits in the instructions. Otherwise, we need to
1261 // create a new virtual register.
1262 int64_t CountImm = TripCount->getImm();
1263 if (!TII->isValidOffset(LOOP_i, CountImm, TRI)) {
1264 Register CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
1265 BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::A2_tfrsi), CountReg)
1266 .addImm(CountImm);
1267 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_r))
1268 .addMBB(LoopStart).addReg(CountReg);
1269 } else
1270 BuildMI(*Preheader, InsertPos, DL, TII->get(LOOP_i))
1271 .addMBB(LoopStart).addImm(CountImm);
1272 }
1273
1274 // Make sure the loop start always has a reference in the CFG.
1275 LoopStart->setMachineBlockAddressTaken();
1276
1277 // Replace the loop branch with an endloop instruction.
1278 DebugLoc LastIDL = LastI->getDebugLoc();
1279 BuildMI(*LastMBB, LastI, LastIDL, TII->get(ENDLOOP)).addMBB(LoopStart);
1280
1281 // The loop ends with either:
1282 // - a conditional branch followed by an unconditional branch, or
1283 // - a conditional branch to the loop start.
1284 if (LastI->getOpcode() == Hexagon::J2_jumpt ||
1285 LastI->getOpcode() == Hexagon::J2_jumpf) {
1286 // Delete one and change/add an uncond. branch to out of the loop.
1287 MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
1288 LastI = LastMBB->erase(LastI);
1289 if (!L->contains(BranchTarget)) {
1290 if (LastI != LastMBB->end())
1291 LastI = LastMBB->erase(LastI);
1293 TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
1294 }
1295 } else {
1296 // Conditional branch to loop start; just delete it.
1297 LastMBB->erase(LastI);
1298 }
1299 delete TripCount;
1300
1301 // The induction operation and the comparison may now be
1302 // unneeded. If these are unneeded, then remove them.
1303 for (unsigned i = 0; i < OldInsts.size(); ++i)
1304 removeIfDead(OldInsts[i]);
1305
1306 ++NumHWLoops;
1307
1308 // Set RecL1used and RecL0used only after hardware loop has been
1309 // successfully generated. Doing it earlier can cause wrong loop instruction
1310 // to be used.
1311 if (L0Used) // Loop0 was already used. So, the correct loop must be loop1.
1312 RecL1used = true;
1313 else
1314 RecL0used = true;
1315
1316 return true;
1317}
1318
1319bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
1320 MachineInstr *CmpI) {
1321 assert (BumpI != CmpI && "Bump and compare in the same instruction?");
1322
1323 MachineBasicBlock *BB = BumpI->getParent();
1324 if (CmpI->getParent() != BB)
1325 return false;
1326
1327 using instr_iterator = MachineBasicBlock::instr_iterator;
1328
1329 // Check if things are in order to begin with.
1330 for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
1331 if (&*I == CmpI)
1332 return true;
1333
1334 // Out of order.
1335 Register PredR = CmpI->getOperand(0).getReg();
1336 bool FoundBump = false;
1337 instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
1338 for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
1339 MachineInstr *In = &*I;
1340 for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
1341 MachineOperand &MO = In->getOperand(i);
1342 if (MO.isReg() && MO.isUse()) {
1343 if (MO.getReg() == PredR) // Found an intervening use of PredR.
1344 return false;
1345 }
1346 }
1347
1348 if (In == BumpI) {
1349 BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
1350 FoundBump = true;
1351 break;
1352 }
1353 }
1354 assert (FoundBump && "Cannot determine instruction order");
1355 return FoundBump;
1356}
1357
1358/// This function is required to break recursion. Visiting phis in a loop may
1359/// result in recursion during compilation. We break the recursion by making
1360/// sure that we visit a MachineOperand and its definition in a
1361/// MachineInstruction only once. If we attempt to visit more than once, then
1362/// there is recursion, and will return false.
1363bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
1365 const MachineOperand *MO,
1366 LoopFeederMap &LoopFeederPhi) const {
1367 if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
1368 LLVM_DEBUG(dbgs() << "\nhw_loop head, "
1369 << printMBBReference(**L->block_begin()));
1370 // Ignore all BBs that form Loop.
1371 if (llvm::is_contained(L->getBlocks(), A))
1372 return false;
1373 MachineInstr *Def = MRI->getVRegDef(MO->getReg());
1374 LoopFeederPhi.insert(std::make_pair(MO->getReg(), Def));
1375 return true;
1376 } else
1377 // Already visited node.
1378 return false;
1379}
1380
1381/// Return true if a Phi may generate a value that can underflow.
1382/// This function calls loopCountMayWrapOrUnderFlow for each Phi operand.
1383bool HexagonHardwareLoops::phiMayWrapOrUnderflow(
1384 MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB,
1385 MachineLoop *L, LoopFeederMap &LoopFeederPhi) const {
1386 assert(Phi->isPHI() && "Expecting a Phi.");
1387 // Walk through each Phi, and its used operands. Make sure that
1388 // if there is recursion in Phi, we won't generate hardware loops.
1389 for (int i = 1, n = Phi->getNumOperands(); i < n; i += 2)
1390 if (isLoopFeeder(L, MBB, Phi, &(Phi->getOperand(i)), LoopFeederPhi))
1391 if (loopCountMayWrapOrUnderFlow(&(Phi->getOperand(i)), EndVal,
1392 Phi->getParent(), L, LoopFeederPhi))
1393 return true;
1394 return false;
1395}
1396
1397/// Return true if the induction variable can underflow in the first iteration.
1398/// An example, is an initial unsigned value that is 0 and is decrement in the
1399/// first itertion of a do-while loop. In this case, we cannot generate a
1400/// hardware loop because the endloop instruction does not decrement the loop
1401/// counter if it is <= 1. We only need to perform this analysis if the
1402/// initial value is a register.
1403///
1404/// This function assumes the initial value may underfow unless proven
1405/// otherwise. If the type is signed, then we don't care because signed
1406/// underflow is undefined. We attempt to prove the initial value is not
1407/// zero by perfoming a crude analysis of the loop counter. This function
1408/// checks if the initial value is used in any comparison prior to the loop
1409/// and, if so, assumes the comparison is a range check. This is inexact,
1410/// but will catch the simple cases.
1411bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
1412 const MachineOperand *InitVal, const MachineOperand *EndVal,
1414 LoopFeederMap &LoopFeederPhi) const {
1415 // Only check register values since they are unknown.
1416 if (!InitVal->isReg())
1417 return false;
1418
1419 if (!EndVal->isImm())
1420 return false;
1421
1422 // A register value that is assigned an immediate is a known value, and it
1423 // won't underflow in the first iteration.
1424 int64_t Imm;
1425 if (checkForImmediate(*InitVal, Imm))
1426 return (EndVal->getImm() == Imm);
1427
1428 Register Reg = InitVal->getReg();
1429
1430 // We don't know the value of a physical register.
1431 if (!Reg.isVirtual())
1432 return true;
1433
1434 MachineInstr *Def = MRI->getVRegDef(Reg);
1435 if (!Def)
1436 return true;
1437
1438 // If the initial value is a Phi or copy and the operands may not underflow,
1439 // then the definition cannot be underflow either.
1440 if (Def->isPHI() && !phiMayWrapOrUnderflow(Def, EndVal, Def->getParent(),
1441 L, LoopFeederPhi))
1442 return false;
1443 if (Def->isCopy() && !loopCountMayWrapOrUnderFlow(&(Def->getOperand(1)),
1444 EndVal, Def->getParent(),
1445 L, LoopFeederPhi))
1446 return false;
1447
1448 // Iterate over the uses of the initial value. If the initial value is used
1449 // in a compare, then we assume this is a range check that ensures the loop
1450 // doesn't underflow. This is not an exact test and should be improved.
1451 for (MachineRegisterInfo::use_instr_nodbg_iterator I = MRI->use_instr_nodbg_begin(Reg),
1452 E = MRI->use_instr_nodbg_end(); I != E; ++I) {
1453 MachineInstr *MI = &*I;
1454 Register CmpReg1, CmpReg2;
1455 int64_t CmpMask = 0, CmpValue = 0;
1456
1457 if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
1458 continue;
1459
1460 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1462 if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
1463 continue;
1464
1465 Comparison::Kind Cmp =
1466 getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
1467 if (Cmp == 0)
1468 continue;
1469 if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
1470 Cmp = Comparison::getNegatedComparison(Cmp);
1471 if (CmpReg2 != 0 && CmpReg2 == Reg)
1472 Cmp = Comparison::getSwappedComparison(Cmp);
1473
1474 // Signed underflow is undefined.
1475 if (Comparison::isSigned(Cmp))
1476 return false;
1477
1478 // Check if there is a comparison of the initial value. If the initial value
1479 // is greater than or not equal to another value, then assume this is a
1480 // range check.
1481 if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
1482 return false;
1483 }
1484
1485 // OK - this is a hack that needs to be improved. We really need to analyze
1486 // the instructions performed on the initial value. This works on the simplest
1487 // cases only.
1488 if (!Def->isCopy() && !Def->isPHI())
1489 return false;
1490
1491 return true;
1492}
1493
1494bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
1495 int64_t &Val) const {
1496 if (MO.isImm()) {
1497 Val = MO.getImm();
1498 return true;
1499 }
1500 if (!MO.isReg())
1501 return false;
1502
1503 // MO is a register. Check whether it is defined as an immediate value,
1504 // and if so, get the value of it in TV. That value will then need to be
1505 // processed to handle potential subregisters in MO.
1506 int64_t TV;
1507
1508 Register R = MO.getReg();
1509 if (!R.isVirtual())
1510 return false;
1511 MachineInstr *DI = MRI->getVRegDef(R);
1512 unsigned DOpc = DI->getOpcode();
1513 switch (DOpc) {
1514 case TargetOpcode::COPY:
1515 case Hexagon::A2_tfrsi:
1516 case Hexagon::A2_tfrpi:
1517 case Hexagon::CONST32:
1518 case Hexagon::CONST64:
1519 // Call recursively to avoid an extra check whether operand(1) is
1520 // indeed an immediate (it could be a global address, for example),
1521 // plus we can handle COPY at the same time.
1522 if (!checkForImmediate(DI->getOperand(1), TV))
1523 return false;
1524 break;
1525 case Hexagon::A2_combineii:
1526 case Hexagon::A4_combineir:
1527 case Hexagon::A4_combineii:
1528 case Hexagon::A4_combineri:
1529 case Hexagon::A2_combinew: {
1530 const MachineOperand &S1 = DI->getOperand(1);
1531 const MachineOperand &S2 = DI->getOperand(2);
1532 int64_t V1, V2;
1533 if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
1534 return false;
1535 TV = V2 | (static_cast<uint64_t>(V1) << 32);
1536 break;
1537 }
1538 case TargetOpcode::REG_SEQUENCE: {
1539 const MachineOperand &S1 = DI->getOperand(1);
1540 const MachineOperand &S3 = DI->getOperand(3);
1541 int64_t V1, V3;
1542 if (!checkForImmediate(S1, V1) || !checkForImmediate(S3, V3))
1543 return false;
1544 unsigned Sub2 = DI->getOperand(2).getImm();
1545 unsigned Sub4 = DI->getOperand(4).getImm();
1546 if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
1547 TV = V1 | (V3 << 32);
1548 else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
1549 TV = V3 | (V1 << 32);
1550 else
1551 llvm_unreachable("Unexpected form of REG_SEQUENCE");
1552 break;
1553 }
1554
1555 default:
1556 return false;
1557 }
1558
1559 // By now, we should have successfully obtained the immediate value defining
1560 // the register referenced in MO. Handle a potential use of a subregister.
1561 switch (MO.getSubReg()) {
1562 case Hexagon::isub_lo:
1563 Val = TV & 0xFFFFFFFFULL;
1564 break;
1565 case Hexagon::isub_hi:
1566 Val = (TV >> 32) & 0xFFFFFFFFULL;
1567 break;
1568 default:
1569 Val = TV;
1570 break;
1571 }
1572 return true;
1573}
1574
1575void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
1576 if (MO.isImm()) {
1577 MO.setImm(Val);
1578 return;
1579 }
1580
1581 assert(MO.isReg());
1582 Register R = MO.getReg();
1583 MachineInstr *DI = MRI->getVRegDef(R);
1584
1585 const TargetRegisterClass *RC = MRI->getRegClass(R);
1586 Register NewR = MRI->createVirtualRegister(RC);
1587 MachineBasicBlock &B = *DI->getParent();
1588 DebugLoc DL = DI->getDebugLoc();
1589 BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR).addImm(Val);
1590 MO.setReg(NewR);
1591}
1592
1593bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
1594 MachineBasicBlock *Header = L->getHeader();
1595 MachineBasicBlock *Latch = L->getLoopLatch();
1596 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1597
1598 if (!(Header && Latch && ExitingBlock))
1599 return false;
1600
1601 // These data structures follow the same concept as the corresponding
1602 // ones in findInductionRegister (where some comments are).
1603 using RegisterBump = std::pair<Register, int64_t>;
1604 using RegisterInduction = std::pair<Register, RegisterBump>;
1605 using RegisterInductionSet = std::set<RegisterInduction>;
1606
1607 // Register candidates for induction variables, with their associated bumps.
1608 RegisterInductionSet IndRegs;
1609
1610 // Look for induction patterns:
1611 // %1 = PHI ..., [ latch, %2 ]
1612 // %2 = ADD %1, imm
1613 using instr_iterator = MachineBasicBlock::instr_iterator;
1614
1615 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1616 I != E && I->isPHI(); ++I) {
1617 MachineInstr *Phi = &*I;
1618
1619 // Have a PHI instruction.
1620 for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
1621 if (Phi->getOperand(i+1).getMBB() != Latch)
1622 continue;
1623
1624 Register PhiReg = Phi->getOperand(i).getReg();
1625 MachineInstr *DI = MRI->getVRegDef(PhiReg);
1626
1627 if (DI->getDesc().isAdd()) {
1628 // If the register operand to the add/sub is the PHI we are looking
1629 // at, this meets the induction pattern.
1630 Register IndReg = DI->getOperand(1).getReg();
1631 MachineOperand &Opnd2 = DI->getOperand(2);
1632 int64_t V;
1633 if (MRI->getVRegDef(IndReg) == Phi && checkForImmediate(Opnd2, V)) {
1634 Register UpdReg = DI->getOperand(0).getReg();
1635 IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
1636 }
1637 }
1638 } // for (i)
1639 } // for (instr)
1640
1641 if (IndRegs.empty())
1642 return false;
1643
1644 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1646 // analyzeBranch returns true if it fails to analyze branch.
1647 bool NotAnalyzed = TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false);
1648 if (NotAnalyzed || Cond.empty())
1649 return false;
1650
1651 if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
1652 MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
1654 bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
1655 if (NotAnalyzed)
1656 return false;
1657
1658 // Since latch is not the exiting block, the latch branch should be an
1659 // unconditional branch to the loop header.
1660 if (TB == Latch)
1661 TB = (LTB == Header) ? LTB : LFB;
1662 else
1663 FB = (LTB == Header) ? LTB : LFB;
1664 }
1665 if (TB != Header) {
1666 if (FB != Header) {
1667 // The latch/exit block does not go back to the header.
1668 return false;
1669 }
1670 // FB is the header (i.e., uncond. jump to branch header)
1671 // In this case, the LoopBody -> TB should not be a back edge otherwise
1672 // it could result in an infinite loop after conversion to hw_loop.
1673 // This case can happen when the Latch has two jumps like this:
1674 // Jmp_c OuterLoopHeader <-- TB
1675 // Jmp InnerLoopHeader <-- FB
1676 if (MDT->dominates(TB, FB))
1677 return false;
1678 }
1679
1680 // Expecting a predicate register as a condition. It won't be a hardware
1681 // predicate register at this point yet, just a vreg.
1682 // HexagonInstrInfo::analyzeBranch for negated branches inserts imm(0)
1683 // into Cond, followed by the predicate register. For non-negated branches
1684 // it's just the register.
1685 unsigned CSz = Cond.size();
1686 if (CSz != 1 && CSz != 2)
1687 return false;
1688
1689 if (!Cond[CSz-1].isReg())
1690 return false;
1691
1692 Register P = Cond[CSz - 1].getReg();
1693 MachineInstr *PredDef = MRI->getVRegDef(P);
1694
1695 if (!PredDef->isCompare())
1696 return false;
1697
1698 SmallSet<Register,2> CmpRegs;
1699 MachineOperand *CmpImmOp = nullptr;
1700
1701 // Go over all operands to the compare and look for immediate and register
1702 // operands. Assume that if the compare has a single register use and a
1703 // single immediate operand, then the register is being compared with the
1704 // immediate value.
1705 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1706 MachineOperand &MO = PredDef->getOperand(i);
1707 if (MO.isReg()) {
1708 // Skip all implicit references. In one case there was:
1709 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
1710 if (MO.isImplicit())
1711 continue;
1712 if (MO.isUse()) {
1713 if (!isImmediate(MO)) {
1714 CmpRegs.insert(MO.getReg());
1715 continue;
1716 }
1717 // Consider the register to be the "immediate" operand.
1718 if (CmpImmOp)
1719 return false;
1720 CmpImmOp = &MO;
1721 }
1722 } else if (MO.isImm()) {
1723 if (CmpImmOp) // A second immediate argument? Confusing. Bail out.
1724 return false;
1725 CmpImmOp = &MO;
1726 }
1727 }
1728
1729 if (CmpRegs.empty())
1730 return false;
1731
1732 // Check if the compared register follows the order we want. Fix if needed.
1733 for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
1734 I != E; ++I) {
1735 // This is a success. If the register used in the comparison is one that
1736 // we have identified as a bumped (updated) induction register, there is
1737 // nothing to do.
1738 if (CmpRegs.count(I->first))
1739 return true;
1740
1741 // Otherwise, if the register being compared comes out of a PHI node,
1742 // and has been recognized as following the induction pattern, and is
1743 // compared against an immediate, we can fix it.
1744 const RegisterBump &RB = I->second;
1745 if (CmpRegs.count(RB.first)) {
1746 if (!CmpImmOp) {
1747 // If both operands to the compare instruction are registers, see if
1748 // it can be changed to use induction register as one of the operands.
1749 MachineInstr *IndI = nullptr;
1750 MachineInstr *nonIndI = nullptr;
1751 MachineOperand *IndMO = nullptr;
1752 MachineOperand *nonIndMO = nullptr;
1753
1754 for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
1755 MachineOperand &MO = PredDef->getOperand(i);
1756 if (MO.isReg() && MO.getReg() == RB.first) {
1757 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1758 << ") = " << *(MRI->getVRegDef(I->first)));
1759 if (IndI)
1760 return false;
1761
1762 IndI = MRI->getVRegDef(I->first);
1763 IndMO = &MO;
1764 } else if (MO.isReg()) {
1765 LLVM_DEBUG(dbgs() << "\n DefMI(" << i
1766 << ") = " << *(MRI->getVRegDef(MO.getReg())));
1767 if (nonIndI)
1768 return false;
1769
1770 nonIndI = MRI->getVRegDef(MO.getReg());
1771 nonIndMO = &MO;
1772 }
1773 }
1774 if (IndI && nonIndI &&
1775 nonIndI->getOpcode() == Hexagon::A2_addi &&
1776 nonIndI->getOperand(2).isImm() &&
1777 nonIndI->getOperand(2).getImm() == - RB.second) {
1778 bool Order = orderBumpCompare(IndI, PredDef);
1779 if (Order) {
1780 IndMO->setReg(I->first);
1781 nonIndMO->setReg(nonIndI->getOperand(1).getReg());
1782 return true;
1783 }
1784 }
1785 return false;
1786 }
1787
1788 // It is not valid to do this transformation on an unsigned comparison
1789 // because it may underflow.
1790 Comparison::Kind Cmp =
1791 getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
1792 if (!Cmp || Comparison::isUnsigned(Cmp))
1793 return false;
1794
1795 // If the register is being compared against an immediate, try changing
1796 // the compare instruction to use induction register and adjust the
1797 // immediate operand.
1798 int64_t CmpImm = getImmediate(*CmpImmOp);
1799 int64_t V = RB.second;
1800 // Handle Overflow (64-bit).
1801 if (((V > 0) && (CmpImm > INT64_MAX - V)) ||
1802 ((V < 0) && (CmpImm < INT64_MIN - V)))
1803 return false;
1804 CmpImm += V;
1805 // Most comparisons of register against an immediate value allow
1806 // the immediate to be constant-extended. There are some exceptions
1807 // though. Make sure the new combination will work.
1808 if (CmpImmOp->isImm() && !TII->isExtendable(*PredDef) &&
1809 !TII->isValidOffset(PredDef->getOpcode(), CmpImm, TRI, false))
1810 return false;
1811
1812 // Make sure that the compare happens after the bump. Otherwise,
1813 // after the fixup, the compare would use a yet-undefined register.
1814 MachineInstr *BumpI = MRI->getVRegDef(I->first);
1815 bool Order = orderBumpCompare(BumpI, PredDef);
1816 if (!Order)
1817 return false;
1818
1819 // Finally, fix the compare instruction.
1820 setImmediate(*CmpImmOp, CmpImm);
1821 for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
1822 MachineOperand &MO = PredDef->getOperand(i);
1823 if (MO.isReg() && MO.getReg() == RB.first) {
1824 MO.setReg(I->first);
1825 return true;
1826 }
1827 }
1828 }
1829 }
1830
1831 return false;
1832}
1833
1834/// createPreheaderForLoop - Create a preheader for a given loop.
1835MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
1836 MachineLoop *L) {
1837 if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
1838 return TmpPH;
1839 if (!HWCreatePreheader)
1840 return nullptr;
1841
1842 MachineBasicBlock *Header = L->getHeader();
1843 MachineBasicBlock *Latch = L->getLoopLatch();
1844 MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
1845 MachineFunction *MF = Header->getParent();
1846 DebugLoc DL;
1847
1848#ifndef NDEBUG
1849 if ((!PHFn.empty()) && (PHFn != MF->getName()))
1850 return nullptr;
1851#endif
1852
1853 if (!Latch || !ExitingBlock || Header->hasAddressTaken())
1854 return nullptr;
1855
1856 using instr_iterator = MachineBasicBlock::instr_iterator;
1857
1858 // Verify that all existing predecessors have analyzable branches
1859 // (or no branches at all).
1860 using MBBVector = std::vector<MachineBasicBlock *>;
1861
1862 MBBVector Preds(Header->pred_begin(), Header->pred_end());
1864 MachineBasicBlock *TB = nullptr, *FB = nullptr;
1865
1866 if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
1867 return nullptr;
1868
1869 for (MachineBasicBlock *PB : Preds) {
1870 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
1871 if (NotAnalyzed)
1872 return nullptr;
1873 }
1874
1876 MF->insert(Header->getIterator(), NewPH);
1877
1878 if (Header->pred_size() > 2) {
1879 // Ensure that the header has only two predecessors: the preheader and
1880 // the loop latch. Any additional predecessors of the header should
1881 // join at the newly created preheader. Inspect all PHI nodes from the
1882 // header and create appropriate corresponding PHI nodes in the preheader.
1883
1884 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1885 I != E && I->isPHI(); ++I) {
1886 MachineInstr *PN = &*I;
1887
1888 const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
1889 MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
1890 NewPH->insert(NewPH->end(), NewPN);
1891
1892 Register PR = PN->getOperand(0).getReg();
1893 const TargetRegisterClass *RC = MRI->getRegClass(PR);
1894 Register NewPR = MRI->createVirtualRegister(RC);
1895 NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
1896
1897 // Copy all non-latch operands of a header's PHI node to the newly
1898 // created PHI node in the preheader.
1899 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1900 Register PredR = PN->getOperand(i).getReg();
1901 unsigned PredRSub = PN->getOperand(i).getSubReg();
1902 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1903 if (PredB == Latch)
1904 continue;
1905
1906 MachineOperand MO = MachineOperand::CreateReg(PredR, false);
1907 MO.setSubReg(PredRSub);
1908 NewPN->addOperand(MO);
1910 }
1911
1912 // Remove copied operands from the old PHI node and add the value
1913 // coming from the preheader's PHI.
1914 for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
1915 MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
1916 if (PredB != Latch) {
1917 PN->removeOperand(i+1);
1918 PN->removeOperand(i);
1919 }
1920 }
1921 PN->addOperand(MachineOperand::CreateReg(NewPR, false));
1923 }
1924 } else {
1925 assert(Header->pred_size() == 2);
1926
1927 // The header has only two predecessors, but the non-latch predecessor
1928 // is not a preheader (e.g. it has other successors, etc.)
1929 // In such a case we don't need any extra PHI nodes in the new preheader,
1930 // all we need is to adjust existing PHIs in the header to now refer to
1931 // the new preheader.
1932 for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
1933 I != E && I->isPHI(); ++I) {
1934 MachineInstr *PN = &*I;
1935 for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
1936 MachineOperand &MO = PN->getOperand(i+1);
1937 if (MO.getMBB() != Latch)
1938 MO.setMBB(NewPH);
1939 }
1940 }
1941 }
1942
1943 // "Reroute" the CFG edges to link in the new preheader.
1944 // If any of the predecessors falls through to the header, insert a branch
1945 // to the new preheader in that place.
1948
1949 TB = FB = nullptr;
1950
1951 for (MachineBasicBlock *PB : Preds) {
1952 if (PB != Latch) {
1953 Tmp2.clear();
1954 bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
1955 (void)NotAnalyzed; // suppress compiler warning
1956 assert (!NotAnalyzed && "Should be analyzable!");
1957 if (TB != Header && (Tmp2.empty() || FB != Header))
1958 TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
1959 PB->ReplaceUsesOfBlockWith(Header, NewPH);
1960 }
1961 }
1962
1963 // It can happen that the latch block will fall through into the header.
1964 // Insert an unconditional branch to the header.
1965 TB = FB = nullptr;
1966 bool LatchNotAnalyzed = TII->analyzeBranch(*Latch, TB, FB, Tmp2, false);
1967 (void)LatchNotAnalyzed; // suppress compiler warning
1968 assert (!LatchNotAnalyzed && "Should be analyzable!");
1969 if (!TB && !FB)
1970 TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
1971
1972 // Finally, the branch from the preheader to the header.
1973 TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
1974 NewPH->addSuccessor(Header);
1975
1976 MachineLoop *ParentLoop = L->getParentLoop();
1977 if (ParentLoop)
1978 ParentLoop->addBasicBlockToLoop(NewPH, MLI->getBase());
1979
1980 // Update the dominator information with the new preheader.
1981 if (MDT) {
1982 if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
1983 if (MachineDomTreeNode *DHN = HN->getIDom()) {
1984 MDT->addNewBlock(NewPH, DHN->getBlock());
1985 MDT->changeImmediateDominator(Header, NewPH);
1986 }
1987 }
1988 }
1989
1990 return NewPH;
1991}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static bool isSigned(unsigned int Opcode)
const HexagonInstrInfo * TII
static cl::opt< bool > HWCreatePreheader("hexagon-hwloop-preheader", cl::Hidden, cl::init(true), cl::desc("Add a preheader to a hardware loop if one doesn't exist"))
static cl::opt< bool > SpecPreheader("hwloop-spec-preheader", cl::Hidden, cl::desc("Allow speculation of preheader " "instructions"))
static cl::opt< std::string > PHFn("hexagon-hwloop-phfn", cl::Hidden, cl::init(""))
Hexagon Hardware Loops
static cl::opt< int > HWLoopLimit("hexagon-max-hwloop", cl::Hidden, cl::init(-1))
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
return ToRemove size() > 0
#define P(N)
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
DomTreeNodeBase * getIDom() const
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
bool doesNotReturn(const MachineInstr &CallMI) const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool getPredReg(ArrayRef< MachineOperand > Cond, Register &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const
bool isValidOffset(unsigned Opcode, int Offset, const TargetRegisterInfo *TRI, bool Extend=true) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &Mask, int64_t &Value) const override
For a comparison instruction, return the source registers in SrcReg and SrcReg2 if having two registe...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool predOpcodeHasNot(ArrayRef< MachineOperand > Cond) const
bool isExtendable(const MachineInstr &MI) const
const HexagonInstrInfo * getInstrInfo() const override
const HexagonRegisterInfo * getRegisterInfo() const override
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:232
BlockT * getHeader() const
Definition: LoopInfo.h:105
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:114
block_iterator block_begin() const
Definition: LoopInfo.h:193
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
bool isAdd() const
Return true if the instruction is an add instruction.
Definition: MCInstrDesc.h:278
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
std::vector< MachineBasicBlock * >::iterator pred_iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB)
addNewBlock - Add a new node to the dominator tree information.
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
bool properlyDominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
void changeImmediateDominator(MachineBasicBlock *N, MachineBasicBlock *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
bool isCompare(QueryType Type=IgnoreBundle) const
Return true if this instruction is a comparison.
Definition: MachineInstr.h:941
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineBasicBlock * findLoopControlBlock()
Find the block that contains the loop control variable and the loop test.
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_iterator< true, false, true, true, false, false > use_nodbg_iterator
use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the specified register,...
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
bool empty() const
Definition: SmallSet.h:157
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define INT64_MIN
Definition: DataTypes.h:74
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:373
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1869
FunctionPass * createHexagonHardwareLoops()
void initializeHexagonHardwareLoopsPass(PassRegistry &)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: BitVector.h:851
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define EQ(a, b)
Definition: regexec.c:112