LLVM  13.0.0git
ARMLowOverheadLoops.cpp
Go to the documentation of this file.
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor.
15 /// - t2LoopDec - placed within in the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
17 ///
18 /// In addition to this, we also look for the presence of the VCTP instruction,
19 /// which determines whether we can generated the tail-predicated low-overhead
20 /// loop form.
21 ///
22 /// Assumptions and Dependencies:
23 /// Low-overhead loops are constructed and executed using a setup instruction:
24 /// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP.
25 /// WLS(TP) and LE(TP) are branching instructions with a (large) limited range
26 /// but fixed polarity: WLS can only branch forwards and LE can only branch
27 /// backwards. These restrictions mean that this pass is dependent upon block
28 /// layout and block sizes, which is why it's the last pass to run. The same is
29 /// true for ConstantIslands, but this pass does not increase the size of the
30 /// basic blocks, nor does it change the CFG. Instructions are mainly removed
31 /// during the transform and pseudo instructions are replaced by real ones. In
32 /// some cases, when we have to revert to a 'normal' loop, we have to introduce
33 /// multiple instructions for a single pseudo (see RevertWhile and
34 /// RevertLoopEnd). To handle this situation, t2WhileLoopStartLR and t2LoopEnd
35 /// are defined to be as large as this maximum sequence of replacement
36 /// instructions.
37 ///
38 /// A note on VPR.P0 (the lane mask):
39 /// VPT, VCMP, VPNOT and VCTP won't overwrite VPR.P0 when they update it in a
40 /// "VPT Active" context (which includes low-overhead loops and vpt blocks).
41 /// They will simply "and" the result of their calculation with the current
42 /// value of VPR.P0. You can think of it like this:
43 /// \verbatim
44 /// if VPT active: ; Between a DLSTP/LETP, or for predicated instrs
45 /// VPR.P0 &= Value
46 /// else
47 /// VPR.P0 = Value
48 /// \endverbatim
49 /// When we're inside the low-overhead loop (between DLSTP and LETP), we always
50 /// fall in the "VPT active" case, so we can consider that all VPR writes by
51 /// one of those instruction is actually a "and".
52 //===----------------------------------------------------------------------===//
53 
54 #include "ARM.h"
55 #include "ARMBaseInstrInfo.h"
56 #include "ARMBaseRegisterInfo.h"
57 #include "ARMBasicBlockInfo.h"
58 #include "ARMSubtarget.h"
59 #include "MVETailPredUtils.h"
60 #include "Thumb2InstrInfo.h"
61 #include "llvm/ADT/SetOperations.h"
62 #include "llvm/ADT/SmallSet.h"
68 #include "llvm/CodeGen/Passes.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "arm-low-overhead-loops"
75 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
76 
77 static cl::opt<bool>
78 DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
79  cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
80  cl::init(false));
81 
84  return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
85 }
86 
88  return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
89 }
90 
91 static bool hasVPRUse(MachineInstr &MI) {
92  return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
93 }
94 
95 static bool isDomainMVE(MachineInstr *MI) {
96  uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
97  return Domain == ARMII::DomainMVE;
98 }
99 
101  return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
102 }
103 
104 static bool isDo(MachineInstr *MI) {
105  return MI->getOpcode() != ARM::t2WhileLoopStartLR;
106 }
107 
108 namespace {
109 
110  using InstSet = SmallPtrSetImpl<MachineInstr *>;
111 
112  class PostOrderLoopTraversal {
113  MachineLoop &ML;
114  MachineLoopInfo &MLI;
117 
118  public:
119  PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)
120  : ML(ML), MLI(MLI) { }
121 
122  const SmallVectorImpl<MachineBasicBlock*> &getOrder() const {
123  return Order;
124  }
125 
126  // Visit all the blocks within the loop, as well as exit blocks and any
127  // blocks properly dominating the header.
128  void ProcessLoop() {
129  std::function<void(MachineBasicBlock*)> Search = [this, &Search]
130  (MachineBasicBlock *MBB) -> void {
131  if (Visited.count(MBB))
132  return;
133 
134  Visited.insert(MBB);
135  for (auto *Succ : MBB->successors()) {
136  if (!ML.contains(Succ))
137  continue;
138  Search(Succ);
139  }
140  Order.push_back(MBB);
141  };
142 
143  // Insert exit blocks.
145  ML.getExitBlocks(ExitBlocks);
146  append_range(Order, ExitBlocks);
147 
148  // Then add the loop body.
149  Search(ML.getHeader());
150 
151  // Then try the preheader and its predecessors.
152  std::function<void(MachineBasicBlock*)> GetPredecessor =
153  [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void {
154  Order.push_back(MBB);
155  if (MBB->pred_size() == 1)
156  GetPredecessor(*MBB->pred_begin());
157  };
158 
159  if (auto *Preheader = ML.getLoopPreheader())
160  GetPredecessor(Preheader);
161  else if (auto *Preheader = MLI.findLoopPreheader(&ML, true))
162  GetPredecessor(Preheader);
163  }
164  };
165 
166  struct PredicatedMI {
167  MachineInstr *MI = nullptr;
168  SetVector<MachineInstr*> Predicates;
169 
170  public:
171  PredicatedMI(MachineInstr *I, SetVector<MachineInstr *> &Preds) : MI(I) {
172  assert(I && "Instruction must not be null!");
173  Predicates.insert(Preds.begin(), Preds.end());
174  }
175  };
176 
177  // Represent the current state of the VPR and hold all instances which
178  // represent a VPT block, which is a list of instructions that begins with a
179  // VPT/VPST and has a maximum of four proceeding instructions. All
180  // instructions within the block are predicated upon the vpr and we allow
181  // instructions to define the vpr within in the block too.
182  class VPTState {
183  friend struct LowOverheadLoop;
184 
186 
187  static SmallVector<VPTState, 4> Blocks;
188  static SetVector<MachineInstr *> CurrentPredicates;
189  static std::map<MachineInstr *,
190  std::unique_ptr<PredicatedMI>> PredicatedInsts;
191 
192  static void CreateVPTBlock(MachineInstr *MI) {
193  assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))
194  && "Can't begin VPT without predicate");
195  Blocks.emplace_back(MI);
196  // The execution of MI is predicated upon the current set of instructions
197  // that are AND'ed together to form the VPR predicate value. In the case
198  // that MI is a VPT, CurrentPredicates will also just be MI.
199  PredicatedInsts.emplace(
200  MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
201  }
202 
203  static void reset() {
204  Blocks.clear();
205  PredicatedInsts.clear();
206  CurrentPredicates.clear();
207  }
208 
209  static void addInst(MachineInstr *MI) {
210  Blocks.back().insert(MI);
211  PredicatedInsts.emplace(
212  MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
213  }
214 
215  static void addPredicate(MachineInstr *MI) {
216  LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
217  CurrentPredicates.insert(MI);
218  }
219 
220  static void resetPredicate(MachineInstr *MI) {
221  LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
222  CurrentPredicates.clear();
223  CurrentPredicates.insert(MI);
224  }
225 
226  public:
227  // Have we found an instruction within the block which defines the vpr? If
228  // so, not all the instructions in the block will have the same predicate.
229  static bool hasUniformPredicate(VPTState &Block) {
230  return getDivergent(Block) == nullptr;
231  }
232 
233  // If it exists, return the first internal instruction which modifies the
234  // VPR.
235  static MachineInstr *getDivergent(VPTState &Block) {
236  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
237  for (unsigned i = 1; i < Insts.size(); ++i) {
238  MachineInstr *Next = Insts[i];
239  if (isVectorPredicate(Next))
240  return Next; // Found an instruction altering the vpr.
241  }
242  return nullptr;
243  }
244 
245  // Return whether the given instruction is predicated upon a VCTP.
246  static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
247  SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
248  if (Exclusive && Predicates.size() != 1)
249  return false;
250  for (auto *PredMI : Predicates)
251  if (isVCTP(PredMI))
252  return true;
253  return false;
254  }
255 
256  // Is the VPST, controlling the block entry, predicated upon a VCTP.
257  static bool isEntryPredicatedOnVCTP(VPTState &Block,
258  bool Exclusive = false) {
259  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
260  return isPredicatedOnVCTP(Insts.front(), Exclusive);
261  }
262 
263  // If this block begins with a VPT, we can check whether it's using
264  // at least one predicated input(s), as well as possible loop invariant
265  // which would result in it being implicitly predicated.
266  static bool hasImplicitlyValidVPT(VPTState &Block,
268  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
269  MachineInstr *VPT = Insts.front();
270  assert(isVPTOpcode(VPT->getOpcode()) &&
271  "Expected VPT block to begin with VPT/VPST");
272 
273  if (VPT->getOpcode() == ARM::MVE_VPST)
274  return false;
275 
276  auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
277  MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
278  return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
279  };
280 
281  auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
282  MachineOperand &MO = MI->getOperand(Idx);
283  if (!MO.isReg() || !MO.getReg())
284  return true;
285 
287  RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
288  if (Defs.empty())
289  return true;
290 
291  for (auto *Def : Defs)
292  if (Def->getParent() == VPT->getParent())
293  return false;
294  return true;
295  };
296 
297  // Check that at least one of the operands is directly predicated on a
298  // vctp and allow an invariant value too.
299  return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
300  (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
301  (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
302  }
303 
304  static bool isValid(ReachingDefAnalysis &RDA) {
305  // All predication within the loop should be based on vctp. If the block
306  // isn't predicated on entry, check whether the vctp is within the block
307  // and that all other instructions are then predicated on it.
308  for (auto &Block : Blocks) {
309  if (isEntryPredicatedOnVCTP(Block, false) ||
310  hasImplicitlyValidVPT(Block, RDA))
311  continue;
312 
313  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
314  // We don't know how to convert a block with just a VPT;VCTP into
315  // anything valid once we remove the VCTP. For now just bail out.
316  assert(isVPTOpcode(Insts.front()->getOpcode()) &&
317  "Expected VPT block to start with a VPST or VPT!");
318  if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
319  isVCTP(Insts.back()))
320  return false;
321 
322  for (auto *MI : Insts) {
323  // Check that any internal VCTPs are 'Then' predicated.
325  return false;
326  // Skip other instructions that build up the predicate.
327  if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
328  continue;
329  // Check that any other instructions are predicated upon a vctp.
330  // TODO: We could infer when VPTs are implicitly predicated on the
331  // vctp (when the operands are predicated).
332  if (!isPredicatedOnVCTP(MI)) {
333  LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
334  return false;
335  }
336  }
337  }
338  return true;
339  }
340 
341  VPTState(MachineInstr *MI) { Insts.push_back(MI); }
342 
343  void insert(MachineInstr *MI) {
344  Insts.push_back(MI);
345  // VPT/VPST + 4 predicated instructions.
346  assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
347  }
348 
349  bool containsVCTP() const {
350  for (auto *MI : Insts)
351  if (isVCTP(MI))
352  return true;
353  return false;
354  }
355 
356  unsigned size() const { return Insts.size(); }
357  SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
358  };
359 
360  struct LowOverheadLoop {
361 
362  MachineLoop &ML;
363  MachineBasicBlock *Preheader = nullptr;
364  MachineLoopInfo &MLI;
366  const TargetRegisterInfo &TRI;
367  const ARMBaseInstrInfo &TII;
368  MachineFunction *MF = nullptr;
369  MachineBasicBlock::iterator StartInsertPt;
370  MachineBasicBlock *StartInsertBB = nullptr;
371  MachineInstr *Start = nullptr;
372  MachineInstr *Dec = nullptr;
373  MachineInstr *End = nullptr;
374  MachineOperand TPNumElements;
377  SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
378  bool Revert = false;
379  bool CannotTailPredicate = false;
380 
381  LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
383  const ARMBaseInstrInfo &TII)
384  : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
385  TPNumElements(MachineOperand::CreateImm(0)) {
386  MF = ML.getHeader()->getParent();
387  if (auto *MBB = ML.getLoopPreheader())
388  Preheader = MBB;
389  else if (auto *MBB = MLI.findLoopPreheader(&ML, true))
390  Preheader = MBB;
391  VPTState::reset();
392  }
393 
394  // If this is an MVE instruction, check that we know how to use tail
395  // predication with it. Record VPT blocks and return whether the
396  // instruction is valid for tail predication.
397  bool ValidateMVEInst(MachineInstr *MI);
398 
399  void AnalyseMVEInst(MachineInstr *MI) {
400  CannotTailPredicate = !ValidateMVEInst(MI);
401  }
402 
403  bool IsTailPredicationLegal() const {
404  // For now, let's keep things really simple and only support a single
405  // block for tail predication.
406  return !Revert && FoundAllComponents() && !VCTPs.empty() &&
407  !CannotTailPredicate && ML.getNumBlocks() == 1;
408  }
409 
410  // Given that MI is a VCTP, check that is equivalent to any other VCTPs
411  // found.
412  bool AddVCTP(MachineInstr *MI);
413 
414  // Check that the predication in the loop will be equivalent once we
415  // perform the conversion. Also ensure that we can provide the number
416  // of elements to the loop start instruction.
417  bool ValidateTailPredicate();
418 
419  // Check that any values available outside of the loop will be the same
420  // after tail predication conversion.
421  bool ValidateLiveOuts();
422 
423  // Is it safe to define LR with DLS/WLS?
424  // LR can be defined if it is the operand to start, because it's the same
425  // value, or if it's going to be equivalent to the operand to Start.
426  MachineInstr *isSafeToDefineLR();
427 
428  // Check the branch targets are within range and we satisfy our
429  // restrictions.
430  void Validate(ARMBasicBlockUtils *BBUtils);
431 
432  bool FoundAllComponents() const {
433  return Start && Dec && End;
434  }
435 
436  SmallVectorImpl<VPTState> &getVPTBlocks() {
437  return VPTState::Blocks;
438  }
439 
440  // Return the operand for the loop start instruction. This will be the loop
441  // iteration count, or the number of elements if we're tail predicating.
442  MachineOperand &getLoopStartOperand() {
443  if (IsTailPredicationLegal())
444  return TPNumElements;
445  return Start->getOperand(1);
446  }
447 
448  unsigned getStartOpcode() const {
449  bool IsDo = isDo(Start);
450  if (!IsTailPredicationLegal())
451  return IsDo ? ARM::t2DLS : ARM::t2WLS;
452 
453  return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo);
454  }
455 
456  void dump() const {
457  if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
458  if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
459  if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
460  if (!VCTPs.empty()) {
461  dbgs() << "ARM Loops: Found VCTP(s):\n";
462  for (auto *MI : VCTPs)
463  dbgs() << " - " << *MI;
464  }
465  if (!FoundAllComponents())
466  dbgs() << "ARM Loops: Not a low-overhead loop.\n";
467  else if (!(Start && Dec && End))
468  dbgs() << "ARM Loops: Failed to find all loop components.\n";
469  }
470  };
471 
472  class ARMLowOverheadLoops : public MachineFunctionPass {
473  MachineFunction *MF = nullptr;
474  MachineLoopInfo *MLI = nullptr;
475  ReachingDefAnalysis *RDA = nullptr;
476  const ARMBaseInstrInfo *TII = nullptr;
477  MachineRegisterInfo *MRI = nullptr;
478  const TargetRegisterInfo *TRI = nullptr;
479  std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
480 
481  public:
482  static char ID;
483 
484  ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
485 
486  void getAnalysisUsage(AnalysisUsage &AU) const override {
487  AU.setPreservesCFG();
491  }
492 
493  bool runOnMachineFunction(MachineFunction &MF) override;
494 
495  MachineFunctionProperties getRequiredProperties() const override {
499  }
500 
501  StringRef getPassName() const override {
503  }
504 
505  private:
506  bool ProcessLoop(MachineLoop *ML);
507 
508  bool RevertNonLoops();
509 
510  void RevertWhile(MachineInstr *MI) const;
511  void RevertDo(MachineInstr *MI) const;
512 
513  bool RevertLoopDec(MachineInstr *MI) const;
514 
515  void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
516 
517  void RevertLoopEndDec(MachineInstr *MI) const;
518 
519  void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
520 
521  MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
522 
523  void Expand(LowOverheadLoop &LoLoop);
524 
525  void IterationCountDCE(LowOverheadLoop &LoLoop);
526  };
527 }
528 
529 char ARMLowOverheadLoops::ID = 0;
530 
531 SmallVector<VPTState, 4> VPTState::Blocks;
532 SetVector<MachineInstr *> VPTState::CurrentPredicates;
533 std::map<MachineInstr *,
534  std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
535 
537  false, false)
538 
539 static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
540  InstSet &ToRemove, InstSet &Ignore) {
541 
542  // Check that we can remove all of Killed without having to modify any IT
543  // blocks.
544  auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) {
545  // Collect the dead code and the MBBs in which they reside.
547  for (auto *Dead : Killed)
548  BasicBlocks.insert(Dead->getParent());
549 
550  // Collect IT blocks in all affected basic blocks.
551  std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
552  for (auto *MBB : BasicBlocks) {
553  for (auto &IT : *MBB) {
554  if (IT.getOpcode() != ARM::t2IT)
555  continue;
557  ITBlocks[&IT]);
558  }
559  }
560 
561  // If we're removing all of the instructions within an IT block, then
562  // also remove the IT instruction.
563  SmallPtrSet<MachineInstr *, 2> ModifiedITs;
565  for (auto *Dead : Killed) {
566  if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
568  RemoveITs.insert(IT);
569  auto &CurrentBlock = ITBlocks[IT];
570  CurrentBlock.erase(Dead);
571  if (CurrentBlock.empty())
572  ModifiedITs.erase(IT);
573  else
574  ModifiedITs.insert(IT);
575  }
576  }
577  if (!ModifiedITs.empty())
578  return false;
579  Killed.insert(RemoveITs.begin(), RemoveITs.end());
580  return true;
581  };
582 
584  if (!RDA.isSafeToRemove(MI, Uses, Ignore))
585  return false;
586 
587  if (WontCorruptITs(Uses, RDA)) {
588  ToRemove.insert(Uses.begin(), Uses.end());
589  LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI
590  << " - can also remove:\n";
591  for (auto *Use : Uses)
592  dbgs() << " - " << *Use);
593 
595  RDA.collectKilledOperands(MI, Killed);
596  if (WontCorruptITs(Killed, RDA)) {
597  ToRemove.insert(Killed.begin(), Killed.end());
598  LLVM_DEBUG(for (auto *Dead : Killed)
599  dbgs() << " - " << *Dead);
600  }
601  return true;
602  }
603  return false;
604 }
605 
606 bool LowOverheadLoop::ValidateTailPredicate() {
607  if (!IsTailPredicationLegal()) {
608  LLVM_DEBUG(if (VCTPs.empty())
609  dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
610  dbgs() << "ARM Loops: Tail-predication is not valid.\n");
611  return false;
612  }
613 
614  assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
615  assert(ML.getBlocks().size() == 1 &&
616  "Shouldn't be processing a loop with more than one block");
617 
619  LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
620  return false;
621  }
622 
623  if (!VPTState::isValid(RDA)) {
624  LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");
625  return false;
626  }
627 
628  if (!ValidateLiveOuts()) {
629  LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
630  return false;
631  }
632 
633  // Check that creating a [W|D]LSTP, which will define LR with an element
634  // count instead of iteration count, won't affect any other instructions
635  // than the LoopStart and LoopDec.
636  // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
637  Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
638  : Start->getOperand(0).getReg();
639  if (StartInsertPt == Start && StartReg == ARM::LR) {
640  if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
642  RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
643  for (auto *Use : Uses) {
644  if (Use != Start && Use != Dec) {
645  LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
646  return false;
647  }
648  }
649  }
650  }
651 
652  // For tail predication, we need to provide the number of elements, instead
653  // of the iteration count, to the loop start instruction. The number of
654  // elements is provided to the vctp instruction, so we need to check that
655  // we can use this register at InsertPt.
656  MachineInstr *VCTP = VCTPs.back();
657  if (Start->getOpcode() == ARM::t2DoLoopStartTP) {
658  TPNumElements = Start->getOperand(2);
659  StartInsertPt = Start;
660  StartInsertBB = Start->getParent();
661  } else {
662  TPNumElements = VCTP->getOperand(1);
663  MCRegister NumElements = TPNumElements.getReg().asMCReg();
664 
665  // If the register is defined within loop, then we can't perform TP.
666  // TODO: Check whether this is just a mov of a register that would be
667  // available.
668  if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
669  LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
670  return false;
671  }
672 
673  // The element count register maybe defined after InsertPt, in which case we
674  // need to try to move either InsertPt or the def so that the [w|d]lstp can
675  // use the value.
676 
677  if (StartInsertPt != StartInsertBB->end() &&
678  !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
679  if (auto *ElemDef =
680  RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
681  if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
682  ElemDef->removeFromParent();
683  StartInsertBB->insert(StartInsertPt, ElemDef);
684  LLVM_DEBUG(dbgs()
685  << "ARM Loops: Moved element count def: " << *ElemDef);
686  } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
687  StartInsertPt->removeFromParent();
688  StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
689  &*StartInsertPt);
690  LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
691  } else {
692  // If we fail to move an instruction and the element count is provided
693  // by a mov, use the mov operand if it will have the same value at the
694  // insertion point
695  MachineOperand Operand = ElemDef->getOperand(1);
696  if (isMovRegOpcode(ElemDef->getOpcode()) &&
697  RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) ==
698  RDA.getUniqueReachingMIDef(&*StartInsertPt,
699  Operand.getReg().asMCReg())) {
700  TPNumElements = Operand;
701  NumElements = TPNumElements.getReg();
702  } else {
703  LLVM_DEBUG(dbgs()
704  << "ARM Loops: Unable to move element count to loop "
705  << "start instruction.\n");
706  return false;
707  }
708  }
709  }
710  }
711 
712  // Especially in the case of while loops, InsertBB may not be the
713  // preheader, so we need to check that the register isn't redefined
714  // before entering the loop.
715  auto CannotProvideElements = [this](MachineBasicBlock *MBB,
716  MCRegister NumElements) {
717  if (MBB->empty())
718  return false;
719  // NumElements is redefined in this block.
720  if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
721  return true;
722 
723  // Don't continue searching up through multiple predecessors.
724  if (MBB->pred_size() > 1)
725  return true;
726 
727  return false;
728  };
729 
730  // Search backwards for a def, until we get to InsertBB.
731  MachineBasicBlock *MBB = Preheader;
732  while (MBB && MBB != StartInsertBB) {
733  if (CannotProvideElements(MBB, NumElements)) {
734  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
735  return false;
736  }
737  MBB = *MBB->pred_begin();
738  }
739  }
740 
741  // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect
742  // world the [w|d]lstp instruction would be last instruction in the preheader
743  // and so it would only affect instructions within the loop body. But due to
744  // scheduling, and/or the logic in this pass (above), the insertion point can
745  // be moved earlier. So if the Loop Start isn't the last instruction in the
746  // preheader, and if the initial element count is smaller than the vector
747  // width, the Loop Start instruction will immediately generate one or more
748  // false lane mask which can, incorrectly, affect the proceeding MVE
749  // instructions in the preheader.
750  if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {
751  LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");
752  return false;
753  }
754 
755  // Check that the value change of the element count is what we expect and
756  // that the predication will be equivalent. For this we need:
757  // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
758  // and we can also allow register copies within the chain too.
759  auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) {
760  return -getAddSubImmediate(*MI) == ExpectedVecWidth;
761  };
762 
763  MachineBasicBlock *MBB = VCTP->getParent();
764  // Remove modifications to the element count since they have no purpose in a
765  // tail predicated loop. Explicitly refer to the vctp operand no matter which
766  // register NumElements has been assigned to, since that is what the
767  // modifications will be using
768  if (auto *Def = RDA.getUniqueReachingMIDef(
769  &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) {
770  SmallPtrSet<MachineInstr*, 2> ElementChain;
772  unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
773 
774  Ignore.insert(VCTPs.begin(), VCTPs.end());
775 
776  if (TryRemove(Def, RDA, ElementChain, Ignore)) {
777  bool FoundSub = false;
778 
779  for (auto *MI : ElementChain) {
780  if (isMovRegOpcode(MI->getOpcode()))
781  continue;
782 
783  if (isSubImmOpcode(MI->getOpcode())) {
784  if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {
785  LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
786  " count: " << *MI);
787  return false;
788  }
789  FoundSub = true;
790  } else {
791  LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
792  " count: " << *MI);
793  return false;
794  }
795  }
796  ToRemove.insert(ElementChain.begin(), ElementChain.end());
797  }
798  }
799 
800  // If we converted the LoopStart to a t2DoLoopStartTP, we can also remove any
801  // extra instructions in the preheader, which often includes a now unused MOV.
802  if (Start->getOpcode() == ARM::t2DoLoopStartTP && Preheader &&
803  !Preheader->empty() &&
804  !RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) {
805  if (auto *Def = RDA.getUniqueReachingMIDef(
806  &Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) {
808  Ignore.insert(VCTPs.begin(), VCTPs.end());
809  TryRemove(Def, RDA, ToRemove, Ignore);
810  }
811  }
812 
813  return true;
814 }
815 
816 static bool isRegInClass(const MachineOperand &MO,
817  const TargetRegisterClass *Class) {
818  return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
819 }
820 
821 // MVE 'narrowing' operate on half a lane, reading from half and writing
822 // to half, which are referred to has the top and bottom half. The other
823 // half retains its previous value.
825  const MCInstrDesc &MCID = MI.getDesc();
826  uint64_t Flags = MCID.TSFlags;
827  return (Flags & ARMII::RetainsPreviousHalfElement) != 0;
828 }
829 
830 // Some MVE instructions read from the top/bottom halves of their operand(s)
831 // and generate a vector result with result elements that are double the
832 // width of the input.
834  const MCInstrDesc &MCID = MI.getDesc();
835  uint64_t Flags = MCID.TSFlags;
836  return (Flags & ARMII::DoubleWidthResult) != 0;
837 }
838 
839 static bool isHorizontalReduction(const MachineInstr &MI) {
840  const MCInstrDesc &MCID = MI.getDesc();
841  uint64_t Flags = MCID.TSFlags;
842  return (Flags & ARMII::HorizontalReduction) != 0;
843 }
844 
845 // Can this instruction generate a non-zero result when given only zeroed
846 // operands? This allows us to know that, given operands with false bytes
847 // zeroed by masked loads, that the result will also contain zeros in those
848 // bytes.
849 static bool canGenerateNonZeros(const MachineInstr &MI) {
850 
851  // Check for instructions which can write into a larger element size,
852  // possibly writing into a previous zero'd lane.
854  return true;
855 
856  switch (MI.getOpcode()) {
857  default:
858  break;
859  // FIXME: VNEG FP and -0? I think we'll need to handle this once we allow
860  // fp16 -> fp32 vector conversions.
861  // Instructions that perform a NOT will generate 1s from 0s.
862  case ARM::MVE_VMVN:
863  case ARM::MVE_VORN:
864  // Count leading zeros will do just that!
865  case ARM::MVE_VCLZs8:
866  case ARM::MVE_VCLZs16:
867  case ARM::MVE_VCLZs32:
868  return true;
869  }
870  return false;
871 }
872 
873 // Look at its register uses to see if it only can only receive zeros
874 // into its false lanes which would then produce zeros. Also check that
875 // the output register is also defined by an FalseLanesZero instruction
876 // so that if tail-predication happens, the lanes that aren't updated will
877 // still be zeros.
879  const TargetRegisterClass *QPRs,
880  const ReachingDefAnalysis &RDA,
881  InstSet &FalseLanesZero) {
882  if (canGenerateNonZeros(MI))
883  return false;
884 
886  // Predicated loads will write zeros to the falsely predicated bytes of the
887  // destination register.
888  if (MI.mayLoad())
889  return isPredicated;
890 
891  auto IsZeroInit = [](MachineInstr *Def) {
892  return !isVectorPredicated(Def) &&
893  Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
894  Def->getOperand(1).getImm() == 0;
895  };
896 
897  bool AllowScalars = isHorizontalReduction(MI);
898  for (auto &MO : MI.operands()) {
899  if (!MO.isReg() || !MO.getReg())
900  continue;
901  if (!isRegInClass(MO, QPRs) && AllowScalars)
902  continue;
903 
904  // Check that this instruction will produce zeros in its false lanes:
905  // - If it only consumes false lanes zero or constant 0 (vmov #0)
906  // - If it's predicated, it only matters that it's def register already has
907  // false lane zeros, so we can ignore the uses.
909  RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs);
910  for (auto *Def : Defs) {
911  if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
912  continue;
913  if (MO.isUse() && isPredicated)
914  continue;
915  return false;
916  }
917  }
918  LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
919  return true;
920 }
921 
922 bool LowOverheadLoop::ValidateLiveOuts() {
923  // We want to find out if the tail-predicated version of this loop will
924  // produce the same values as the loop in its original form. For this to
925  // be true, the newly inserted implicit predication must not change the
926  // the (observable) results.
927  // We're doing this because many instructions in the loop will not be
928  // predicated and so the conversion from VPT predication to tail-predication
929  // can result in different values being produced; due to the tail-predication
930  // preventing many instructions from updating their falsely predicated
931  // lanes. This analysis assumes that all the instructions perform lane-wise
932  // operations and don't perform any exchanges.
933  // A masked load, whether through VPT or tail predication, will write zeros
934  // to any of the falsely predicated bytes. So, from the loads, we know that
935  // the false lanes are zeroed and here we're trying to track that those false
936  // lanes remain zero, or where they change, the differences are masked away
937  // by their user(s).
938  // All MVE stores have to be predicated, so we know that any predicate load
939  // operands, or stored results are equivalent already. Other explicitly
940  // predicated instructions will perform the same operation in the original
941  // loop and the tail-predicated form too. Because of this, we can insert
942  // loads, stores and other predicated instructions into our Predicated
943  // set and build from there.
944  const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
945  SetVector<MachineInstr *> FalseLanesUnknown;
948  MachineBasicBlock *Header = ML.getHeader();
949 
950  for (auto &MI : *Header) {
951  if (!shouldInspect(MI))
952  continue;
953 
954  if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode()))
955  continue;
956 
958  bool retainsOrReduces =
960 
961  if (isPredicated)
962  Predicated.insert(&MI);
964  FalseLanesZero.insert(&MI);
965  else if (MI.getNumDefs() == 0)
966  continue;
967  else if (!isPredicated && retainsOrReduces)
968  return false;
969  else if (!isPredicated)
970  FalseLanesUnknown.insert(&MI);
971  }
972 
973  auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
974  SmallPtrSetImpl<MachineInstr *> &Predicated) {
976  RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses);
977  for (auto *Use : Uses) {
978  if (Use != MI && !Predicated.count(Use))
979  return false;
980  }
981  return true;
982  };
983 
984  // Visit the unknowns in reverse so that we can start at the values being
985  // stored and then we can work towards the leaves, hopefully adding more
986  // instructions to Predicated. Successfully terminating the loop means that
987  // all the unknown values have to found to be masked by predicated user(s).
988  // For any unpredicated values, we store them in NonPredicated so that we
989  // can later check whether these form a reduction.
990  SmallPtrSet<MachineInstr*, 2> NonPredicated;
991  for (auto *MI : reverse(FalseLanesUnknown)) {
992  for (auto &MO : MI->operands()) {
993  if (!isRegInClass(MO, QPRs) || !MO.isDef())
994  continue;
995  if (!HasPredicatedUsers(MI, MO, Predicated)) {
996  LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
997  << TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
998  NonPredicated.insert(MI);
999  break;
1000  }
1001  }
1002  // Any unknown false lanes have been masked away by the user(s).
1003  if (!NonPredicated.contains(MI))
1004  Predicated.insert(MI);
1005  }
1006 
1007  SmallPtrSet<MachineInstr *, 2> LiveOutMIs;
1009  ML.getExitBlocks(ExitBlocks);
1010  assert(ML.getNumBlocks() == 1 && "Expected single block loop!");
1011  assert(ExitBlocks.size() == 1 && "Expected a single exit block");
1012  MachineBasicBlock *ExitBB = ExitBlocks.front();
1013  for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
1014  // TODO: Instead of blocking predication, we could move the vctp to the exit
1015  // block and calculate it's operand there in or the preheader.
1016  if (RegMask.PhysReg == ARM::VPR)
1017  return false;
1018  // Check Q-regs that are live in the exit blocks. We don't collect scalars
1019  // because they won't be affected by lane predication.
1020  if (QPRs->contains(RegMask.PhysReg))
1021  if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg))
1022  LiveOutMIs.insert(MI);
1023  }
1024 
1025  // We've already validated that any VPT predication within the loop will be
1026  // equivalent when we perform the predication transformation; so we know that
1027  // any VPT predicated instruction is predicated upon VCTP. Any live-out
1028  // instruction needs to be predicated, so check this here. The instructions
1029  // in NonPredicated have been found to be a reduction that we can ensure its
1030  // legality.
1031  for (auto *MI : LiveOutMIs) {
1032  if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
1033  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
1034  return false;
1035  }
1036  }
1037 
1038  return true;
1039 }
1040 
1041 void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
1042  if (Revert)
1043  return;
1044 
1045  // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP]
1046  // can only jump back.
1047  auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,
1048  ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {
1049  MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
1050  ? End->getOperand(1).getMBB()
1051  : End->getOperand(2).getMBB();
1052  // TODO Maybe there's cases where the target doesn't have to be the header,
1053  // but for now be safe and revert.
1054  if (TgtBB != ML.getHeader()) {
1055  LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");
1056  return false;
1057  }
1058 
1059  // The WLS and LE instructions have 12-bits for the label offset. WLS
1060  // requires a positive offset, while LE uses negative.
1061  if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
1062  !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
1063  LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
1064  return false;
1065  }
1066 
1067  if (Start->getOpcode() == ARM::t2WhileLoopStartLR &&
1068  (BBUtils->getOffsetOf(Start) >
1069  BBUtils->getOffsetOf(Start->getOperand(2).getMBB()) ||
1070  !BBUtils->isBBInRange(Start, Start->getOperand(2).getMBB(), 4094))) {
1071  LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
1072  return false;
1073  }
1074  return true;
1075  };
1076 
1077  StartInsertPt = MachineBasicBlock::iterator(Start);
1078  StartInsertBB = Start->getParent();
1079  LLVM_DEBUG(dbgs() << "ARM Loops: Will insert LoopStart at "
1080  << *StartInsertPt);
1081 
1082  Revert = !ValidateRanges(Start, End, BBUtils, ML);
1083  CannotTailPredicate = !ValidateTailPredicate();
1084 }
1085 
1086 bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
1087  LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI);
1088  if (VCTPs.empty()) {
1089  VCTPs.push_back(MI);
1090  return true;
1091  }
1092 
1093  // If we find another VCTP, check whether it uses the same value as the main VCTP.
1094  // If it does, store it in the VCTPs set, else refuse it.
1095  MachineInstr *Prev = VCTPs.back();
1096  if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
1097  !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) {
1098  LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
1099  "definition from the main VCTP");
1100  return false;
1101  }
1102  VCTPs.push_back(MI);
1103  return true;
1104 }
1105 
1106 bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) {
1107  if (CannotTailPredicate)
1108  return false;
1109 
1110  if (!shouldInspect(*MI))
1111  return true;
1112 
1113  if (MI->getOpcode() == ARM::MVE_VPSEL ||
1114  MI->getOpcode() == ARM::MVE_VPNOT) {
1115  // TODO: Allow VPSEL and VPNOT, we currently cannot because:
1116  // 1) It will use the VPR as a predicate operand, but doesn't have to be
1117  // instead a VPT block, which means we can assert while building up
1118  // the VPT block because we don't find another VPT or VPST to being a new
1119  // one.
1120  // 2) VPSEL still requires a VPR operand even after tail predicating,
1121  // which means we can't remove it unless there is another
1122  // instruction, such as vcmp, that can provide the VPR def.
1123  return false;
1124  }
1125 
1126  // Record all VCTPs and check that they're equivalent to one another.
1127  if (isVCTP(MI) && !AddVCTP(MI))
1128  return false;
1129 
1130  // Inspect uses first so that any instructions that alter the VPR don't
1131  // alter the predicate upon themselves.
1132  const MCInstrDesc &MCID = MI->getDesc();
1133  bool IsUse = false;
1134  unsigned LastOpIdx = MI->getNumOperands() - 1;
1135  for (auto &Op : enumerate(reverse(MCID.operands()))) {
1136  const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
1137  if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
1138  continue;
1139 
1140  if (ARM::isVpred(Op.value().OperandType)) {
1141  VPTState::addInst(MI);
1142  IsUse = true;
1143  } else if (MI->getOpcode() != ARM::MVE_VPST) {
1144  LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
1145  return false;
1146  }
1147  }
1148 
1149  // If we find an instruction that has been marked as not valid for tail
1150  // predication, only allow the instruction if it's contained within a valid
1151  // VPT block.
1152  bool RequiresExplicitPredication =
1154  if (isDomainMVE(MI) && RequiresExplicitPredication) {
1155  LLVM_DEBUG(if (!IsUse)
1156  dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
1157  return IsUse;
1158  }
1159 
1160  // If the instruction is already explicitly predicated, then the conversion
1161  // will be fine, but ensure that all store operations are predicated.
1162  if (MI->mayStore())
1163  return IsUse;
1164 
1165  // If this instruction defines the VPR, update the predicate for the
1166  // proceeding instructions.
1167  if (isVectorPredicate(MI)) {
1168  // Clear the existing predicate when we're not in VPT Active state,
1169  // otherwise we add to it.
1170  if (!isVectorPredicated(MI))
1171  VPTState::resetPredicate(MI);
1172  else
1173  VPTState::addPredicate(MI);
1174  }
1175 
1176  // Finally once the predicate has been modified, we can start a new VPT
1177  // block if necessary.
1178  if (isVPTOpcode(MI->getOpcode()))
1180 
1181  return true;
1182 }
1183 
1184 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
1185  const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
1186  if (!ST.hasLOB())
1187  return false;
1188 
1189  MF = &mf;
1190  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
1191 
1192  MLI = &getAnalysis<MachineLoopInfo>();
1193  RDA = &getAnalysis<ReachingDefAnalysis>();
1195  MRI = &MF->getRegInfo();
1196  TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
1197  TRI = ST.getRegisterInfo();
1198  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
1199  BBUtils->computeAllBlockSizes();
1200  BBUtils->adjustBBOffsetsAfter(&MF->front());
1201 
1202  bool Changed = false;
1203  for (auto ML : *MLI) {
1204  if (ML->isOutermost())
1205  Changed |= ProcessLoop(ML);
1206  }
1207  Changed |= RevertNonLoops();
1208  return Changed;
1209 }
1210 
1211 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
1212 
1213  bool Changed = false;
1214 
1215  // Process inner loops first.
1216  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
1217  Changed |= ProcessLoop(*I);
1218 
1219  LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n";
1220  if (auto *Preheader = ML->getLoopPreheader())
1221  dbgs() << " - " << Preheader->getName() << "\n";
1222  else if (auto *Preheader = MLI->findLoopPreheader(ML))
1223  dbgs() << " - " << Preheader->getName() << "\n";
1224  else if (auto *Preheader = MLI->findLoopPreheader(ML, true))
1225  dbgs() << " - " << Preheader->getName() << "\n";
1226  for (auto *MBB : ML->getBlocks())
1227  dbgs() << " - " << MBB->getName() << "\n";
1228  );
1229 
1230  // Search the given block for a loop start instruction. If one isn't found,
1231  // and there's only one predecessor block, search that one too.
1232  std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
1233  [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
1234  for (auto &MI : *MBB) {
1235  if (isLoopStart(MI))
1236  return &MI;
1237  }
1238  if (MBB->pred_size() == 1)
1239  return SearchForStart(*MBB->pred_begin());
1240  return nullptr;
1241  };
1242 
1243  LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI, *TII);
1244  // Search the preheader for the start intrinsic.
1245  // FIXME: I don't see why we shouldn't be supporting multiple predecessors
1246  // with potentially multiple set.loop.iterations, so we need to enable this.
1247  if (LoLoop.Preheader)
1248  LoLoop.Start = SearchForStart(LoLoop.Preheader);
1249  else
1250  return false;
1251 
1252  // Find the low-overhead loop components and decide whether or not to fall
1253  // back to a normal loop. Also look for a vctp instructions and decide
1254  // whether we can convert that predicate using tail predication.
1255  for (auto *MBB : reverse(ML->getBlocks())) {
1256  for (auto &MI : *MBB) {
1257  if (MI.isDebugValue())
1258  continue;
1259  else if (MI.getOpcode() == ARM::t2LoopDec)
1260  LoLoop.Dec = &MI;
1261  else if (MI.getOpcode() == ARM::t2LoopEnd)
1262  LoLoop.End = &MI;
1263  else if (MI.getOpcode() == ARM::t2LoopEndDec)
1264  LoLoop.End = LoLoop.Dec = &MI;
1265  else if (isLoopStart(MI))
1266  LoLoop.Start = &MI;
1267  else if (MI.getDesc().isCall()) {
1268  // TODO: Though the call will require LE to execute again, does this
1269  // mean we should revert? Always executing LE hopefully should be
1270  // faster than performing a sub,cmp,br or even subs,br.
1271  LoLoop.Revert = true;
1272  LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
1273  } else {
1274  // Record VPR defs and build up their corresponding vpt blocks.
1275  // Check we know how to tail predicate any mve instructions.
1276  LoLoop.AnalyseMVEInst(&MI);
1277  }
1278  }
1279  }
1280 
1281  LLVM_DEBUG(LoLoop.dump());
1282  if (!LoLoop.FoundAllComponents()) {
1283  LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");
1284  return false;
1285  }
1286 
1287  assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1288  "Expected t2WhileLoopStart to be removed before regalloc!");
1289 
1290  // Check that the only instruction using LoopDec is LoopEnd. This can only
1291  // happen when the Dec and End are separate, not a single t2LoopEndDec.
1292  // TODO: Check for copy chains that really have no effect.
1293  if (LoLoop.Dec != LoLoop.End) {
1295  RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses);
1296  if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
1297  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
1298  LoLoop.Revert = true;
1299  }
1300  }
1301  LoLoop.Validate(BBUtils.get());
1302  Expand(LoLoop);
1303  return true;
1304 }
1305 
1306 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
1307 // beq that branches to the exit branch.
1308 // TODO: We could also try to generate a cbz if the value in LR is also in
1309 // another low register.
1310 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
1311  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
1312  MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
1313  unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
1314  ARM::tBcc : ARM::t2Bcc;
1315 
1316  RevertWhileLoopStartLR(MI, TII, BrOpc);
1317 }
1318 
1319 void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
1320  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
1322 }
1323 
1325  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
1326  MachineBasicBlock *MBB = MI->getParent();
1328  for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) {
1329  if (I->getOpcode() == ARM::t2LoopEnd) {
1330  Ignore.insert(&*I);
1331  break;
1332  }
1333  }
1334 
1335  // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
1336  bool SetFlags =
1338 
1339  llvm::RevertLoopDec(MI, TII, SetFlags);
1340  return SetFlags;
1341 }
1342 
1343 // Generate a subs, or sub and cmp, and a branch instead of an LE.
1344 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
1345  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
1346 
1347  MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
1348  unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
1349  ARM::tBcc : ARM::t2Bcc;
1350 
1351  llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
1352 }
1353 
1354 // Generate a subs, or sub and cmp, and a branch instead of an LE.
1355 void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
1356  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);
1357  assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");
1358  MachineBasicBlock *MBB = MI->getParent();
1359 
1360  MachineInstrBuilder MIB =
1361  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
1362  MIB.addDef(ARM::LR);
1363  MIB.add(MI->getOperand(1));
1364  MIB.addImm(1);
1365  MIB.addImm(ARMCC::AL);
1366  MIB.addReg(ARM::NoRegister);
1367  MIB.addReg(ARM::CPSR);
1368  MIB->getOperand(5).setIsDef(true);
1369 
1370  MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
1371  unsigned BrOpc =
1372  BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1373 
1374  // Create bne
1375  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
1376  MIB.add(MI->getOperand(2)); // branch target
1377  MIB.addImm(ARMCC::NE); // condition code
1378  MIB.addReg(ARM::CPSR);
1379 
1380  MI->eraseFromParent();
1381 }
1382 
1383 // Perform dead code elimation on the loop iteration count setup expression.
1384 // If we are tail-predicating, the number of elements to be processed is the
1385 // operand of the VCTP instruction in the vector body, see getCount(), which is
1386 // register $r3 in this example:
1387 //
1388 // $lr = big-itercount-expression
1389 // ..
1390 // $lr = t2DoLoopStart renamable $lr
1391 // vector.body:
1392 // ..
1393 // $vpr = MVE_VCTP32 renamable $r3
1394 // renamable $lr = t2LoopDec killed renamable $lr, 1
1395 // t2LoopEnd renamable $lr, %vector.body
1396 // tB %end
1397 //
1398 // What we would like achieve here is to replace the do-loop start pseudo
1399 // instruction t2DoLoopStart with:
1400 //
1401 // $lr = MVE_DLSTP_32 killed renamable $r3
1402 //
1403 // Thus, $r3 which defines the number of elements, is written to $lr,
1404 // and then we want to delete the whole chain that used to define $lr,
1405 // see the comment below how this chain could look like.
1406 //
1407 void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1408  if (!LoLoop.IsTailPredicationLegal())
1409  return;
1410 
1411  LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
1412 
1413  MachineInstr *Def =
1414  RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
1415  if (!Def) {
1416  LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
1417  return;
1418  }
1419 
1420  // Collect and remove the users of iteration count.
1421  SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
1422  LoLoop.End };
1423  if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
1424  LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
1425 }
1426 
1427 MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1428  LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");
1429  // When using tail-predication, try to delete the dead code that was used to
1430  // calculate the number of loop iterations.
1431  IterationCountDCE(LoLoop);
1432 
1433  MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
1434  MachineInstr *Start = LoLoop.Start;
1435  MachineBasicBlock *MBB = LoLoop.StartInsertBB;
1436  unsigned Opc = LoLoop.getStartOpcode();
1437  MachineOperand &Count = LoLoop.getLoopStartOperand();
1438 
1439  // A DLS lr, lr we needn't emit
1440  MachineInstr* NewStart;
1441  if (Opc == ARM::t2DLS && Count.isReg() && Count.getReg() == ARM::LR) {
1442  LLVM_DEBUG(dbgs() << "ARM Loops: Didn't insert start: DLS lr, lr");
1443  NewStart = nullptr;
1444  } else {
1445  MachineInstrBuilder MIB =
1446  BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
1447 
1448  MIB.addDef(ARM::LR);
1449  MIB.add(Count);
1450  if (!isDo(Start))
1451  MIB.add(Start->getOperand(2));
1452 
1453  LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
1454  NewStart = &*MIB;
1455  }
1456 
1457  LoLoop.ToRemove.insert(Start);
1458  return NewStart;
1459 }
1460 
1461 void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1462  auto RemovePredicate = [](MachineInstr *MI) {
1463  if (MI->isDebugInstr())
1464  return;
1465  LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
1466  int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
1467  assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");
1468  assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
1469  "Expected Then predicate!");
1470  MI->getOperand(PIdx).setImm(ARMVCC::None);
1471  MI->getOperand(PIdx + 1).setReg(0);
1472  };
1473 
1474  for (auto &Block : LoLoop.getVPTBlocks()) {
1475  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
1476 
1477  auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
1478  assert(TheVCMP && "Replacing a removed or non-existent VCMP");
1479  // Replace the VCMP with a VPT
1480  MachineInstrBuilder MIB =
1481  BuildMI(*At->getParent(), At, At->getDebugLoc(),
1482  TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode())));
1483  MIB.addImm(ARMVCC::Then);
1484  // Register one
1485  MIB.add(TheVCMP->getOperand(1));
1486  // Register two
1487  MIB.add(TheVCMP->getOperand(2));
1488  // The comparison code, e.g. ge, eq, lt
1489  MIB.add(TheVCMP->getOperand(3));
1490  LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
1491  LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
1492  LoLoop.ToRemove.insert(TheVCMP);
1493  TheVCMP = nullptr;
1494  };
1495 
1496  if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) {
1497  MachineInstr *VPST = Insts.front();
1498  if (VPTState::hasUniformPredicate(Block)) {
1499  // A vpt block starting with VPST, is only predicated upon vctp and has no
1500  // internal vpr defs:
1501  // - Remove vpst.
1502  // - Unpredicate the remaining instructions.
1503  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1504  for (unsigned i = 1; i < Insts.size(); ++i)
1505  RemovePredicate(Insts[i]);
1506  } else {
1507  // The VPT block has a non-uniform predicate but it uses a vpst and its
1508  // entry is guarded only by a vctp, which means we:
1509  // - Need to remove the original vpst.
1510  // - Then need to unpredicate any following instructions, until
1511  // we come across the divergent vpr def.
1512  // - Insert a new vpst to predicate the instruction(s) that following
1513  // the divergent vpr def.
1514  MachineInstr *Divergent = VPTState::getDivergent(Block);
1515  MachineBasicBlock *MBB = Divergent->getParent();
1516  auto DivergentNext = ++MachineBasicBlock::iterator(Divergent);
1517  while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())
1518  ++DivergentNext;
1519 
1520  bool DivergentNextIsPredicated =
1521  DivergentNext != MBB->end() &&
1522  getVPTInstrPredicate(*DivergentNext) != ARMVCC::None;
1523 
1524  for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext;
1525  I != E; ++I)
1526  RemovePredicate(&*I);
1527 
1528  // Check if the instruction defining vpr is a vcmp so it can be combined
1529  // with the VPST This should be the divergent instruction
1530  MachineInstr *VCMP =
1531  VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr;
1532 
1533  if (DivergentNextIsPredicated) {
1534  // Insert a VPST at the divergent only if the next instruction
1535  // would actually use it. A VCMP following a VPST can be
1536  // merged into a VPT so do that instead if the VCMP exists.
1537  if (!VCMP) {
1538  // Create a VPST (with a null mask for now, we'll recompute it
1539  // later)
1540  MachineInstrBuilder MIB =
1541  BuildMI(*Divergent->getParent(), Divergent,
1542  Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST));
1543  MIB.addImm(0);
1544  LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
1545  LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
1546  } else {
1547  // No RDA checks are necessary here since the VPST would have been
1548  // directly after the VCMP
1549  ReplaceVCMPWithVPT(VCMP, VCMP);
1550  }
1551  }
1552  }
1553  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1554  LoLoop.ToRemove.insert(VPST);
1555  } else if (Block.containsVCTP()) {
1556  // The vctp will be removed, so either the entire block will be dead or
1557  // the block mask of the vp(s)t will need to be recomputed.
1558  MachineInstr *VPST = Insts.front();
1559  if (Block.size() == 2) {
1560  assert(VPST->getOpcode() == ARM::MVE_VPST &&
1561  "Found a VPST in an otherwise empty vpt block");
1562  LoLoop.ToRemove.insert(VPST);
1563  } else
1564  LoLoop.BlockMasksToRecompute.insert(VPST);
1565  } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
1566  // If this block starts with a VPST then attempt to merge it with the
1567  // preceeding un-merged VCMP into a VPT. This VCMP comes from a VPT
1568  // block that no longer exists
1569  MachineInstr *VPST = Insts.front();
1570  auto Next = ++MachineBasicBlock::iterator(VPST);
1572  "The instruction after a VPST must be predicated");
1573  (void)Next;
1574  MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR);
1575  if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) &&
1576  !LoLoop.ToRemove.contains(VprDef)) {
1577  MachineInstr *VCMP = VprDef;
1578  // The VCMP and VPST can only be merged if the VCMP's operands will have
1579  // the same values at the VPST.
1580  // If any of the instructions between the VCMP and VPST are predicated
1581  // then a different code path is expected to have merged the VCMP and
1582  // VPST already.
1585  RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
1586  RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
1587  ReplaceVCMPWithVPT(VCMP, VPST);
1588  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1589  LoLoop.ToRemove.insert(VPST);
1590  }
1591  }
1592  }
1593  }
1594 
1595  LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1596 }
1597 
1598 void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1599 
1600  // Combine the LoopDec and LoopEnd instructions into LE(TP).
1601  auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {
1602  MachineInstr *End = LoLoop.End;
1603  MachineBasicBlock *MBB = End->getParent();
1604  unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1605  ARM::MVE_LETP : ARM::t2LEUpdate;
1606  MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
1607  TII->get(Opc));
1608  MIB.addDef(ARM::LR);
1609  unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1610  MIB.add(End->getOperand(Off + 0));
1611  MIB.add(End->getOperand(Off + 1));
1612  LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
1613  LoLoop.ToRemove.insert(LoLoop.Dec);
1614  LoLoop.ToRemove.insert(End);
1615  return &*MIB;
1616  };
1617 
1618  // TODO: We should be able to automatically remove these branches before we
1619  // get here - probably by teaching analyzeBranch about the pseudo
1620  // instructions.
1621  // If there is an unconditional branch, after I, that just branches to the
1622  // next block, remove it.
1623  auto RemoveDeadBranch = [](MachineInstr *I) {
1624  MachineBasicBlock *BB = I->getParent();
1625  MachineInstr *Terminator = &BB->instr_back();
1626  if (Terminator->isUnconditionalBranch() && I != Terminator) {
1627  MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
1628  if (BB->isLayoutSuccessor(Succ)) {
1629  LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
1630  Terminator->eraseFromParent();
1631  }
1632  }
1633  };
1634 
1635  if (LoLoop.Revert) {
1636  if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStartLR)
1637  RevertWhile(LoLoop.Start);
1638  else
1639  RevertDo(LoLoop.Start);
1640  if (LoLoop.Dec == LoLoop.End)
1641  RevertLoopEndDec(LoLoop.End);
1642  else
1643  RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
1644  } else {
1645  LoLoop.Start = ExpandLoopStart(LoLoop);
1646  if (LoLoop.Start)
1647  RemoveDeadBranch(LoLoop.Start);
1648  LoLoop.End = ExpandLoopEnd(LoLoop);
1649  RemoveDeadBranch(LoLoop.End);
1650  if (LoLoop.IsTailPredicationLegal())
1651  ConvertVPTBlocks(LoLoop);
1652  for (auto *I : LoLoop.ToRemove) {
1653  LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
1654  I->eraseFromParent();
1655  }
1656  for (auto *I : LoLoop.BlockMasksToRecompute) {
1657  LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I);
1659  LLVM_DEBUG(dbgs() << " ... done: " << *I);
1660  }
1661  }
1662 
1663  PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1664  DFS.ProcessLoop();
1665  const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
1666  for (auto *MBB : PostOrder) {
1668  // FIXME: For some reason, the live-in print order is non-deterministic for
1669  // our tests and I can't out why... So just sort them.
1671  }
1672 
1673  for (auto *MBB : reverse(PostOrder))
1675 
1676  // We've moved, removed and inserted new instructions, so update RDA.
1677  RDA->reset();
1678 }
1679 
1680 bool ARMLowOverheadLoops::RevertNonLoops() {
1681  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
1682  bool Changed = false;
1683 
1684  for (auto &MBB : *MF) {
1689 
1690  for (auto &I : MBB) {
1691  if (isLoopStart(I))
1692  Starts.push_back(&I);
1693  else if (I.getOpcode() == ARM::t2LoopDec)
1694  Decs.push_back(&I);
1695  else if (I.getOpcode() == ARM::t2LoopEnd)
1696  Ends.push_back(&I);
1697  else if (I.getOpcode() == ARM::t2LoopEndDec)
1698  EndDecs.push_back(&I);
1699  }
1700 
1701  if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
1702  continue;
1703 
1704  Changed = true;
1705 
1706  for (auto *Start : Starts) {
1707  if (Start->getOpcode() == ARM::t2WhileLoopStartLR)
1708  RevertWhile(Start);
1709  else
1710  RevertDo(Start);
1711  }
1712  for (auto *Dec : Decs)
1713  RevertLoopDec(Dec);
1714 
1715  for (auto *End : Ends)
1716  RevertLoopEnd(End);
1717  for (auto *End : EndDecs)
1718  RevertLoopEndDec(End);
1719  }
1720  return Changed;
1721 }
1722 
1724  return new ARMLowOverheadLoops();
1725 }
isVectorPredicated
static bool isVectorPredicated(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:82
llvm::TargetRegisterInfo::getRegAsmName
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
Definition: TargetRegisterInfo.h:1015
i
i
Definition: README.txt:29
ARMSubtarget.h
INITIALIZE_PASS
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) static bool TryRemove(MachineInstr *MI
producesFalseLanesZero
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
Definition: ARMLowOverheadLoops.cpp:878
llvm::ARMII::HorizontalReduction
@ HorizontalReduction
Definition: ARMBaseInfo.h:405
if
if(!RDA.isSafeToRemove(MI, Uses, Ignore)) return false
llvm::ReachingDefAnalysis::reset
void reset()
Re-run the analysis.
Definition: ReachingDefAnalysis.cpp:224
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm::ReachingDefAnalysis::hasSameReachingDef
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
Definition: ReachingDefAnalysis.cpp:285
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::ReachingDefAnalysis::isSafeToRemove
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
Definition: ReachingDefAnalysis.cpp:600
llvm::ARM::isVpred
bool isVpred(OperandType op)
Definition: ARMMCTargetDesc.h:119
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::ReachingDefAnalysis::getUniqueReachingMIDef
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
Definition: ReachingDefAnalysis.cpp:433
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::ReachingDefAnalysis
This class provides the reaching def analysis.
Definition: ReachingDefAnalysis.h:69
MCInstrDesc.h
llvm::ARMSubtarget
Definition: ARMSubtarget.h:46
SetOperations.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCRegister::from
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:65
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::createARMLowOverheadLoopsPass
FunctionPass * createARMLowOverheadLoopsPass()
Definition: ARMLowOverheadLoops.cpp:1723
llvm::AArch64::FalseLanesZero
@ FalseLanesZero
Definition: AArch64InstrInfo.h:489
llvm::recomputeLivenessFlags
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
Definition: LivePhysRegs.cpp:275
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:540
llvm::MachineLoopInfo::findLoopPreheader
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
Definition: MachineLoopInfo.cpp:118
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::ARMII::DomainMask
@ DomainMask
Definition: ARMBaseInfo.h:414
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1900
llvm::ARMII::DomainMVE
@ DomainMVE
Definition: ARMBaseInfo.h:419
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:540
llvm::recomputeLiveIns
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
Definition: LivePhysRegs.h:192
isDomainMVE
static bool isDomainMVE(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:95
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
hasVPRUse
static bool hasVPRUse(MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:91
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
MachineLoopUtils.h
llvm::MachineBasicBlock::liveins
iterator_range< livein_iterator > liveins() const
Definition: MachineBasicBlock.h:412
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
shouldInspect
static bool shouldInspect(MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:100
isDo
static bool isDo(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:104
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
CreateVPTBlock
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
Definition: MVEVPTBlockPass.cpp:157
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::VCTPOpcodeToLSTP
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
Definition: MVETailPredUtils.h:25
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::ReachingDefAnalysis::hasLocalDefBefore
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
Definition: ReachingDefAnalysis.cpp:320
llvm::ReachingDefAnalysis::getLocalLiveOutMIDef
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
Definition: ReachingDefAnalysis.cpp:522
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::ReachingDefAnalysis::isSafeToMoveBackwards
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
Definition: ReachingDefAnalysis.cpp:590
llvm::getAddSubImmediate
int getAddSubImmediate(MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:855
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::MachineBasicBlock::pred_size
unsigned pred_size() const
Definition: MachineBasicBlock.h:328
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:91
llvm::isLoopStart
static bool isLoopStart(MachineInstr &MI)
Definition: MVETailPredUtils.h:71
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:752
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:568
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:203
llvm::MachineBasicBlock::insertAfter
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
Definition: MachineBasicBlock.h:784
MachineLoopInfo.h
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:181
llvm::ReachingDefAnalysis::getGlobalUses
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
Definition: ReachingDefAnalysis.cpp:369
producesDoubleWidthResult
static bool producesDoubleWidthResult(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:833
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::RevertLoopDec
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
Definition: MVETailPredUtils.h:129
llvm::findFirstVPTPredOperandIdx
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
Definition: Thumb2InstrInfo.cpp:761
llvm::RevertLoopEnd
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Definition: MVETailPredUtils.h:151
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:660
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:649
llvm::ARMII::DoubleWidthResult
@ DoubleWidthResult
Definition: ARMBaseInfo.h:409
MVETailPredUtils.h
llvm::getVPTInstrPredicate
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
Definition: Thumb2InstrInfo.cpp:774
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:169
llvm::MachineBasicBlock::RegisterMaskPair
Pair of physical register and lane mask.
Definition: MachineBasicBlock.h:101
llvm::ARMISD::VCMP
@ VCMP
Definition: ARMISelLowering.h:142
DisableTailPredication
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
isVectorPredicate
static bool isVectorPredicate(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:87
llvm::VCMPOpcodeToVPT
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
Definition: ARMBaseInstrInfo.h:585
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
llvm::ARMCC::NE
@ NE
Definition: ARMBaseInfo.h:32
Passes.h
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
llvm::cl::opt< bool >
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
ARMBaseRegisterInfo.h
llvm::isVPTOpcode
static bool isVPTOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:569
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:743
llvm::MachineFunctionProperties::Property::TracksLiveness
@ TracksLiveness
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::RevertDoLoopStart
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETailPredUtils.h:119
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ARMBaseInstrInfo
Definition: ARMBaseInstrInfo.h:37
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
MachineFunctionPass.h
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:522
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::isVCTP
static bool isVCTP(const MachineInstr *MI)
Definition: MVETailPredUtils.h:58
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
DEBUG_TYPE
#define DEBUG_TYPE
Definition: ARMLowOverheadLoops.cpp:74
ARMBaseInstrInfo.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::Register::asMCReg
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
ARM.h
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
isRegInClass
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
Definition: ARMLowOverheadLoops.cpp:816
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
llvm::MachineOperand::setIsDef
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Definition: MachineOperand.cpp:101
llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition: MachineBasicBlock.h:233
IT
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
isHorizontalReduction
static bool isHorizontalReduction(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:839
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1489
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm::ReachingDefAnalysis::getReachingLocalUses
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
Definition: ReachingDefAnalysis.cpp:325
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::RevertWhileLoopStartLR
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
Definition: MVETailPredUtils.h:83
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1667
ARMBasicBlockInfo.h
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
for
this could be done in SelectionDAGISel along with other special for
Definition: README.txt:104
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition: MachineInstrBuilder.h:89
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ReachingDefAnalysis::collectKilledOperands
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
Definition: ReachingDefAnalysis.cpp:644
llvm::SetVector::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::ARMII::ValidForTailPredication
@ ValidForTailPredication
Definition: ARMBaseInfo.h:398
llvm::ReachingDefAnalysis::isSafeToDefRegAt
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
Definition: ReachingDefAnalysis.cpp:676
Thumb2InstrInfo.h
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:580
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1337
ReachingDefAnalysis.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::recomputeVPTBlockMask
void recomputeVPTBlockMask(MachineInstr &Instr)
Definition: Thumb2InstrInfo.cpp:786
llvm::ReachingDefAnalysis::getGlobalReachingDefs
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
Definition: ReachingDefAnalysis.cpp:395
canGenerateNonZeros
static bool canGenerateNonZeros(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:849
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
ARM_LOW_OVERHEAD_LOOPS_NAME
#define ARM_LOW_OVERHEAD_LOOPS_NAME
Definition: ARMLowOverheadLoops.cpp:75
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:88
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
RDA
ReachingDefAnalysis & RDA
Definition: ARMLowOverheadLoops.cpp:539
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::ARMII::RetainsPreviousHalfElement
@ RetainsPreviousHalfElement
Definition: ARMBaseInfo.h:402
llvm::ReachingDefAnalysis::isReachingDefLiveOut
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Definition: ReachingDefAnalysis.cpp:500
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ReachingDefAnalysis::getMIOperand
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
Definition: ReachingDefAnalysis.cpp:453
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::pdb::PDB_SymType::Block
@ Block
retainsPreviousHalfElement
static bool retainsPreviousHalfElement(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:824
llvm::isMovRegOpcode
static bool isMovRegOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:738
llvm::MCID::Terminator
@ Terminator
Definition: MCInstrDesc.h:155
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:313
llvm::MCInstrDesc::operands
iterator_range< const_opInfo_iterator > operands() const
Definition: MCInstrDesc.h:235
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::isSubImmOpcode
static bool isSubImmOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:731
llvm::ARMBasicBlockUtils
Definition: ARMBasicBlockInfo.h:109
llvm::getTailPredVectorWidth
static unsigned getTailPredVectorWidth(unsigned Opcode)
Definition: MVETailPredUtils.h:42
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::ReachingDefAnalysis::isSafeToMoveForwards
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
Definition: ReachingDefAnalysis.cpp:580
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:22
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773
LivePhysRegs.h