LLVM  13.0.0git
ARMLowOverheadLoops.cpp
Go to the documentation of this file.
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 /// form should be in the preheader, whereas the while form should be in the
14 /// preheaders only predecessor.
15 /// - t2LoopDec - placed within in the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
17 ///
18 /// In addition to this, we also look for the presence of the VCTP instruction,
19 /// which determines whether we can generated the tail-predicated low-overhead
20 /// loop form.
21 ///
22 /// Assumptions and Dependencies:
23 /// Low-overhead loops are constructed and executed using a setup instruction:
24 /// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP.
25 /// WLS(TP) and LE(TP) are branching instructions with a (large) limited range
26 /// but fixed polarity: WLS can only branch forwards and LE can only branch
27 /// backwards. These restrictions mean that this pass is dependent upon block
28 /// layout and block sizes, which is why it's the last pass to run. The same is
29 /// true for ConstantIslands, but this pass does not increase the size of the
30 /// basic blocks, nor does it change the CFG. Instructions are mainly removed
31 /// during the transform and pseudo instructions are replaced by real ones. In
32 /// some cases, when we have to revert to a 'normal' loop, we have to introduce
33 /// multiple instructions for a single pseudo (see RevertWhile and
34 /// RevertLoopEnd). To handle this situation, t2WhileLoopStartLR and t2LoopEnd
35 /// are defined to be as large as this maximum sequence of replacement
36 /// instructions.
37 ///
38 /// A note on VPR.P0 (the lane mask):
39 /// VPT, VCMP, VPNOT and VCTP won't overwrite VPR.P0 when they update it in a
40 /// "VPT Active" context (which includes low-overhead loops and vpt blocks).
41 /// They will simply "and" the result of their calculation with the current
42 /// value of VPR.P0. You can think of it like this:
43 /// \verbatim
44 /// if VPT active: ; Between a DLSTP/LETP, or for predicated instrs
45 /// VPR.P0 &= Value
46 /// else
47 /// VPR.P0 = Value
48 /// \endverbatim
49 /// When we're inside the low-overhead loop (between DLSTP and LETP), we always
50 /// fall in the "VPT active" case, so we can consider that all VPR writes by
51 /// one of those instruction is actually a "and".
52 //===----------------------------------------------------------------------===//
53 
54 #include "ARM.h"
55 #include "ARMBaseInstrInfo.h"
56 #include "ARMBaseRegisterInfo.h"
57 #include "ARMBasicBlockInfo.h"
58 #include "ARMSubtarget.h"
59 #include "MVETailPredUtils.h"
60 #include "Thumb2InstrInfo.h"
61 #include "llvm/ADT/SetOperations.h"
62 #include "llvm/ADT/SmallSet.h"
68 #include "llvm/CodeGen/Passes.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "arm-low-overhead-loops"
75 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
76 
77 static cl::opt<bool>
78 DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden,
79  cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),
80  cl::init(false));
81 
84  return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
85 }
86 
88  return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
89 }
90 
91 static bool hasVPRUse(MachineInstr &MI) {
92  return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
93 }
94 
95 static bool isDomainMVE(MachineInstr *MI) {
96  uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
97  return Domain == ARMII::DomainMVE;
98 }
99 
101  return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
102 }
103 
104 namespace {
105 
106  using InstSet = SmallPtrSetImpl<MachineInstr *>;
107 
108  class PostOrderLoopTraversal {
109  MachineLoop &ML;
110  MachineLoopInfo &MLI;
113 
114  public:
115  PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)
116  : ML(ML), MLI(MLI) { }
117 
118  const SmallVectorImpl<MachineBasicBlock*> &getOrder() const {
119  return Order;
120  }
121 
122  // Visit all the blocks within the loop, as well as exit blocks and any
123  // blocks properly dominating the header.
124  void ProcessLoop() {
125  std::function<void(MachineBasicBlock*)> Search = [this, &Search]
126  (MachineBasicBlock *MBB) -> void {
127  if (Visited.count(MBB))
128  return;
129 
130  Visited.insert(MBB);
131  for (auto *Succ : MBB->successors()) {
132  if (!ML.contains(Succ))
133  continue;
134  Search(Succ);
135  }
136  Order.push_back(MBB);
137  };
138 
139  // Insert exit blocks.
141  ML.getExitBlocks(ExitBlocks);
142  append_range(Order, ExitBlocks);
143 
144  // Then add the loop body.
145  Search(ML.getHeader());
146 
147  // Then try the preheader and its predecessors.
148  std::function<void(MachineBasicBlock*)> GetPredecessor =
149  [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void {
150  Order.push_back(MBB);
151  if (MBB->pred_size() == 1)
152  GetPredecessor(*MBB->pred_begin());
153  };
154 
155  if (auto *Preheader = ML.getLoopPreheader())
156  GetPredecessor(Preheader);
157  else if (auto *Preheader = MLI.findLoopPreheader(&ML, true, true))
158  GetPredecessor(Preheader);
159  }
160  };
161 
162  struct PredicatedMI {
163  MachineInstr *MI = nullptr;
164  SetVector<MachineInstr*> Predicates;
165 
166  public:
167  PredicatedMI(MachineInstr *I, SetVector<MachineInstr *> &Preds) : MI(I) {
168  assert(I && "Instruction must not be null!");
169  Predicates.insert(Preds.begin(), Preds.end());
170  }
171  };
172 
173  // Represent the current state of the VPR and hold all instances which
174  // represent a VPT block, which is a list of instructions that begins with a
175  // VPT/VPST and has a maximum of four proceeding instructions. All
176  // instructions within the block are predicated upon the vpr and we allow
177  // instructions to define the vpr within in the block too.
178  class VPTState {
179  friend struct LowOverheadLoop;
180 
182 
183  static SmallVector<VPTState, 4> Blocks;
184  static SetVector<MachineInstr *> CurrentPredicates;
185  static std::map<MachineInstr *,
186  std::unique_ptr<PredicatedMI>> PredicatedInsts;
187 
188  static void CreateVPTBlock(MachineInstr *MI) {
189  assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))
190  && "Can't begin VPT without predicate");
191  Blocks.emplace_back(MI);
192  // The execution of MI is predicated upon the current set of instructions
193  // that are AND'ed together to form the VPR predicate value. In the case
194  // that MI is a VPT, CurrentPredicates will also just be MI.
195  PredicatedInsts.emplace(
196  MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
197  }
198 
199  static void reset() {
200  Blocks.clear();
201  PredicatedInsts.clear();
202  CurrentPredicates.clear();
203  }
204 
205  static void addInst(MachineInstr *MI) {
206  Blocks.back().insert(MI);
207  PredicatedInsts.emplace(
208  MI, std::make_unique<PredicatedMI>(MI, CurrentPredicates));
209  }
210 
211  static void addPredicate(MachineInstr *MI) {
212  LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);
213  CurrentPredicates.insert(MI);
214  }
215 
216  static void resetPredicate(MachineInstr *MI) {
217  LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);
218  CurrentPredicates.clear();
219  CurrentPredicates.insert(MI);
220  }
221 
222  public:
223  // Have we found an instruction within the block which defines the vpr? If
224  // so, not all the instructions in the block will have the same predicate.
225  static bool hasUniformPredicate(VPTState &Block) {
226  return getDivergent(Block) == nullptr;
227  }
228 
229  // If it exists, return the first internal instruction which modifies the
230  // VPR.
231  static MachineInstr *getDivergent(VPTState &Block) {
232  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
233  for (unsigned i = 1; i < Insts.size(); ++i) {
234  MachineInstr *Next = Insts[i];
235  if (isVectorPredicate(Next))
236  return Next; // Found an instruction altering the vpr.
237  }
238  return nullptr;
239  }
240 
241  // Return whether the given instruction is predicated upon a VCTP.
242  static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {
243  SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI]->Predicates;
244  if (Exclusive && Predicates.size() != 1)
245  return false;
246  for (auto *PredMI : Predicates)
247  if (isVCTP(PredMI))
248  return true;
249  return false;
250  }
251 
252  // Is the VPST, controlling the block entry, predicated upon a VCTP.
253  static bool isEntryPredicatedOnVCTP(VPTState &Block,
254  bool Exclusive = false) {
255  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
256  return isPredicatedOnVCTP(Insts.front(), Exclusive);
257  }
258 
259  // If this block begins with a VPT, we can check whether it's using
260  // at least one predicated input(s), as well as possible loop invariant
261  // which would result in it being implicitly predicated.
262  static bool hasImplicitlyValidVPT(VPTState &Block,
264  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
265  MachineInstr *VPT = Insts.front();
266  assert(isVPTOpcode(VPT->getOpcode()) &&
267  "Expected VPT block to begin with VPT/VPST");
268 
269  if (VPT->getOpcode() == ARM::MVE_VPST)
270  return false;
271 
272  auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
273  MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
274  return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
275  };
276 
277  auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
278  MachineOperand &MO = MI->getOperand(Idx);
279  if (!MO.isReg() || !MO.getReg())
280  return true;
281 
283  RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
284  if (Defs.empty())
285  return true;
286 
287  for (auto *Def : Defs)
288  if (Def->getParent() == VPT->getParent())
289  return false;
290  return true;
291  };
292 
293  // Check that at least one of the operands is directly predicated on a
294  // vctp and allow an invariant value too.
295  return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
296  (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
297  (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
298  }
299 
300  static bool isValid(ReachingDefAnalysis &RDA) {
301  // All predication within the loop should be based on vctp. If the block
302  // isn't predicated on entry, check whether the vctp is within the block
303  // and that all other instructions are then predicated on it.
304  for (auto &Block : Blocks) {
305  if (isEntryPredicatedOnVCTP(Block, false) ||
306  hasImplicitlyValidVPT(Block, RDA))
307  continue;
308 
309  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
310  // We don't know how to convert a block with just a VPT;VCTP into
311  // anything valid once we remove the VCTP. For now just bail out.
312  assert(isVPTOpcode(Insts.front()->getOpcode()) &&
313  "Expected VPT block to start with a VPST or VPT!");
314  if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
315  isVCTP(Insts.back()))
316  return false;
317 
318  for (auto *MI : Insts) {
319  // Check that any internal VCTPs are 'Then' predicated.
321  return false;
322  // Skip other instructions that build up the predicate.
323  if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI))
324  continue;
325  // Check that any other instructions are predicated upon a vctp.
326  // TODO: We could infer when VPTs are implicitly predicated on the
327  // vctp (when the operands are predicated).
328  if (!isPredicatedOnVCTP(MI)) {
329  LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI);
330  return false;
331  }
332  }
333  }
334  return true;
335  }
336 
337  VPTState(MachineInstr *MI) { Insts.push_back(MI); }
338 
339  void insert(MachineInstr *MI) {
340  Insts.push_back(MI);
341  // VPT/VPST + 4 predicated instructions.
342  assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
343  }
344 
345  bool containsVCTP() const {
346  for (auto *MI : Insts)
347  if (isVCTP(MI))
348  return true;
349  return false;
350  }
351 
352  unsigned size() const { return Insts.size(); }
353  SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }
354  };
355 
356  struct LowOverheadLoop {
357 
358  MachineLoop &ML;
359  MachineBasicBlock *Preheader = nullptr;
360  MachineLoopInfo &MLI;
362  const TargetRegisterInfo &TRI;
363  const ARMBaseInstrInfo &TII;
364  MachineFunction *MF = nullptr;
365  MachineBasicBlock::iterator StartInsertPt;
366  MachineBasicBlock *StartInsertBB = nullptr;
367  MachineInstr *Start = nullptr;
368  MachineInstr *Dec = nullptr;
369  MachineInstr *End = nullptr;
370  MachineOperand TPNumElements;
373  SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
374  bool Revert = false;
375  bool CannotTailPredicate = false;
376 
377  LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI,
379  const ARMBaseInstrInfo &TII)
380  : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII),
381  TPNumElements(MachineOperand::CreateImm(0)) {
382  MF = ML.getHeader()->getParent();
383  if (auto *MBB = ML.getLoopPreheader())
384  Preheader = MBB;
385  else if (auto *MBB = MLI.findLoopPreheader(&ML, true, true))
386  Preheader = MBB;
387  VPTState::reset();
388  }
389 
390  // If this is an MVE instruction, check that we know how to use tail
391  // predication with it. Record VPT blocks and return whether the
392  // instruction is valid for tail predication.
393  bool ValidateMVEInst(MachineInstr *MI);
394 
395  void AnalyseMVEInst(MachineInstr *MI) {
396  CannotTailPredicate = !ValidateMVEInst(MI);
397  }
398 
399  bool IsTailPredicationLegal() const {
400  // For now, let's keep things really simple and only support a single
401  // block for tail predication.
402  return !Revert && FoundAllComponents() && !VCTPs.empty() &&
403  !CannotTailPredicate && ML.getNumBlocks() == 1;
404  }
405 
406  // Given that MI is a VCTP, check that is equivalent to any other VCTPs
407  // found.
408  bool AddVCTP(MachineInstr *MI);
409 
410  // Check that the predication in the loop will be equivalent once we
411  // perform the conversion. Also ensure that we can provide the number
412  // of elements to the loop start instruction.
413  bool ValidateTailPredicate();
414 
415  // Check that any values available outside of the loop will be the same
416  // after tail predication conversion.
417  bool ValidateLiveOuts();
418 
419  // Is it safe to define LR with DLS/WLS?
420  // LR can be defined if it is the operand to start, because it's the same
421  // value, or if it's going to be equivalent to the operand to Start.
422  MachineInstr *isSafeToDefineLR();
423 
424  // Check the branch targets are within range and we satisfy our
425  // restrictions.
426  void Validate(ARMBasicBlockUtils *BBUtils);
427 
428  bool FoundAllComponents() const {
429  return Start && Dec && End;
430  }
431 
432  SmallVectorImpl<VPTState> &getVPTBlocks() {
433  return VPTState::Blocks;
434  }
435 
436  // Return the operand for the loop start instruction. This will be the loop
437  // iteration count, or the number of elements if we're tail predicating.
438  MachineOperand &getLoopStartOperand() {
439  if (IsTailPredicationLegal())
440  return TPNumElements;
441  return Start->getOperand(1);
442  }
443 
444  unsigned getStartOpcode() const {
445  bool IsDo = isDoLoopStart(*Start);
446  if (!IsTailPredicationLegal())
447  return IsDo ? ARM::t2DLS : ARM::t2WLS;
448 
449  return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo);
450  }
451 
452  void dump() const {
453  if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
454  if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
455  if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;
456  if (!VCTPs.empty()) {
457  dbgs() << "ARM Loops: Found VCTP(s):\n";
458  for (auto *MI : VCTPs)
459  dbgs() << " - " << *MI;
460  }
461  if (!FoundAllComponents())
462  dbgs() << "ARM Loops: Not a low-overhead loop.\n";
463  else if (!(Start && Dec && End))
464  dbgs() << "ARM Loops: Failed to find all loop components.\n";
465  }
466  };
467 
468  class ARMLowOverheadLoops : public MachineFunctionPass {
469  MachineFunction *MF = nullptr;
470  MachineLoopInfo *MLI = nullptr;
471  ReachingDefAnalysis *RDA = nullptr;
472  const ARMBaseInstrInfo *TII = nullptr;
473  MachineRegisterInfo *MRI = nullptr;
474  const TargetRegisterInfo *TRI = nullptr;
475  std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
476 
477  public:
478  static char ID;
479 
480  ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
481 
482  void getAnalysisUsage(AnalysisUsage &AU) const override {
483  AU.setPreservesCFG();
487  }
488 
489  bool runOnMachineFunction(MachineFunction &MF) override;
490 
491  MachineFunctionProperties getRequiredProperties() const override {
495  }
496 
497  StringRef getPassName() const override {
499  }
500 
501  private:
502  bool ProcessLoop(MachineLoop *ML);
503 
504  bool RevertNonLoops();
505 
506  void RevertWhile(MachineInstr *MI) const;
507  void RevertDo(MachineInstr *MI) const;
508 
509  bool RevertLoopDec(MachineInstr *MI) const;
510 
511  void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
512 
513  void RevertLoopEndDec(MachineInstr *MI) const;
514 
515  void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
516 
517  MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
518 
519  void Expand(LowOverheadLoop &LoLoop);
520 
521  void IterationCountDCE(LowOverheadLoop &LoLoop);
522  };
523 }
524 
525 char ARMLowOverheadLoops::ID = 0;
526 
527 SmallVector<VPTState, 4> VPTState::Blocks;
528 SetVector<MachineInstr *> VPTState::CurrentPredicates;
529 std::map<MachineInstr *,
530  std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
531 
533  false, false)
534 
535 static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
536  InstSet &ToRemove, InstSet &Ignore) {
537 
538  // Check that we can remove all of Killed without having to modify any IT
539  // blocks.
540  auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) {
541  // Collect the dead code and the MBBs in which they reside.
543  for (auto *Dead : Killed)
544  BasicBlocks.insert(Dead->getParent());
545 
546  // Collect IT blocks in all affected basic blocks.
547  std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
548  for (auto *MBB : BasicBlocks) {
549  for (auto &IT : *MBB) {
550  if (IT.getOpcode() != ARM::t2IT)
551  continue;
553  ITBlocks[&IT]);
554  }
555  }
556 
557  // If we're removing all of the instructions within an IT block, then
558  // also remove the IT instruction.
559  SmallPtrSet<MachineInstr *, 2> ModifiedITs;
561  for (auto *Dead : Killed) {
562  if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
564  RemoveITs.insert(IT);
565  auto &CurrentBlock = ITBlocks[IT];
566  CurrentBlock.erase(Dead);
567  if (CurrentBlock.empty())
568  ModifiedITs.erase(IT);
569  else
570  ModifiedITs.insert(IT);
571  }
572  }
573  if (!ModifiedITs.empty())
574  return false;
575  Killed.insert(RemoveITs.begin(), RemoveITs.end());
576  return true;
577  };
578 
580  if (!RDA.isSafeToRemove(MI, Uses, Ignore))
581  return false;
582 
583  if (WontCorruptITs(Uses, RDA)) {
584  ToRemove.insert(Uses.begin(), Uses.end());
585  LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI
586  << " - can also remove:\n";
587  for (auto *Use : Uses)
588  dbgs() << " - " << *Use);
589 
591  RDA.collectKilledOperands(MI, Killed);
592  if (WontCorruptITs(Killed, RDA)) {
593  ToRemove.insert(Killed.begin(), Killed.end());
594  LLVM_DEBUG(for (auto *Dead : Killed)
595  dbgs() << " - " << *Dead);
596  }
597  return true;
598  }
599  return false;
600 }
601 
602 bool LowOverheadLoop::ValidateTailPredicate() {
603  if (!IsTailPredicationLegal()) {
604  LLVM_DEBUG(if (VCTPs.empty())
605  dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
606  dbgs() << "ARM Loops: Tail-predication is not valid.\n");
607  return false;
608  }
609 
610  assert(!VCTPs.empty() && "VCTP instruction expected but is not set");
611  assert(ML.getBlocks().size() == 1 &&
612  "Shouldn't be processing a loop with more than one block");
613 
615  LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");
616  return false;
617  }
618 
619  if (!VPTState::isValid(RDA)) {
620  LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");
621  return false;
622  }
623 
624  if (!ValidateLiveOuts()) {
625  LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");
626  return false;
627  }
628 
629  // For tail predication, we need to provide the number of elements, instead
630  // of the iteration count, to the loop start instruction. The number of
631  // elements is provided to the vctp instruction, so we need to check that
632  // we can use this register at InsertPt.
633  MachineInstr *VCTP = VCTPs.back();
634  if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
635  Start->getOpcode() == ARM::t2WhileLoopStartTP) {
636  TPNumElements = Start->getOperand(2);
637  StartInsertPt = Start;
638  StartInsertBB = Start->getParent();
639  } else {
640  TPNumElements = VCTP->getOperand(1);
641  MCRegister NumElements = TPNumElements.getReg().asMCReg();
642 
643  // If the register is defined within loop, then we can't perform TP.
644  // TODO: Check whether this is just a mov of a register that would be
645  // available.
646  if (RDA.hasLocalDefBefore(VCTP, NumElements)) {
647  LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");
648  return false;
649  }
650 
651  // The element count register maybe defined after InsertPt, in which case we
652  // need to try to move either InsertPt or the def so that the [w|d]lstp can
653  // use the value.
654 
655  if (StartInsertPt != StartInsertBB->end() &&
656  !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
657  if (auto *ElemDef =
658  RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
659  if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
660  ElemDef->removeFromParent();
661  StartInsertBB->insert(StartInsertPt, ElemDef);
662  LLVM_DEBUG(dbgs()
663  << "ARM Loops: Moved element count def: " << *ElemDef);
664  } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
665  StartInsertPt->removeFromParent();
666  StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
667  &*StartInsertPt);
668  LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
669  } else {
670  // If we fail to move an instruction and the element count is provided
671  // by a mov, use the mov operand if it will have the same value at the
672  // insertion point
673  MachineOperand Operand = ElemDef->getOperand(1);
674  if (isMovRegOpcode(ElemDef->getOpcode()) &&
675  RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) ==
676  RDA.getUniqueReachingMIDef(&*StartInsertPt,
677  Operand.getReg().asMCReg())) {
678  TPNumElements = Operand;
679  NumElements = TPNumElements.getReg();
680  } else {
681  LLVM_DEBUG(dbgs()
682  << "ARM Loops: Unable to move element count to loop "
683  << "start instruction.\n");
684  return false;
685  }
686  }
687  }
688  }
689 
690  // Especially in the case of while loops, InsertBB may not be the
691  // preheader, so we need to check that the register isn't redefined
692  // before entering the loop.
693  auto CannotProvideElements = [this](MachineBasicBlock *MBB,
694  MCRegister NumElements) {
695  if (MBB->empty())
696  return false;
697  // NumElements is redefined in this block.
698  if (RDA.hasLocalDefBefore(&MBB->back(), NumElements))
699  return true;
700 
701  // Don't continue searching up through multiple predecessors.
702  if (MBB->pred_size() > 1)
703  return true;
704 
705  return false;
706  };
707 
708  // Search backwards for a def, until we get to InsertBB.
709  MachineBasicBlock *MBB = Preheader;
710  while (MBB && MBB != StartInsertBB) {
711  if (CannotProvideElements(MBB, NumElements)) {
712  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
713  return false;
714  }
715  MBB = *MBB->pred_begin();
716  }
717  }
718 
719  // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect
720  // world the [w|d]lstp instruction would be last instruction in the preheader
721  // and so it would only affect instructions within the loop body. But due to
722  // scheduling, and/or the logic in this pass (above), the insertion point can
723  // be moved earlier. So if the Loop Start isn't the last instruction in the
724  // preheader, and if the initial element count is smaller than the vector
725  // width, the Loop Start instruction will immediately generate one or more
726  // false lane mask which can, incorrectly, affect the proceeding MVE
727  // instructions in the preheader.
728  if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {
729  LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");
730  return false;
731  }
732 
733  // Check that the value change of the element count is what we expect and
734  // that the predication will be equivalent. For this we need:
735  // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
736  // and we can also allow register copies within the chain too.
737  auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) {
738  return -getAddSubImmediate(*MI) == ExpectedVecWidth;
739  };
740 
741  MachineBasicBlock *MBB = VCTP->getParent();
742  // Remove modifications to the element count since they have no purpose in a
743  // tail predicated loop. Explicitly refer to the vctp operand no matter which
744  // register NumElements has been assigned to, since that is what the
745  // modifications will be using
746  if (auto *Def = RDA.getUniqueReachingMIDef(
747  &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) {
748  SmallPtrSet<MachineInstr*, 2> ElementChain;
750  unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode());
751 
752  Ignore.insert(VCTPs.begin(), VCTPs.end());
753 
754  if (TryRemove(Def, RDA, ElementChain, Ignore)) {
755  bool FoundSub = false;
756 
757  for (auto *MI : ElementChain) {
758  if (isMovRegOpcode(MI->getOpcode()))
759  continue;
760 
761  if (isSubImmOpcode(MI->getOpcode())) {
762  if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {
763  LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
764  " count: " << *MI);
765  return false;
766  }
767  FoundSub = true;
768  } else {
769  LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"
770  " count: " << *MI);
771  return false;
772  }
773  }
774  ToRemove.insert(ElementChain.begin(), ElementChain.end());
775  }
776  }
777 
778  // If we converted the LoopStart to a t2DoLoopStartTP/t2WhileLoopStartTP, we
779  // can also remove any extra instructions in the preheader, which often
780  // includes a now unused MOV.
781  if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
782  Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
783  Preheader && !Preheader->empty() &&
784  !RDA.hasLocalDefBefore(VCTP, VCTP->getOperand(1).getReg())) {
785  if (auto *Def = RDA.getUniqueReachingMIDef(
786  &Preheader->back(), VCTP->getOperand(1).getReg().asMCReg())) {
788  Ignore.insert(VCTPs.begin(), VCTPs.end());
789  TryRemove(Def, RDA, ToRemove, Ignore);
790  }
791  }
792 
793  return true;
794 }
795 
796 static bool isRegInClass(const MachineOperand &MO,
797  const TargetRegisterClass *Class) {
798  return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());
799 }
800 
801 // MVE 'narrowing' operate on half a lane, reading from half and writing
802 // to half, which are referred to has the top and bottom half. The other
803 // half retains its previous value.
805  const MCInstrDesc &MCID = MI.getDesc();
806  uint64_t Flags = MCID.TSFlags;
807  return (Flags & ARMII::RetainsPreviousHalfElement) != 0;
808 }
809 
810 // Some MVE instructions read from the top/bottom halves of their operand(s)
811 // and generate a vector result with result elements that are double the
812 // width of the input.
814  const MCInstrDesc &MCID = MI.getDesc();
815  uint64_t Flags = MCID.TSFlags;
816  return (Flags & ARMII::DoubleWidthResult) != 0;
817 }
818 
819 static bool isHorizontalReduction(const MachineInstr &MI) {
820  const MCInstrDesc &MCID = MI.getDesc();
821  uint64_t Flags = MCID.TSFlags;
822  return (Flags & ARMII::HorizontalReduction) != 0;
823 }
824 
825 // Can this instruction generate a non-zero result when given only zeroed
826 // operands? This allows us to know that, given operands with false bytes
827 // zeroed by masked loads, that the result will also contain zeros in those
828 // bytes.
829 static bool canGenerateNonZeros(const MachineInstr &MI) {
830 
831  // Check for instructions which can write into a larger element size,
832  // possibly writing into a previous zero'd lane.
834  return true;
835 
836  switch (MI.getOpcode()) {
837  default:
838  break;
839  // FIXME: VNEG FP and -0? I think we'll need to handle this once we allow
840  // fp16 -> fp32 vector conversions.
841  // Instructions that perform a NOT will generate 1s from 0s.
842  case ARM::MVE_VMVN:
843  case ARM::MVE_VORN:
844  // Count leading zeros will do just that!
845  case ARM::MVE_VCLZs8:
846  case ARM::MVE_VCLZs16:
847  case ARM::MVE_VCLZs32:
848  return true;
849  }
850  return false;
851 }
852 
853 // Look at its register uses to see if it only can only receive zeros
854 // into its false lanes which would then produce zeros. Also check that
855 // the output register is also defined by an FalseLanesZero instruction
856 // so that if tail-predication happens, the lanes that aren't updated will
857 // still be zeros.
859  const TargetRegisterClass *QPRs,
860  const ReachingDefAnalysis &RDA,
861  InstSet &FalseLanesZero) {
862  if (canGenerateNonZeros(MI))
863  return false;
864 
866  // Predicated loads will write zeros to the falsely predicated bytes of the
867  // destination register.
868  if (MI.mayLoad())
869  return isPredicated;
870 
871  auto IsZeroInit = [](MachineInstr *Def) {
872  return !isVectorPredicated(Def) &&
873  Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
874  Def->getOperand(1).getImm() == 0;
875  };
876 
877  bool AllowScalars = isHorizontalReduction(MI);
878  for (auto &MO : MI.operands()) {
879  if (!MO.isReg() || !MO.getReg())
880  continue;
881  if (!isRegInClass(MO, QPRs) && AllowScalars)
882  continue;
883 
884  // Check that this instruction will produce zeros in its false lanes:
885  // - If it only consumes false lanes zero or constant 0 (vmov #0)
886  // - If it's predicated, it only matters that it's def register already has
887  // false lane zeros, so we can ignore the uses.
889  RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs);
890  for (auto *Def : Defs) {
891  if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
892  continue;
893  if (MO.isUse() && isPredicated)
894  continue;
895  return false;
896  }
897  }
898  LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);
899  return true;
900 }
901 
902 bool LowOverheadLoop::ValidateLiveOuts() {
903  // We want to find out if the tail-predicated version of this loop will
904  // produce the same values as the loop in its original form. For this to
905  // be true, the newly inserted implicit predication must not change the
906  // the (observable) results.
907  // We're doing this because many instructions in the loop will not be
908  // predicated and so the conversion from VPT predication to tail-predication
909  // can result in different values being produced; due to the tail-predication
910  // preventing many instructions from updating their falsely predicated
911  // lanes. This analysis assumes that all the instructions perform lane-wise
912  // operations and don't perform any exchanges.
913  // A masked load, whether through VPT or tail predication, will write zeros
914  // to any of the falsely predicated bytes. So, from the loads, we know that
915  // the false lanes are zeroed and here we're trying to track that those false
916  // lanes remain zero, or where they change, the differences are masked away
917  // by their user(s).
918  // All MVE stores have to be predicated, so we know that any predicate load
919  // operands, or stored results are equivalent already. Other explicitly
920  // predicated instructions will perform the same operation in the original
921  // loop and the tail-predicated form too. Because of this, we can insert
922  // loads, stores and other predicated instructions into our Predicated
923  // set and build from there.
924  const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);
925  SetVector<MachineInstr *> FalseLanesUnknown;
928  MachineBasicBlock *Header = ML.getHeader();
929 
930  for (auto &MI : *Header) {
931  if (!shouldInspect(MI))
932  continue;
933 
934  if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode()))
935  continue;
936 
938  bool retainsOrReduces =
940 
941  if (isPredicated)
942  Predicated.insert(&MI);
944  FalseLanesZero.insert(&MI);
945  else if (MI.getNumDefs() == 0)
946  continue;
947  else if (!isPredicated && retainsOrReduces)
948  return false;
949  else if (!isPredicated)
950  FalseLanesUnknown.insert(&MI);
951  }
952 
953  auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
954  SmallPtrSetImpl<MachineInstr *> &Predicated) {
956  RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses);
957  for (auto *Use : Uses) {
958  if (Use != MI && !Predicated.count(Use))
959  return false;
960  }
961  return true;
962  };
963 
964  // Visit the unknowns in reverse so that we can start at the values being
965  // stored and then we can work towards the leaves, hopefully adding more
966  // instructions to Predicated. Successfully terminating the loop means that
967  // all the unknown values have to found to be masked by predicated user(s).
968  // For any unpredicated values, we store them in NonPredicated so that we
969  // can later check whether these form a reduction.
970  SmallPtrSet<MachineInstr*, 2> NonPredicated;
971  for (auto *MI : reverse(FalseLanesUnknown)) {
972  for (auto &MO : MI->operands()) {
973  if (!isRegInClass(MO, QPRs) || !MO.isDef())
974  continue;
975  if (!HasPredicatedUsers(MI, MO, Predicated)) {
976  LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
977  << TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
978  NonPredicated.insert(MI);
979  break;
980  }
981  }
982  // Any unknown false lanes have been masked away by the user(s).
983  if (!NonPredicated.contains(MI))
984  Predicated.insert(MI);
985  }
986 
989  ML.getExitBlocks(ExitBlocks);
990  assert(ML.getNumBlocks() == 1 && "Expected single block loop!");
991  assert(ExitBlocks.size() == 1 && "Expected a single exit block");
992  MachineBasicBlock *ExitBB = ExitBlocks.front();
993  for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
994  // TODO: Instead of blocking predication, we could move the vctp to the exit
995  // block and calculate it's operand there in or the preheader.
996  if (RegMask.PhysReg == ARM::VPR)
997  return false;
998  // Check Q-regs that are live in the exit blocks. We don't collect scalars
999  // because they won't be affected by lane predication.
1000  if (QPRs->contains(RegMask.PhysReg))
1001  if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg))
1002  LiveOutMIs.insert(MI);
1003  }
1004 
1005  // We've already validated that any VPT predication within the loop will be
1006  // equivalent when we perform the predication transformation; so we know that
1007  // any VPT predicated instruction is predicated upon VCTP. Any live-out
1008  // instruction needs to be predicated, so check this here. The instructions
1009  // in NonPredicated have been found to be a reduction that we can ensure its
1010  // legality.
1011  for (auto *MI : LiveOutMIs) {
1012  if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
1013  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
1014  return false;
1015  }
1016  }
1017 
1018  return true;
1019 }
1020 
1021 void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
1022  if (Revert)
1023  return;
1024 
1025  // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP]
1026  // can only jump back.
1027  auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,
1028  ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {
1029  MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
1030  ? End->getOperand(1).getMBB()
1031  : End->getOperand(2).getMBB();
1032  // TODO Maybe there's cases where the target doesn't have to be the header,
1033  // but for now be safe and revert.
1034  if (TgtBB != ML.getHeader()) {
1035  LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");
1036  return false;
1037  }
1038 
1039  // The WLS and LE instructions have 12-bits for the label offset. WLS
1040  // requires a positive offset, while LE uses negative.
1041  if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||
1042  !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {
1043  LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
1044  return false;
1045  }
1046 
1047  if (isWhileLoopStart(*Start)) {
1048  MachineBasicBlock *TargetBB = getWhileLoopStartTargetBB(*Start);
1049  if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1050  !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1051  LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
1052  return false;
1053  }
1054  }
1055  return true;
1056  };
1057 
1058  StartInsertPt = MachineBasicBlock::iterator(Start);
1059  StartInsertBB = Start->getParent();
1060  LLVM_DEBUG(dbgs() << "ARM Loops: Will insert LoopStart at "
1061  << *StartInsertPt);
1062 
1063  Revert = !ValidateRanges(Start, End, BBUtils, ML);
1064  CannotTailPredicate = !ValidateTailPredicate();
1065 }
1066 
1067 bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
1068  LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI);
1069  if (VCTPs.empty()) {
1070  VCTPs.push_back(MI);
1071  return true;
1072  }
1073 
1074  // If we find another VCTP, check whether it uses the same value as the main VCTP.
1075  // If it does, store it in the VCTPs set, else refuse it.
1076  MachineInstr *Prev = VCTPs.back();
1077  if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) ||
1078  !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) {
1079  LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "
1080  "definition from the main VCTP");
1081  return false;
1082  }
1083  VCTPs.push_back(MI);
1084  return true;
1085 }
1086 
1088 
1089  auto GetFrameIndex = [](MachineMemOperand *Operand) {
1090  const PseudoSourceValue *PseudoValue = Operand->getPseudoValue();
1091  if (PseudoValue && PseudoValue->kind() == PseudoSourceValue::FixedStack) {
1092  if (const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1093  return FS->getFrameIndex();
1094  }
1095  }
1096  return -1;
1097  };
1098 
1099  auto IsStackOp = [GetFrameIndex](MachineInstr *I) {
1100  switch (I->getOpcode()) {
1101  case ARM::MVE_VSTRWU32:
1102  case ARM::MVE_VLDRWU32: {
1103  return I->getOperand(1).getReg() == ARM::SP &&
1104  I->memoperands().size() == 1 &&
1105  GetFrameIndex(I->memoperands().front()) >= 0;
1106  }
1107  default:
1108  return false;
1109  }
1110  };
1111 
1112  // An unpredicated vector register spill is allowed if all of the uses of the
1113  // stack slot are within the loop
1114  if (MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(MI))
1115  return false;
1116 
1117  // Search all blocks after the loop for accesses to the same stack slot.
1118  // ReachingDefAnalysis doesn't work for sp as it relies on registers being
1119  // live-out (which sp never is) to know what blocks to look in
1120  if (MI->memoperands().size() == 0)
1121  return false;
1122  int FI = GetFrameIndex(MI->memoperands().front());
1123 
1124  MachineFrameInfo FrameInfo = MI->getParent()->getParent()->getFrameInfo();
1125  if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1126  return false;
1127 
1129  ML->getExitBlocks(Frontier);
1130  SmallPtrSet<MachineBasicBlock *, 4> Visited{MI->getParent()};
1131  unsigned Idx = 0;
1132  while (Idx < Frontier.size()) {
1133  MachineBasicBlock *BB = Frontier[Idx];
1134  bool LookAtSuccessors = true;
1135  for (auto &I : *BB) {
1136  if (!IsStackOp(&I) || I.memoperands().size() == 0)
1137  continue;
1138  if (GetFrameIndex(I.memoperands().front()) != FI)
1139  continue;
1140  // If this block has a store to the stack slot before any loads then we
1141  // can ignore the block
1142  if (I.getOpcode() == ARM::MVE_VSTRWU32) {
1143  LookAtSuccessors = false;
1144  break;
1145  }
1146  // If the store and the load are using the same stack slot then the
1147  // store isn't valid for tail predication
1148  if (I.getOpcode() == ARM::MVE_VLDRWU32)
1149  return false;
1150  }
1151 
1152  if (LookAtSuccessors) {
1153  for (auto Succ : BB->successors()) {
1154  if (!Visited.contains(Succ) && !is_contained(Frontier, Succ))
1155  Frontier.push_back(Succ);
1156  }
1157  }
1158  Visited.insert(BB);
1159  Idx++;
1160  }
1161 
1162  return true;
1163 }
1164 
1165 bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
1166  if (CannotTailPredicate)
1167  return false;
1168 
1169  if (!shouldInspect(*MI))
1170  return true;
1171 
1172  if (MI->getOpcode() == ARM::MVE_VPSEL ||
1173  MI->getOpcode() == ARM::MVE_VPNOT) {
1174  // TODO: Allow VPSEL and VPNOT, we currently cannot because:
1175  // 1) It will use the VPR as a predicate operand, but doesn't have to be
1176  // instead a VPT block, which means we can assert while building up
1177  // the VPT block because we don't find another VPT or VPST to being a new
1178  // one.
1179  // 2) VPSEL still requires a VPR operand even after tail predicating,
1180  // which means we can't remove it unless there is another
1181  // instruction, such as vcmp, that can provide the VPR def.
1182  return false;
1183  }
1184 
1185  // Record all VCTPs and check that they're equivalent to one another.
1186  if (isVCTP(MI) && !AddVCTP(MI))
1187  return false;
1188 
1189  // Inspect uses first so that any instructions that alter the VPR don't
1190  // alter the predicate upon themselves.
1191  const MCInstrDesc &MCID = MI->getDesc();
1192  bool IsUse = false;
1193  unsigned LastOpIdx = MI->getNumOperands() - 1;
1194  for (auto &Op : enumerate(reverse(MCID.operands()))) {
1195  const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
1196  if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
1197  continue;
1198 
1199  if (ARM::isVpred(Op.value().OperandType)) {
1200  VPTState::addInst(MI);
1201  IsUse = true;
1202  } else if (MI->getOpcode() != ARM::MVE_VPST) {
1203  LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);
1204  return false;
1205  }
1206  }
1207 
1208  // If we find an instruction that has been marked as not valid for tail
1209  // predication, only allow the instruction if it's contained within a valid
1210  // VPT block.
1211  bool RequiresExplicitPredication =
1213  if (isDomainMVE(MI) && RequiresExplicitPredication) {
1214  LLVM_DEBUG(if (!IsUse)
1215  dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
1216  return IsUse;
1217  }
1218 
1219  // If the instruction is already explicitly predicated, then the conversion
1220  // will be fine, but ensure that all store operations are predicated.
1221  if (MI->mayStore() && !ValidateMVEStore(MI, &ML))
1222  return IsUse;
1223 
1224  // If this instruction defines the VPR, update the predicate for the
1225  // proceeding instructions.
1226  if (isVectorPredicate(MI)) {
1227  // Clear the existing predicate when we're not in VPT Active state,
1228  // otherwise we add to it.
1229  if (!isVectorPredicated(MI))
1230  VPTState::resetPredicate(MI);
1231  else
1232  VPTState::addPredicate(MI);
1233  }
1234 
1235  // Finally once the predicate has been modified, we can start a new VPT
1236  // block if necessary.
1237  if (isVPTOpcode(MI->getOpcode()))
1239 
1240  return true;
1241 }
1242 
1243 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
1244  const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
1245  if (!ST.hasLOB())
1246  return false;
1247 
1248  MF = &mf;
1249  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
1250 
1251  MLI = &getAnalysis<MachineLoopInfo>();
1252  RDA = &getAnalysis<ReachingDefAnalysis>();
1254  MRI = &MF->getRegInfo();
1255  TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
1256  TRI = ST.getRegisterInfo();
1257  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
1258  BBUtils->computeAllBlockSizes();
1259  BBUtils->adjustBBOffsetsAfter(&MF->front());
1260 
1261  bool Changed = false;
1262  for (auto ML : *MLI) {
1263  if (ML->isOutermost())
1264  Changed |= ProcessLoop(ML);
1265  }
1266  Changed |= RevertNonLoops();
1267  return Changed;
1268 }
1269 
1270 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
1271 
1272  bool Changed = false;
1273 
1274  // Process inner loops first.
1275  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
1276  Changed |= ProcessLoop(*I);
1277 
1278  LLVM_DEBUG({
1279  dbgs() << "ARM Loops: Processing loop containing:\n";
1280  if (auto *Preheader = ML->getLoopPreheader())
1281  dbgs() << " - Preheader: " << printMBBReference(*Preheader) << "\n";
1282  else if (auto *Preheader = MLI->findLoopPreheader(ML, true, true))
1283  dbgs() << " - Preheader: " << printMBBReference(*Preheader) << "\n";
1284  for (auto *MBB : ML->getBlocks())
1285  dbgs() << " - Block: " << printMBBReference(*MBB) << "\n";
1286  });
1287 
1288  // Search the given block for a loop start instruction. If one isn't found,
1289  // and there's only one predecessor block, search that one too.
1290  std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
1291  [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
1292  for (auto &MI : *MBB) {
1293  if (isLoopStart(MI))
1294  return &MI;
1295  }
1296  if (MBB->pred_size() == 1)
1297  return SearchForStart(*MBB->pred_begin());
1298  return nullptr;
1299  };
1300 
1301  LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI, *TII);
1302  // Search the preheader for the start intrinsic.
1303  // FIXME: I don't see why we shouldn't be supporting multiple predecessors
1304  // with potentially multiple set.loop.iterations, so we need to enable this.
1305  if (LoLoop.Preheader)
1306  LoLoop.Start = SearchForStart(LoLoop.Preheader);
1307  else
1308  return Changed;
1309 
1310  // Find the low-overhead loop components and decide whether or not to fall
1311  // back to a normal loop. Also look for a vctp instructions and decide
1312  // whether we can convert that predicate using tail predication.
1313  for (auto *MBB : reverse(ML->getBlocks())) {
1314  for (auto &MI : *MBB) {
1315  if (MI.isDebugValue())
1316  continue;
1317  else if (MI.getOpcode() == ARM::t2LoopDec)
1318  LoLoop.Dec = &MI;
1319  else if (MI.getOpcode() == ARM::t2LoopEnd)
1320  LoLoop.End = &MI;
1321  else if (MI.getOpcode() == ARM::t2LoopEndDec)
1322  LoLoop.End = LoLoop.Dec = &MI;
1323  else if (isLoopStart(MI))
1324  LoLoop.Start = &MI;
1325  else if (MI.getDesc().isCall()) {
1326  // TODO: Though the call will require LE to execute again, does this
1327  // mean we should revert? Always executing LE hopefully should be
1328  // faster than performing a sub,cmp,br or even subs,br.
1329  LoLoop.Revert = true;
1330  LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
1331  } else {
1332  // Record VPR defs and build up their corresponding vpt blocks.
1333  // Check we know how to tail predicate any mve instructions.
1334  LoLoop.AnalyseMVEInst(&MI);
1335  }
1336  }
1337  }
1338 
1339  LLVM_DEBUG(LoLoop.dump());
1340  if (!LoLoop.FoundAllComponents()) {
1341  LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");
1342  return Changed;
1343  }
1344 
1345  assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1346  "Expected t2WhileLoopStart to be removed before regalloc!");
1347 
1348  // Check that the only instruction using LoopDec is LoopEnd. This can only
1349  // happen when the Dec and End are separate, not a single t2LoopEndDec.
1350  // TODO: Check for copy chains that really have no effect.
1351  if (LoLoop.Dec != LoLoop.End) {
1353  RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses);
1354  if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {
1355  LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");
1356  LoLoop.Revert = true;
1357  }
1358  }
1359  LoLoop.Validate(BBUtils.get());
1360  Expand(LoLoop);
1361  return true;
1362 }
1363 
1364 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
1365 // beq that branches to the exit branch.
1366 // TODO: We could also try to generate a cbz if the value in LR is also in
1367 // another low register.
1368 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
1369  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
1371  unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
1372  ARM::tBcc : ARM::t2Bcc;
1373 
1374  RevertWhileLoopStartLR(MI, TII, BrOpc);
1375 }
1376 
1377 void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
1378  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
1380 }
1381 
1383  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
1384  MachineBasicBlock *MBB = MI->getParent();
1386  for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) {
1387  if (I->getOpcode() == ARM::t2LoopEnd) {
1388  Ignore.insert(&*I);
1389  break;
1390  }
1391  }
1392 
1393  // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
1394  bool SetFlags =
1396 
1397  llvm::RevertLoopDec(MI, TII, SetFlags);
1398  return SetFlags;
1399 }
1400 
1401 // Generate a subs, or sub and cmp, and a branch instead of an LE.
1402 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
1403  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
1404 
1405  MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
1406  unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
1407  ARM::tBcc : ARM::t2Bcc;
1408 
1409  llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
1410 }
1411 
1412 // Generate a subs, or sub and cmp, and a branch instead of an LE.
1413 void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {
1414  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);
1415  assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");
1416  MachineBasicBlock *MBB = MI->getParent();
1417 
1418  MachineInstrBuilder MIB =
1419  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
1420  MIB.addDef(ARM::LR);
1421  MIB.add(MI->getOperand(1));
1422  MIB.addImm(1);
1423  MIB.addImm(ARMCC::AL);
1424  MIB.addReg(ARM::NoRegister);
1425  MIB.addReg(ARM::CPSR);
1426  MIB->getOperand(5).setIsDef(true);
1427 
1428  MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();
1429  unsigned BrOpc =
1430  BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1431 
1432  // Create bne
1433  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
1434  MIB.add(MI->getOperand(2)); // branch target
1435  MIB.addImm(ARMCC::NE); // condition code
1436  MIB.addReg(ARM::CPSR);
1437 
1438  MI->eraseFromParent();
1439 }
1440 
1441 // Perform dead code elimation on the loop iteration count setup expression.
1442 // If we are tail-predicating, the number of elements to be processed is the
1443 // operand of the VCTP instruction in the vector body, see getCount(), which is
1444 // register $r3 in this example:
1445 //
1446 // $lr = big-itercount-expression
1447 // ..
1448 // $lr = t2DoLoopStart renamable $lr
1449 // vector.body:
1450 // ..
1451 // $vpr = MVE_VCTP32 renamable $r3
1452 // renamable $lr = t2LoopDec killed renamable $lr, 1
1453 // t2LoopEnd renamable $lr, %vector.body
1454 // tB %end
1455 //
1456 // What we would like achieve here is to replace the do-loop start pseudo
1457 // instruction t2DoLoopStart with:
1458 //
1459 // $lr = MVE_DLSTP_32 killed renamable $r3
1460 //
1461 // Thus, $r3 which defines the number of elements, is written to $lr,
1462 // and then we want to delete the whole chain that used to define $lr,
1463 // see the comment below how this chain could look like.
1464 //
1465 void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1466  if (!LoLoop.IsTailPredicationLegal())
1467  return;
1468 
1469  LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
1470 
1471  MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 1);
1472  if (!Def) {
1473  LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
1474  return;
1475  }
1476 
1477  // Collect and remove the users of iteration count.
1478  SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
1479  LoLoop.End };
1480  if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
1481  LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
1482 }
1483 
1484 MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1485  LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");
1486  // When using tail-predication, try to delete the dead code that was used to
1487  // calculate the number of loop iterations.
1488  IterationCountDCE(LoLoop);
1489 
1490  MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
1491  MachineInstr *Start = LoLoop.Start;
1492  MachineBasicBlock *MBB = LoLoop.StartInsertBB;
1493  unsigned Opc = LoLoop.getStartOpcode();
1494  MachineOperand &Count = LoLoop.getLoopStartOperand();
1495 
1496  // A DLS lr, lr we needn't emit
1497  MachineInstr* NewStart;
1498  if (Opc == ARM::t2DLS && Count.isReg() && Count.getReg() == ARM::LR) {
1499  LLVM_DEBUG(dbgs() << "ARM Loops: Didn't insert start: DLS lr, lr");
1500  NewStart = nullptr;
1501  } else {
1502  MachineInstrBuilder MIB =
1503  BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
1504 
1505  MIB.addDef(ARM::LR);
1506  MIB.add(Count);
1507  if (isWhileLoopStart(*Start))
1508  MIB.addMBB(getWhileLoopStartTargetBB(*Start));
1509 
1510  LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
1511  NewStart = &*MIB;
1512  }
1513 
1514  LoLoop.ToRemove.insert(Start);
1515  return NewStart;
1516 }
1517 
1518 void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1519  auto RemovePredicate = [](MachineInstr *MI) {
1520  if (MI->isDebugInstr())
1521  return;
1522  LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);
1523  int PIdx = llvm::findFirstVPTPredOperandIdx(*MI);
1524  assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");
1525  assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then &&
1526  "Expected Then predicate!");
1527  MI->getOperand(PIdx).setImm(ARMVCC::None);
1528  MI->getOperand(PIdx + 1).setReg(0);
1529  };
1530 
1531  for (auto &Block : LoLoop.getVPTBlocks()) {
1532  SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
1533 
1534  auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
1535  assert(TheVCMP && "Replacing a removed or non-existent VCMP");
1536  // Replace the VCMP with a VPT
1537  MachineInstrBuilder MIB =
1538  BuildMI(*At->getParent(), At, At->getDebugLoc(),
1539  TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode())));
1540  MIB.addImm(ARMVCC::Then);
1541  // Register one
1542  MIB.add(TheVCMP->getOperand(1));
1543  // Register two
1544  MIB.add(TheVCMP->getOperand(2));
1545  // The comparison code, e.g. ge, eq, lt
1546  MIB.add(TheVCMP->getOperand(3));
1547  LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);
1548  LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
1549  LoLoop.ToRemove.insert(TheVCMP);
1550  TheVCMP = nullptr;
1551  };
1552 
1553  if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) {
1554  MachineInstr *VPST = Insts.front();
1555  if (VPTState::hasUniformPredicate(Block)) {
1556  // A vpt block starting with VPST, is only predicated upon vctp and has no
1557  // internal vpr defs:
1558  // - Remove vpst.
1559  // - Unpredicate the remaining instructions.
1560  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1561  for (unsigned i = 1; i < Insts.size(); ++i)
1562  RemovePredicate(Insts[i]);
1563  } else {
1564  // The VPT block has a non-uniform predicate but it uses a vpst and its
1565  // entry is guarded only by a vctp, which means we:
1566  // - Need to remove the original vpst.
1567  // - Then need to unpredicate any following instructions, until
1568  // we come across the divergent vpr def.
1569  // - Insert a new vpst to predicate the instruction(s) that following
1570  // the divergent vpr def.
1571  MachineInstr *Divergent = VPTState::getDivergent(Block);
1572  MachineBasicBlock *MBB = Divergent->getParent();
1573  auto DivergentNext = ++MachineBasicBlock::iterator(Divergent);
1574  while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())
1575  ++DivergentNext;
1576 
1577  bool DivergentNextIsPredicated =
1578  DivergentNext != MBB->end() &&
1579  getVPTInstrPredicate(*DivergentNext) != ARMVCC::None;
1580 
1581  for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext;
1582  I != E; ++I)
1583  RemovePredicate(&*I);
1584 
1585  // Check if the instruction defining vpr is a vcmp so it can be combined
1586  // with the VPST This should be the divergent instruction
1587  MachineInstr *VCMP =
1588  VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr;
1589 
1590  if (DivergentNextIsPredicated) {
1591  // Insert a VPST at the divergent only if the next instruction
1592  // would actually use it. A VCMP following a VPST can be
1593  // merged into a VPT so do that instead if the VCMP exists.
1594  if (!VCMP) {
1595  // Create a VPST (with a null mask for now, we'll recompute it
1596  // later)
1597  MachineInstrBuilder MIB =
1598  BuildMI(*Divergent->getParent(), Divergent,
1599  Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST));
1600  MIB.addImm(0);
1601  LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);
1602  LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());
1603  } else {
1604  // No RDA checks are necessary here since the VPST would have been
1605  // directly after the VCMP
1606  ReplaceVCMPWithVPT(VCMP, VCMP);
1607  }
1608  }
1609  }
1610  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1611  LoLoop.ToRemove.insert(VPST);
1612  } else if (Block.containsVCTP()) {
1613  // The vctp will be removed, so either the entire block will be dead or
1614  // the block mask of the vp(s)t will need to be recomputed.
1615  MachineInstr *VPST = Insts.front();
1616  if (Block.size() == 2) {
1617  assert(VPST->getOpcode() == ARM::MVE_VPST &&
1618  "Found a VPST in an otherwise empty vpt block");
1619  LoLoop.ToRemove.insert(VPST);
1620  } else
1621  LoLoop.BlockMasksToRecompute.insert(VPST);
1622  } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
1623  // If this block starts with a VPST then attempt to merge it with the
1624  // preceeding un-merged VCMP into a VPT. This VCMP comes from a VPT
1625  // block that no longer exists
1626  MachineInstr *VPST = Insts.front();
1627  auto Next = ++MachineBasicBlock::iterator(VPST);
1629  "The instruction after a VPST must be predicated");
1630  (void)Next;
1631  MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR);
1632  if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) &&
1633  !LoLoop.ToRemove.contains(VprDef)) {
1634  MachineInstr *VCMP = VprDef;
1635  // The VCMP and VPST can only be merged if the VCMP's operands will have
1636  // the same values at the VPST.
1637  // If any of the instructions between the VCMP and VPST are predicated
1638  // then a different code path is expected to have merged the VCMP and
1639  // VPST already.
1642  RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) &&
1643  RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) {
1644  ReplaceVCMPWithVPT(VCMP, VPST);
1645  LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);
1646  LoLoop.ToRemove.insert(VPST);
1647  }
1648  }
1649  }
1650  }
1651 
1652  LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1653 }
1654 
1655 void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1656 
1657  // Combine the LoopDec and LoopEnd instructions into LE(TP).
1658  auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {
1659  MachineInstr *End = LoLoop.End;
1660  MachineBasicBlock *MBB = End->getParent();
1661  unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1662  ARM::MVE_LETP : ARM::t2LEUpdate;
1663  MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
1664  TII->get(Opc));
1665  MIB.addDef(ARM::LR);
1666  unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1667  MIB.add(End->getOperand(Off + 0));
1668  MIB.add(End->getOperand(Off + 1));
1669  LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
1670  LoLoop.ToRemove.insert(LoLoop.Dec);
1671  LoLoop.ToRemove.insert(End);
1672  return &*MIB;
1673  };
1674 
1675  // TODO: We should be able to automatically remove these branches before we
1676  // get here - probably by teaching analyzeBranch about the pseudo
1677  // instructions.
1678  // If there is an unconditional branch, after I, that just branches to the
1679  // next block, remove it.
1680  auto RemoveDeadBranch = [](MachineInstr *I) {
1681  MachineBasicBlock *BB = I->getParent();
1682  MachineInstr *Terminator = &BB->instr_back();
1683  if (Terminator->isUnconditionalBranch() && I != Terminator) {
1684  MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
1685  if (BB->isLayoutSuccessor(Succ)) {
1686  LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
1687  Terminator->eraseFromParent();
1688  }
1689  }
1690  };
1691 
1692  if (LoLoop.Revert) {
1693  if (isWhileLoopStart(*LoLoop.Start))
1694  RevertWhile(LoLoop.Start);
1695  else
1696  RevertDo(LoLoop.Start);
1697  if (LoLoop.Dec == LoLoop.End)
1698  RevertLoopEndDec(LoLoop.End);
1699  else
1700  RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
1701  } else {
1702  LoLoop.Start = ExpandLoopStart(LoLoop);
1703  if (LoLoop.Start)
1704  RemoveDeadBranch(LoLoop.Start);
1705  LoLoop.End = ExpandLoopEnd(LoLoop);
1706  RemoveDeadBranch(LoLoop.End);
1707  if (LoLoop.IsTailPredicationLegal())
1708  ConvertVPTBlocks(LoLoop);
1709  for (auto *I : LoLoop.ToRemove) {
1710  LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I);
1711  I->eraseFromParent();
1712  }
1713  for (auto *I : LoLoop.BlockMasksToRecompute) {
1714  LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I);
1716  LLVM_DEBUG(dbgs() << " ... done: " << *I);
1717  }
1718  }
1719 
1720  PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1721  DFS.ProcessLoop();
1722  const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
1723  for (auto *MBB : PostOrder) {
1725  // FIXME: For some reason, the live-in print order is non-deterministic for
1726  // our tests and I can't out why... So just sort them.
1728  }
1729 
1730  for (auto *MBB : reverse(PostOrder))
1732 
1733  // We've moved, removed and inserted new instructions, so update RDA.
1734  RDA->reset();
1735 }
1736 
1737 bool ARMLowOverheadLoops::RevertNonLoops() {
1738  LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
1739  bool Changed = false;
1740 
1741  for (auto &MBB : *MF) {
1746 
1747  for (auto &I : MBB) {
1748  if (isLoopStart(I))
1749  Starts.push_back(&I);
1750  else if (I.getOpcode() == ARM::t2LoopDec)
1751  Decs.push_back(&I);
1752  else if (I.getOpcode() == ARM::t2LoopEnd)
1753  Ends.push_back(&I);
1754  else if (I.getOpcode() == ARM::t2LoopEndDec)
1755  EndDecs.push_back(&I);
1756  }
1757 
1758  if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
1759  continue;
1760 
1761  Changed = true;
1762 
1763  for (auto *Start : Starts) {
1764  if (isWhileLoopStart(*Start))
1765  RevertWhile(Start);
1766  else
1767  RevertDo(Start);
1768  }
1769  for (auto *Dec : Decs)
1770  RevertLoopDec(Dec);
1771 
1772  for (auto *End : Ends)
1773  RevertLoopEnd(End);
1774  for (auto *End : EndDecs)
1775  RevertLoopEndDec(End);
1776  }
1777  return Changed;
1778 }
1779 
1781  return new ARMLowOverheadLoops();
1782 }
isVectorPredicated
static bool isVectorPredicated(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:82
llvm::TargetRegisterInfo::getRegAsmName
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
Definition: TargetRegisterInfo.h:1013
i
i
Definition: README.txt:29
ARMSubtarget.h
INITIALIZE_PASS
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) static bool TryRemove(MachineInstr *MI
producesFalseLanesZero
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
Definition: ARMLowOverheadLoops.cpp:858
llvm::ReachingDefAnalysis::reset
void reset()
Re-run the analysis.
Definition: ReachingDefAnalysis.cpp:224
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm::ReachingDefAnalysis::hasSameReachingDef
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
Definition: ReachingDefAnalysis.cpp:285
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::ReachingDefAnalysis::isSafeToRemove
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
Definition: ReachingDefAnalysis.cpp:600
llvm::ARM::isVpred
bool isVpred(OperandType op)
Definition: ARMMCTargetDesc.h:122
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::ReachingDefAnalysis::getUniqueReachingMIDef
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
Definition: ReachingDefAnalysis.cpp:433
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::ReachingDefAnalysis
This class provides the reaching def analysis.
Definition: ReachingDefAnalysis.h:69
MCInstrDesc.h
llvm::ARMSubtarget
Definition: ARMSubtarget.h:46
SetOperations.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCRegister::from
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:66
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::createARMLowOverheadLoopsPass
FunctionPass * createARMLowOverheadLoopsPass()
Definition: ARMLowOverheadLoops.cpp:1780
llvm::AArch64::FalseLanesZero
@ FalseLanesZero
Definition: AArch64InstrInfo.h:490
llvm::recomputeLivenessFlags
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
Definition: LivePhysRegs.cpp:279
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::MachineLoopInfo::findLoopPreheader
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
Definition: MachineLoopInfo.cpp:118
llvm::ARMII::DomainMask
@ DomainMask
Definition: ARMBaseInfo.h:414
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::isDoLoopStart
static bool isDoLoopStart(const MachineInstr &MI)
Definition: MVETailPredUtils.h:71
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:536
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1957
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:536
llvm::recomputeLiveIns
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
Definition: LivePhysRegs.h:196
isDomainMVE
static bool isDomainMVE(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:95
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
hasVPRUse
static bool hasVPRUse(MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:91
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
MachineLoopUtils.h
llvm::MachineBasicBlock::liveins
iterator_range< livein_iterator > liveins() const
Definition: MachineBasicBlock.h:412
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
shouldInspect
static bool shouldInspect(MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:100
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::LoopBase::begin
iterator begin() const
Definition: LoopInfo.h:154
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
CreateVPTBlock
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
Definition: MVEVPTBlockPass.cpp:163
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::MachineBasicBlock::back
MachineInstr & back()
Definition: MachineBasicBlock.h:248
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::VCTPOpcodeToLSTP
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
Definition: MVETailPredUtils.h:25
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::ReachingDefAnalysis::hasLocalDefBefore
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
Definition: ReachingDefAnalysis.cpp:320
llvm::ReachingDefAnalysis::getLocalLiveOutMIDef
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
Definition: ReachingDefAnalysis.cpp:522
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:579
llvm::ReachingDefAnalysis::isSafeToMoveBackwards
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
Definition: ReachingDefAnalysis.cpp:590
llvm::getAddSubImmediate
int getAddSubImmediate(MachineInstr &MI)
Definition: ARMBaseInstrInfo.h:850
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::MachineBasicBlock::pred_size
unsigned pred_size() const
Definition: MachineBasicBlock.h:328
llvm::TargetRegisterClass::contains
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
Definition: TargetRegisterInfo.h:91
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:816
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:632
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:203
llvm::PseudoSourceValue::kind
unsigned kind() const
Definition: PseudoSourceValue.h:66
llvm::MachineBasicBlock::insertAfter
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
Definition: MachineBasicBlock.h:871
MachineLoopInfo.h
llvm::ARMII::DomainMVE
@ DomainMVE
Definition: ARMBaseInfo.h:419
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:181
llvm::ReachingDefAnalysis::getGlobalUses
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
Definition: ReachingDefAnalysis.cpp:369
producesDoubleWidthResult
static bool producesDoubleWidthResult(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:813
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:370
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::RevertLoopDec
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
Definition: MVETailPredUtils.h:145
llvm::findFirstVPTPredOperandIdx
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
Definition: Thumb2InstrInfo.cpp:761
llvm::X86AS::FS
@ FS
Definition: X86.h:188
llvm::RevertLoopEnd
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Definition: MVETailPredUtils.h:167
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Domain
Domain
Definition: CorrelatedValuePropagation.cpp:660
llvm::LoopBase::end
iterator end() const
Definition: LoopInfo.h:155
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:713
MVETailPredUtils.h
llvm::getVPTInstrPredicate
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
Definition: Thumb2InstrInfo.cpp:774
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:169
llvm::ARMII::ValidForTailPredication
@ ValidForTailPredication
Definition: ARMBaseInfo.h:398
llvm::MachineBasicBlock::RegisterMaskPair
Pair of physical register and lane mask.
Definition: MachineBasicBlock.h:101
llvm::ARMISD::VCMP
@ VCMP
Definition: ARMISelLowering.h:146
llvm::ARMII::RetainsPreviousHalfElement
@ RetainsPreviousHalfElement
Definition: ARMBaseInfo.h:402
llvm::PseudoSourceValue::FixedStack
@ FixedStack
Definition: PseudoSourceValue.h:42
DisableTailPredication
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
llvm::PseudoSourceValue
Special value supplied for machine level alias analysis.
Definition: PseudoSourceValue.h:35
isVectorPredicate
static bool isVectorPredicate(MachineInstr *MI)
Definition: ARMLowOverheadLoops.cpp:87
llvm::VCMPOpcodeToVPT
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
Definition: ARMBaseInstrInfo.h:586
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
llvm::ARMCC::NE
@ NE
Definition: ARMBaseInfo.h:32
Passes.h
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:622
llvm::cl::opt< bool >
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
ARMBaseRegisterInfo.h
llvm::isVPTOpcode
static bool isVPTOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:570
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:737
llvm::MachineFunctionProperties::Property::TracksLiveness
@ TracksLiveness
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::RevertDoLoopStart
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETailPredUtils.h:135
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ARMBaseInstrInfo
Definition: ARMBaseInstrInfo.h:37
llvm::ARMII::DoubleWidthResult
@ DoubleWidthResult
Definition: ARMBaseInfo.h:409
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1599
MachineFunctionPass.h
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:541
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::isVCTP
static bool isVCTP(const MachineInstr *MI)
Definition: MVETailPredUtils.h:58
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
DEBUG_TYPE
#define DEBUG_TYPE
Definition: ARMLowOverheadLoops.cpp:74
ARMBaseInstrInfo.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::Register::asMCReg
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
ARM.h
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
isRegInClass
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
Definition: ARMLowOverheadLoops.cpp:796
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1515
llvm::ARMII::HorizontalReduction
@ HorizontalReduction
Definition: ARMBaseInfo.h:405
llvm::MachineOperand::setIsDef
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Definition: MachineOperand.cpp:101
llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition: MachineBasicBlock.h:233
IT
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
isHorizontalReduction
static bool isHorizontalReduction(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:819
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1541
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm::ReachingDefAnalysis::getReachingLocalUses
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
Definition: ReachingDefAnalysis.cpp:325
llvm::LoopBase::isOutermost
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
Definition: LoopInfo.h:168
llvm::RevertWhileLoopStartLR
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
Definition: MVETailPredUtils.h:98
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1724
llvm::isWhileLoopStart
static bool isWhileLoopStart(const MachineInstr &MI)
Definition: MVETailPredUtils.h:76
ARMBasicBlockInfo.h
llvm::getWhileLoopStartTargetBB
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
Definition: MVETailPredUtils.h:87
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition: MachineInstrBuilder.h:89
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::ReachingDefAnalysis::collectKilledOperands
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
Definition: ReachingDefAnalysis.cpp:644
llvm::SetVector::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
ValidateMVEStore
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
Definition: ARMLowOverheadLoops.cpp:1087
llvm::ReachingDefAnalysis::isSafeToDefRegAt
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
Definition: ReachingDefAnalysis.cpp:676
Thumb2InstrInfo.h
llvm::MachineBasicBlock::sortUniqueLiveIns
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
Definition: MachineBasicBlock.cpp:581
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1312
ReachingDefAnalysis.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::MachineFrameInfo::isSpillSlotObjectIndex
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
Definition: MachineFrameInfo.h:684
llvm::recomputeVPTBlockMask
void recomputeVPTBlockMask(MachineInstr &Instr)
Definition: Thumb2InstrInfo.cpp:786
llvm::ReachingDefAnalysis::getGlobalReachingDefs
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
Definition: ReachingDefAnalysis.cpp:395
canGenerateNonZeros
static bool canGenerateNonZeros(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:829
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
ARM_LOW_OVERHEAD_LOOPS_NAME
#define ARM_LOW_OVERHEAD_LOOPS_NAME
Definition: ARMLowOverheadLoops.cpp:75
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:216
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
RDA
ReachingDefAnalysis & RDA
Definition: ARMLowOverheadLoops.cpp:535
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::ReachingDefAnalysis::isReachingDefLiveOut
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Definition: ReachingDefAnalysis.cpp:500
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ReachingDefAnalysis::getMIOperand
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
Definition: ReachingDefAnalysis.cpp:453
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::pdb::PDB_SymType::Block
@ Block
retainsPreviousHalfElement
static bool retainsPreviousHalfElement(const MachineInstr &MI)
Definition: ARMLowOverheadLoops.cpp:804
llvm::isMovRegOpcode
static bool isMovRegOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:733
llvm::MCID::Terminator
@ Terminator
Definition: MCInstrDesc.h:155
llvm::MCInstrDesc::operands
iterator_range< const_opInfo_iterator > operands() const
Definition: MCInstrDesc.h:235
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::isSubImmOpcode
static bool isSubImmOpcode(int Opc)
Definition: ARMBaseInstrInfo.h:726
llvm::ARMBasicBlockUtils
Definition: ARMBasicBlockInfo.h:109
llvm::getTailPredVectorWidth
static unsigned getTailPredVectorWidth(unsigned Opcode)
Definition: MVETailPredUtils.h:42
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::ReachingDefAnalysis::isSafeToMoveForwards
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
Definition: ReachingDefAnalysis.cpp:580
llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:23
llvm::isLoopStart
static bool isLoopStart(const MachineInstr &MI)
Definition: MVETailPredUtils.h:82
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773
LivePhysRegs.h