LLVM  13.0.0git
MVETPAndVPTOptimisationsPass.cpp
Go to the documentation of this file.
1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40  cl::desc("Enable merging Loop End and Dec instructions."),
41  cl::init(true));
42 
43 namespace {
// Machine function pass that groups the tail-predication and VPT block
// optimisations implemented in this file.
// NOTE(review): some member lines (listing lines 48, 55-59 and 72-73) are not
// visible in this listing.
44 class MVETPAndVPTOptimisations : public MachineFunctionPass {
45 public:
46  static char ID;
// Instruction info used by the transforms to build replacement instructions.
47  const Thumb2InstrInfo *TII;
49 
50  MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
51 
52  bool runOnMachineFunction(MachineFunction &Fn) override;
53 
// NOTE(review): the body of getAnalysisUsage is not visible in this listing.
54  void getAnalysisUsage(AnalysisUsage &AU) const override {
60  }
61 
62  StringRef getPassName() const override {
63  return "ARM MVE TailPred and VPT Optimisation Pass";
64  }
65 
66 private:
// Transforms that operate on a whole MachineLoop.
67  bool LowerWhileLoopStart(MachineLoop *ML);
68  bool MergeLoopEnd(MachineLoop *ML);
69  bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
// NOTE(review): the trailing parameters of ReplaceRegisterUseWithVPNOT are not
// visible in this listing.
70  MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
71  MachineInstr &Instr,
// Transforms that operate on a single MachineBasicBlock.
74  bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
75  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
76  bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
77  bool ConvertVPSEL(MachineBasicBlock &MBB);
78  bool HintDoLoopStartReg(MachineBasicBlock &MBB);
79 };
80 
82 
83 } // end anonymous namespace
84 
// Pass initialisation boilerplate; the pass is identified by DEBUG_TYPE.
// NOTE(review): listing lines 88-89 between BEGIN and END are not visible
// here; they presumably declare the pass's analysis dependencies - confirm.
85 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
86  "ARM MVE TailPred and VPT Optimisations pass", false,
87  false)
90 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
91  "ARM MVE TailPred and VPT Optimisations pass", false, false)
92 
95  while (MI && MI->getOpcode() == TargetOpcode::COPY &&
96  MI->getOperand(1).getReg().isVirtual())
97  MI = MRI->getVRegDef(MI->getOperand(1).getReg());
98  return MI;
99 }
100 
101 // Given a loop ML, this attempts to find the t2LoopEnd (or t2LoopEndDec),
102 // t2LoopDec and corresponding PHI that make up a low overhead loop. LoopStart
103 // returns a t2DoLoopStart, t2WhileLoopSetup or t2WhileLoopStartLR.
105  MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
106  MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
107  MachineBasicBlock *Header = ML->getHeader();
108  MachineBasicBlock *Latch = ML->getLoopLatch();
109  if (!Header || !Latch) {
110  LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
111  return false;
112  }
113 
114  // Find the loop end from the terminators.
115  LoopEnd = nullptr;
116  for (auto &T : Latch->terminators()) {
117  if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
118  LoopEnd = &T;
119  break;
120  }
121  if (T.getOpcode() == ARM::t2LoopEndDec &&
122  T.getOperand(2).getMBB() == Header) {
123  LoopEnd = &T;
124  break;
125  }
126  }
127  if (!LoopEnd) {
128  LLVM_DEBUG(dbgs() << " no LoopEnd\n");
129  return false;
130  }
131  LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
132 
133  // Find the dec from the use of the end. There may be copies between
134 // instructions. We expect the loop to look like:
135  // $vs = t2DoLoopStart ...
136  // loop:
137  // $vp = phi [ $vs ], [ $vd ]
138  // ...
139  // $vd = t2LoopDec $vp
140  // ...
141  // t2LoopEnd $vd, loop
142  if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
143  LoopDec = LoopEnd;
144  else {
145  LoopDec =
147  if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
148  LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
149  return false;
150  }
151  }
152  LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
153 
154  LoopPhi =
156  if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
157  LoopPhi->getNumOperands() != 5 ||
158  (LoopPhi->getOperand(2).getMBB() != Latch &&
159  LoopPhi->getOperand(4).getMBB() != Latch)) {
160  LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
161  return false;
162  }
163  LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
164 
165  Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
166  ? LoopPhi->getOperand(3).getReg()
167  : LoopPhi->getOperand(1).getReg();
168  LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
169  if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
170  LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
171  LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
172  LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
173  return false;
174  }
175  LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
176 
177  return true;
178 }
179 
181  MachineBasicBlock *MBB = MI->getParent();
182  assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
183  "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
184 
185  // Subs
186  MachineInstrBuilder MIB =
187  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
188  MIB.add(MI->getOperand(0));
189  MIB.add(MI->getOperand(1));
190  MIB.addImm(0);
191  MIB.addImm(ARMCC::AL);
192  MIB.addReg(ARM::NoRegister);
193  MIB.addReg(ARM::CPSR, RegState::Define);
194 
195  // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
196  for (MachineInstr &I : MBB->terminators()) {
197  if (I.getOpcode() == ARM::t2WhileLoopStart) {
198  MachineInstrBuilder MIB =
199  BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
200  MIB.add(MI->getOperand(1)); // branch target
201  MIB.addImm(ARMCC::EQ);
202  MIB.addReg(ARM::CPSR);
203  I.eraseFromParent();
204  break;
205  }
206  }
207 
208  MI->eraseFromParent();
209 }
210 
211 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
212 // start of a while loop:
213 // %a:gprlr = t2WhileLoopSetup %Cnt
214 // t2WhileLoopStart %a, %BB
215 // We want to convert those to a single instruction which, like t2LoopEndDec and
216 // t2DoLoopStartTP is both a terminator and produces a value:
217 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
218 //
219 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
220 // t2WhileLoopStart are not valid past regalloc.
221 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
222  LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
223  << ML->getHeader()->getName() << "\n");
224 
225  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
226  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
227  return false;
228 
229  if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
230  return false;
231 
232  Register LR = LoopStart->getOperand(0).getReg();
233  auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
234  return MI.getOpcode() == ARM::t2WhileLoopStart;
235  });
236  if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
237  RevertWhileLoopSetup(LoopStart, TII);
238  RevertLoopDec(LoopStart, TII);
239  RevertLoopEnd(LoopStart, TII);
240  return true;
241  }
242 
244  BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
245  TII->get(ARM::t2WhileLoopStartLR), LR)
246  .add(LoopStart->getOperand(1))
247  .add(WLSIt->getOperand(1));
248  (void)MI;
249  LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
250 
251  WLSIt->eraseFromParent();
252  LoopStart->eraseFromParent();
253  return true;
254 }
255 
256 // This function converts loops with t2LoopDec and t2LoopEnd instructions into
257 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
258 // will be valid to be used for the low overhead loop, which means nothing else
259 // is using LR (especially calls) and there are no superfluous copies in the
260 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
261 // decrement) around the loop edge, which means we need to be careful that they
262 // will be valid to allocate without any spilling.
//
// Returns false when merging is disabled, when the loop components cannot be
// found, or when unexpected register users block the merge of a loop that is
// not started by a t2WhileLoopStartLR. Returns true when the loop was either
// merged or reverted.
// NOTE(review): listing lines 319, 322, 327 and 367 (the declaration of
// Copies, part of the CheckUsers lambda and the declaration of MI) are not
// visible in this listing.
263 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
264  if (!MergeEndDec)
265  return false;
266 
267  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
268  << "\n");
269 
270  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
271  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
272  return false;
273 
274  // Check if there is an illegal instruction (a call) in the low overhead loop
275  // and if so revert it now before we get any further. While loops also need to
276  // check the preheaders, but can be reverted to a DLS loop if needed.
277  auto *PreHeader = ML->getLoopPreheader();
278  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader &&
279  LoopStart->getParent() != PreHeader) {
280  for (MachineInstr &MI : *PreHeader) {
281  if (MI.isCall()) {
282  // Create a t2DoLoopStart at the end of the preheader.
283  MachineInstrBuilder MIB =
284  BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
285  LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
286  MIB.add(LoopStart->getOperand(0));
287  MIB.add(LoopStart->getOperand(1));
288 
289  // Revert the t2WhileLoopStartLR to a CMP and Br.
290  RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
 // From here on the new t2DoLoopStart is treated as the loop start.
291  LoopStart = MIB;
292  break;
293  }
294  }
295  }
296 
 // Any call inside the loop body forces the whole loop to be reverted.
297  for (MachineBasicBlock *MBB : ML->blocks()) {
298  for (MachineInstr &MI : *MBB) {
299  if (MI.isCall()) {
300  LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
301  if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
302  RevertDoLoopStart(LoopStart, TII);
303  else
304  RevertWhileLoopStartLR(LoopStart, TII);
305  RevertLoopDec(LoopDec, TII);
306  RevertLoopEnd(LoopEnd, TII);
307  return true;
308  }
309  }
310  }
311 
312  // Remove any copies from the loop, to ensure the phi that remains is both
313  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
314  // that cannot spill, we need to be careful what remains in the loop.
315  Register PhiReg = LoopPhi->getOperand(0).getReg();
316  Register DecReg = LoopDec->getOperand(0).getReg();
317  Register StartReg = LoopStart->getOperand(0).getReg();
318  // Ensure the uses are expected, and collect any copies we want to remove.
 // CheckUsers walks the users of BaseReg transitively through COPYs to
 // virtual registers. It fails on any user that is neither an expected user
 // nor such a COPY, and records every COPY it sees in Copies.
320  auto CheckUsers = [&Copies](Register BaseReg,
321  ArrayRef<MachineInstr *> ExpectedUsers,
323  SmallVector<Register, 4> Worklist;
324  Worklist.push_back(BaseReg);
325  while (!Worklist.empty()) {
326  Register Reg = Worklist.pop_back_val();
328  if (count(ExpectedUsers, &MI))
329  continue;
330  if (MI.getOpcode() != TargetOpcode::COPY ||
331  !MI.getOperand(0).getReg().isVirtual()) {
332  LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
333  return false;
334  }
335  Worklist.push_back(MI.getOperand(0).getReg());
336  Copies.push_back(&MI);
337  }
338  }
339  return true;
340  };
341  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
342  !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
343  !CheckUsers(StartReg, {LoopPhi}, MRI)) {
344  // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
345  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
346  RevertWhileLoopStartLR(LoopStart, TII);
347  RevertLoopDec(LoopDec, TII);
348  RevertLoopEnd(LoopEnd, TII);
349  return true;
350  }
351  return false;
352  }
353 
354  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
355  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
356  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
357 
 // Point the PHI directly at StartReg and DecReg so the collected copies are
 // no longer referenced. Which operand slot is the latch edge depends on the
 // PHI's operand order.
358  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
359  LoopPhi->getOperand(3).setReg(StartReg);
360  LoopPhi->getOperand(1).setReg(DecReg);
361  } else {
362  LoopPhi->getOperand(1).setReg(StartReg);
363  LoopPhi->getOperand(3).setReg(DecReg);
364  }
365 
366  // Replace the loop dec and loop end as a single instruction.
368  BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
369  TII->get(ARM::t2LoopEndDec), DecReg)
370  .addReg(PhiReg)
371  .add(LoopEnd->getOperand(1));
372  (void)MI;
373  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
374 
 // The old dec/end pair and the bypassed copies are now dead.
375  LoopDec->eraseFromParent();
376  LoopEnd->eraseFromParent();
377  for (auto *MI : Copies)
378  MI->eraseFromParent();
379  return true;
380 }
381 
382 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
383 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
384 // instruction, making the backend ARMLowOverheadLoops pass's job of finding
385 // the VCTP operand much simpler.
//
// DT is used to check that the new instruction would dominate every user of
// the loop start's result. Returns true only when the t2DoLoopStart was
// replaced; every early exit below returns false.
// NOTE(review): listing lines 399 and 452 (the declarations of VCTPs and
// InsertPt) are not visible in this listing.
386 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
387  MachineDominatorTree *DT) {
388  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
389  << ML->getHeader()->getName() << "\n");
390 
391  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
392  // in the loop.
393  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
394  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
395  return false;
 // Only loops whose dec and end are the same instruction (a t2LoopEndDec) and
 // that are started by a t2DoLoopStart are converted.
396  if (LoopDec != LoopEnd || LoopStart->getOpcode() != ARM::t2DoLoopStart)
397  return false;
398 
400  for (MachineBasicBlock *BB : ML->blocks())
401  for (MachineInstr &MI : *BB)
402  if (isVCTP(&MI))
403  VCTPs.push_back(&MI);
404 
405  if (VCTPs.empty()) {
406  LLVM_DEBUG(dbgs() << " no VCTPs\n");
407  return false;
408  }
409 
410  // Check all VCTPs are the same.
411  MachineInstr *FirstVCTP = *VCTPs.begin();
412  for (MachineInstr *VCTP : VCTPs) {
413  LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
414  if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
415  VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
416  LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
417  return false;
418  }
419  }
420 
421  // Check for the register being used can be setup before the loop. We expect
422  // this to be:
423  // $vx = ...
424  // loop:
425  // $vp = PHI [ $vx ], [ $vd ]
426  // ..
427  // $vpr = VCTP $vp
428  // ..
429  // $vd = t2SUBri $vp, #n
430  // ..
431  Register CountReg = FirstVCTP->getOperand(1).getReg();
432  if (!CountReg.isVirtual()) {
433  LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
434  return false;
435  }
436  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
437  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
438  Phi->getNumOperands() != 5 ||
439  (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
440  Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
441  LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
442  return false;
443  }
 // Take the PHI's incoming value that does not come from the latch, i.e. the
 // element count on entry to the loop.
444  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
445  ? Phi->getOperand(3).getReg()
446  : Phi->getOperand(1).getReg();
447 
448  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
449  // the preheader and add the new CountReg to it. We attempt to place it late
450  // in the preheader, but may need to move that earlier based on uses.
451  MachineBasicBlock *MBB = LoopStart->getParent();
 // Give up if a user of the loop start's result would not be dominated by the
 // insertion point, or lies outside the region dominated by the loop header.
453  for (MachineInstr &Use :
454  MRI->use_instructions(LoopStart->getOperand(0).getReg()))
455  if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
456  !DT->dominates(ML->getHeader(), Use.getParent())) {
457  LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
458  return false;
459  }
460 
461  MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
462  TII->get(ARM::t2DoLoopStartTP))
463  .add(LoopStart->getOperand(0))
464  .add(LoopStart->getOperand(1))
465  .addReg(CountReg);
 // MI is only read inside LLVM_DEBUG; the cast avoids an unused warning.
466  (void)MI;
467  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
468  << *MI.getInstr());
469  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
470  LoopStart->eraseFromParent();
471 
472  return true;
473 }
474 
475 // Returns true if Opcode is any VCMP Opcode.
476 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
477 
478 // Returns true if a VCMP with this Opcode can have its operands swapped.
479 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
480 // and VCMPr instructions (since the r is always on the right).
481 static bool CanHaveSwappedOperands(unsigned Opcode) {
482  switch (Opcode) {
483  default:
484  return true;
485  case ARM::MVE_VCMPf32:
486  case ARM::MVE_VCMPf16:
487  case ARM::MVE_VCMPf32r:
488  case ARM::MVE_VCMPf16r:
489  case ARM::MVE_VCMPi8r:
490  case ARM::MVE_VCMPi16r:
491  case ARM::MVE_VCMPi32r:
492  case ARM::MVE_VCMPu8r:
493  case ARM::MVE_VCMPu16r:
494  case ARM::MVE_VCMPu32r:
495  case ARM::MVE_VCMPs8r:
496  case ARM::MVE_VCMPs16r:
497  case ARM::MVE_VCMPs32r:
498  return false;
499  }
500 }
501 
502 // Returns the CondCode of a VCMP Instruction.
504  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
505  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
506 }
507 
508 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
509 // Prev. Cond and Prev must be VCMPs.
511  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
512 
513  // Opcodes must match.
514  if (Cond.getOpcode() != Prev.getOpcode())
515  return false;
516 
517  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
518  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
519 
520  // If the VCMP has the opposite condition with the same operands, we can
521  // replace it with a VPNOT
522  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
523  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
524  if (ExpectedCode == GetCondCode(Prev))
525  if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
526  return true;
527  // Check again with operands swapped if possible
528  if (!CanHaveSwappedOperands(Cond.getOpcode()))
529  return false;
530  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
531  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
532  CondOP2.isIdenticalTo(PrevOP1);
533 }
534 
535 // Returns true if Instr writes to VCCR.
536 static bool IsWritingToVCCR(MachineInstr &Instr) {
537  if (Instr.getNumOperands() == 0)
538  return false;
539  MachineOperand &Dst = Instr.getOperand(0);
540  if (!Dst.isReg())
541  return false;
542  Register DstReg = Dst.getReg();
543  if (!DstReg.isVirtual())
544  return false;
545  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
546  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
547  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
548 }
549 
550 // Transforms
551 // <Instr that uses %A ('User' Operand)>
552 // Into
553 // %K = VPNOT %Target
554 // <Instr that uses %K ('User' Operand)>
555 // And returns the newly inserted VPNOT.
556 // This optimization is done in the hopes of preventing spills/reloads of VPR by
557 // reducing the number of VCCR values with overlapping lifetimes.
//
// The VPNOT is inserted into MBB immediately before Instr and is unpredicated.
// NOTE(review): listing lines 559 and 561 (the middle of the parameter list
// and the definition of NewResult) are not visible in this listing.
558 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
560  Register Target) {
562 
563  MachineInstrBuilder MIBuilder =
564  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
565  .addDef(NewResult)
566  .addReg(Target);
567  addUnpredicatedMveVpredNOp(MIBuilder);
568 
569  // Make the user use NewResult instead, and clear its kill flag.
570  User.setReg(NewResult);
571  User.setIsKill(false);
572 
573  LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
574  MIBuilder.getInstr()->dump());
575 
576  return *MIBuilder.getInstr();
577 }
578 
579 // Moves a VPNOT before its first user if an instruction that uses Reg is found
580 // in-between the VPNOT and its user.
581 // Returns true if there is at least one user of the VPNOT in the block.
584  Register Reg) {
585  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
587  "The VPNOT cannot be predicated");
588 
589  MachineInstr &VPNOT = *Iter;
590  Register VPNOTResult = VPNOT.getOperand(0).getReg();
591  Register VPNOTOperand = VPNOT.getOperand(1).getReg();
592 
593  // Whether the VPNOT will need to be moved, and whether we found a user of the
594  // VPNOT.
595  bool MustMove = false, HasUser = false;
596  MachineOperand *VPNOTOperandKiller = nullptr;
597  for (; Iter != MBB.end(); ++Iter) {
598  if (MachineOperand *MO =
599  Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
600  // If we find the operand that kills the VPNOTOperand's result, save it.
601  VPNOTOperandKiller = MO;
602  }
603 
604  if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
605  MustMove = true;
606  continue;
607  }
608 
609  if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
610  continue;
611 
612  HasUser = true;
613  if (!MustMove)
614  break;
615 
616  // Move the VPNOT right before Iter
617  LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
618  Iter->dump());
619  MBB.splice(Iter, &MBB, VPNOT.getIterator());
620  // If we move the instr, and its operand was killed earlier, remove the kill
621  // flag.
622  if (VPNOTOperandKiller)
623  VPNOTOperandKiller->setIsKill(false);
624 
625  break;
626  }
627  return HasUser;
628 }
629 
630 // This optimisation attempts to reduce the number of overlapping lifetimes of
631 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
632 // this replaces
633 // %A:vccr = (something)
634 // %B:vccr = VPNOT %A
635 // %Foo = (some op that uses %B)
636 // %Bar = (some op that uses %A)
637 // With
638 // %A:vccr = (something)
639 // %B:vccr = VPNOT %A
640 // %Foo = (some op that uses %B)
641 // %TMP2:vccr = VPNOT %B
642 // %Bar = (some op that uses %TMP2)
//
// Returns true if any instruction in MBB was rewritten.
// NOTE(review): listing lines 656 and 746 (the second halves of two
// conditions) are not visible in this listing.
643 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
644  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
 // Redundant VPNOTs are only queued here and erased after the scan, so that
 // the iteration is not disturbed.
645  SmallVector<MachineInstr *, 4> DeadInstructions;
646  bool Modified = false;
647 
648  while (Iter != End) {
649  Register VCCRValue, OppositeVCCRValue;
650  // The first loop looks for 2 unpredicated instructions:
651  // %A:vccr = (instr) ; A is stored in VCCRValue
652  // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
653  for (; Iter != End; ++Iter) {
654  // We're only interested in unpredicated instructions that write to VCCR.
655  if (!IsWritingToVCCR(*Iter) ||
657  continue;
658  Register Dst = Iter->getOperand(0).getReg();
659 
660  // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
661  // found what we were looking for.
662  if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
663  Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
664  // Move the VPNOT closer to its first user if needed, and ignore if it
665  // has no users.
666  if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
667  continue;
668 
669  OppositeVCCRValue = Dst;
670  ++Iter;
671  break;
672  }
673 
674  // Else, just set VCCRValue.
675  VCCRValue = Dst;
676  }
677 
678  // If the first inner loop didn't find anything, stop here.
679  if (Iter == End)
680  break;
681 
682  assert(VCCRValue && OppositeVCCRValue &&
683  "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
684  "stopped before the end of the block!");
685  assert(VCCRValue != OppositeVCCRValue &&
686  "VCCRValue should not be equal to OppositeVCCRValue!");
687 
688  // LastVPNOTResult always contains the same value as OppositeVCCRValue.
689  Register LastVPNOTResult = OppositeVCCRValue;
690 
691  // This second loop tries to optimize the remaining instructions.
692  for (; Iter != End; ++Iter) {
 // Set when the instruction reads or produces one of the tracked values; an
 // uninteresting VCCR write ends this second loop.
693  bool IsInteresting = false;
694 
695  if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
696  IsInteresting = true;
697 
698  // - If the instruction is a VPNOT, it can be removed, and we can just
699  // replace its uses with LastVPNOTResult.
700  // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
701  if (Iter->getOpcode() == ARM::MVE_VPNOT) {
702  Register Result = Iter->getOperand(0).getReg();
703 
704  MRI->replaceRegWith(Result, LastVPNOTResult);
705  DeadInstructions.push_back(&*Iter);
706  Modified = true;
707 
708  LLVM_DEBUG(dbgs()
709  << "Replacing all uses of '" << printReg(Result)
710  << "' with '" << printReg(LastVPNOTResult) << "'\n");
711  } else {
712  MachineInstr &VPNOT =
713  ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
714  Modified = true;
715 
 // The new VPNOT's result is now the most recent value, so the tracked pair
 // swaps roles.
716  LastVPNOTResult = VPNOT.getOperand(0).getReg();
717  std::swap(VCCRValue, OppositeVCCRValue);
718 
719  LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
720  << "' with '" << printReg(LastVPNOTResult)
721  << "' in instr: " << *Iter);
722  }
723  } else {
724  // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
725  // instead as they contain the same value.
726  if (MachineOperand *MO =
727  Iter->findRegisterUseOperand(OppositeVCCRValue)) {
728  IsInteresting = true;
729 
730  // This is pointless if LastVPNOTResult == OppositeVCCRValue.
731  if (LastVPNOTResult != OppositeVCCRValue) {
732  LLVM_DEBUG(dbgs() << "Replacing usage of '"
733  << printReg(OppositeVCCRValue) << "' with '"
734  << printReg(LastVPNOTResult) << " for instr: ";
735  Iter->dump());
736  MO->setReg(LastVPNOTResult);
737  Modified = true;
738  }
739 
740  MO->setIsKill(false);
741  }
742 
743  // If this is an unpredicated VPNOT on
744  // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
745  if (Iter->getOpcode() == ARM::MVE_VPNOT &&
747  Register VPNOTOperand = Iter->getOperand(1).getReg();
748  if (VPNOTOperand == LastVPNOTResult ||
749  VPNOTOperand == OppositeVCCRValue) {
750  IsInteresting = true;
751 
752  std::swap(VCCRValue, OppositeVCCRValue);
753  LastVPNOTResult = Iter->getOperand(0).getReg();
754  }
755  }
756  }
757 
758  // If this instruction was not interesting, and it writes to VCCR, stop.
759  if (!IsInteresting && IsWritingToVCCR(*Iter))
760  break;
761  }
762  }
763 
764  for (MachineInstr *DeadInstruction : DeadInstructions)
765  DeadInstruction->eraseFromParent();
766 
767  return Modified;
768 }
769 
770 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
771 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
772  SmallVector<MachineInstr *, 4> DeadInstructions;
773 
774  // The last VCMP that we have seen and that couldn't be replaced.
775  // This is reset when an instruction that writes to VCCR/VPR is found, or when
776  // a VCMP is replaced with a VPNOT.
777  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
778  // current VCMP is the opposite of PrevVCMP.
779  MachineInstr *PrevVCMP = nullptr;
780  // If we find an instruction that kills the result of PrevVCMP, we save the
781  // operand here to remove the kill flag in case we need to use PrevVCMP's
782  // result.
783  MachineOperand *PrevVCMPResultKiller = nullptr;
784 
785  for (MachineInstr &Instr : MBB.instrs()) {
786  if (PrevVCMP) {
787  if (MachineOperand *MO = Instr.findRegisterUseOperand(
788  PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
789  // If we come accross the instr that kills PrevVCMP's result, record it
790  // so we can remove the kill flag later if we need to.
791  PrevVCMPResultKiller = MO;
792  }
793  }
794 
795  // Ignore predicated instructions.
796  if (getVPTInstrPredicate(Instr) != ARMVCC::None)
797  continue;
798 
799  // Only look at VCMPs
800  if (!IsVCMP(Instr.getOpcode())) {
801  // If the instruction writes to VCCR, forget the previous VCMP.
802  if (IsWritingToVCCR(Instr))
803  PrevVCMP = nullptr;
804  continue;
805  }
806 
807  if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
808  PrevVCMP = &Instr;
809  continue;
810  }
811 
812  // The register containing the result of the VCMP that we're going to
813  // replace.
814  Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
815 
816  // Build a VPNOT to replace the VCMP, reusing its operands.
817  MachineInstrBuilder MIBuilder =
818  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
819  .add(Instr.getOperand(0))
820  .addReg(PrevVCMPResultReg);
821  addUnpredicatedMveVpredNOp(MIBuilder);
822  LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
823  MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
824  Instr.dump());
825 
826  // If we found an instruction that uses, and kills PrevVCMP's result,
827  // remove the kill flag.
828  if (PrevVCMPResultKiller)
829  PrevVCMPResultKiller->setIsKill(false);
830 
831  // Finally, mark the old VCMP for removal and reset
832  // PrevVCMP/PrevVCMPResultKiller.
833  DeadInstructions.push_back(&Instr);
834  PrevVCMP = nullptr;
835  PrevVCMPResultKiller = nullptr;
836  }
837 
838  for (MachineInstr *DeadInstruction : DeadInstructions)
839  DeadInstruction->eraseFromParent();
840 
841  return !DeadInstructions.empty();
842 }
843 
// Reuses constant predicate masks within MBB: a predicated instruction whose
// mask equals the last seen one is pointed at the earlier register, and one
// whose mask is the 16-bit complement of the last seen one gets a VPNOT of it.
// NOTE(review): the return value only reports whether an instruction was
// queued for deletion; operand rewrites alone are not reported. DT is not
// used in the visible lines. Listing line 904 is not visible in this listing.
844 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
845  MachineDominatorTree *DT) {
846  // Scan through the block, looking for instructions that use constant moves
847  // into VPR that are the negative of one another. These are expected to be
848  // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
849  // mask is kept, and VPNOT's of it are added or reused as we scan through
850  // the block.
851  unsigned LastVPTImm = 0;
852  Register LastVPTReg = 0;
853  SmallSet<MachineInstr *, 4> DeadInstructions;
854 
855  for (MachineInstr &Instr : MBB.instrs()) {
856  // Look for predicated MVE instructions.
857  int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
858  if (PIdx == -1)
859  continue;
 // The predicate register is the operand right after the one at PIdx.
860  Register VPR = Instr.getOperand(PIdx + 1).getReg();
861  if (!VPR.isVirtual())
862  continue;
863 
864  // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
865  MachineInstr *Copy = MRI->getVRegDef(VPR);
866  if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
867  !Copy->getOperand(1).getReg().isVirtual() ||
868  MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
 // Not a constant mask: forget the tracked predicate.
869  LastVPTReg = 0;
870  continue;
871  }
872  Register GPR = Copy->getOperand(1).getReg();
873 
874  // Find the Immediate used by the copy.
 // -1U is the "no immediate found" sentinel.
875  auto getImm = [&](Register GPR) -> unsigned {
876  MachineInstr *Def = MRI->getVRegDef(GPR);
877  if (Def && (Def->getOpcode() == ARM::t2MOVi ||
878  Def->getOpcode() == ARM::t2MOVi16))
879  return Def->getOperand(1).getImm();
880  return -1U;
881  };
882  unsigned Imm = getImm(GPR);
883  if (Imm == -1U) {
884  LastVPTReg = 0;
885  continue;
886  }
887 
 // Complement of the mask, restricted to the low 16 bits.
888  unsigned NotImm = ~Imm & 0xffff;
889  if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
 // Same constant as the tracked predicate: reuse its register.
890  Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
891  if (MRI->use_empty(VPR)) {
892  DeadInstructions.insert(Copy);
893  if (MRI->hasOneUse(GPR))
894  DeadInstructions.insert(MRI->getVRegDef(GPR));
895  }
896  LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
897  } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
898  // We have found the not of a previous constant. Create a VPNot of the
899  // earlier predicate reg and use it instead of the copy.
900  Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
901  auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
902  TII->get(ARM::MVE_VPNOT), NewVPR)
903  .addReg(LastVPTReg);
905 
906  // Use the new register and check if the def is now dead.
907  Instr.getOperand(PIdx + 1).setReg(NewVPR);
908  if (MRI->use_empty(VPR)) {
909  DeadInstructions.insert(Copy);
910  if (MRI->hasOneUse(GPR))
911  DeadInstructions.insert(MRI->getVRegDef(GPR));
912  }
913  LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
914  << Instr);
915  VPR = NewVPR;
916  }
917 
 // Track the predicate this instruction ends up using.
918  LastVPTImm = Imm;
919  LastVPTReg = VPR;
920  }
921 
922  for (MachineInstr *DI : DeadInstructions)
923  DI->eraseFromParent();
924 
925  return !DeadInstructions.empty();
926 }
927 
928 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
929 // somewhat blunt approximation to allow tail predicated with vpsel
930 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
931 // different semantics under tail predication. Until that is modelled we just
932 // convert to a VMOVT (via a predicated VORR) instead.
933 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
934  bool HasVCTP = false;
935  SmallVector<MachineInstr *, 4> DeadInstructions;
936 
937  for (MachineInstr &MI : MBB.instrs()) {
938  if (isVCTP(&MI)) {
939  HasVCTP = true;
940  continue;
941  }
942 
943  if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
944  continue;
945 
946  MachineInstrBuilder MIBuilder =
947  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
948  .add(MI.getOperand(0))
949  .add(MI.getOperand(1))
950  .add(MI.getOperand(1))
952  .add(MI.getOperand(4))
953  .add(MI.getOperand(2));
954  // Silence unused variable warning in release builds.
955  (void)MIBuilder;
956  LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
957  dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
958  DeadInstructions.push_back(&MI);
959  }
960 
961  for (MachineInstr *DeadInstruction : DeadInstructions)
962  DeadInstruction->eraseFromParent();
963 
964  return !DeadInstructions.empty();
965 }
966 
967 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
968 // the instruction may be removable as a noop.
969 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
970  bool Changed = false;
971  for (MachineInstr &MI : MBB.instrs()) {
972  if (MI.getOpcode() != ARM::t2DoLoopStart)
973  continue;
974  Register R = MI.getOperand(1).getReg();
975  MachineFunction *MF = MI.getParent()->getParent();
977  Changed = true;
978  }
979  return Changed;
980 }
981 
982 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
983  const ARMSubtarget &STI =
984  static_cast<const ARMSubtarget &>(Fn.getSubtarget());
985 
986  if (!STI.isThumb2() || !STI.hasLOB())
987  return false;
988 
989  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
990  MRI = &Fn.getRegInfo();
991  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
992  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
993 
994  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
995  << "********** Function: " << Fn.getName() << '\n');
996 
997  bool Modified = false;
998  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
999  Modified |= LowerWhileLoopStart(ML);
1000  Modified |= MergeLoopEnd(ML);
1001  Modified |= ConvertTailPredLoop(ML, DT);
1002  }
1003 
1004  for (MachineBasicBlock &MBB : Fn) {
1005  Modified |= HintDoLoopStartReg(MBB);
1006  Modified |= ReplaceConstByVPNOTs(MBB, DT);
1007  Modified |= ReplaceVCMPsByVPNOTs(MBB);
1008  Modified |= ReduceOldVCCRValueUses(MBB);
1009  Modified |= ConvertVPSEL(MBB);
1010  }
1011 
1012  LLVM_DEBUG(dbgs() << "**************************************\n");
1013  return Modified;
1014 }
1015 
1016 /// createMVETPAndVPTOptimisationsPass
1018  return new MVETPAndVPTOptimisations();
1019 }
ARMSubtarget.h
pass
ARM MVE TailPred and VPT Optimisations pass
Definition: MVETPAndVPTOptimisationsPass.cpp:91
IsVPNOTEquivalent
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
Definition: MVETPAndVPTOptimisationsPass.cpp:510
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::TargetRegisterClass::getID
unsigned getID() const
Return the register class ID number.
Definition: TargetRegisterInfo.h:69
llvm
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MachineRegisterInfo::use_instr_nodbg_end
static use_instr_nodbg_iterator use_instr_nodbg_end()
Definition: MachineRegisterInfo.h:538
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ARMSubtarget
Definition: ARMSubtarget.h:46
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
IsWritingToVCCR
static bool IsWritingToVCCR(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:536
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:263
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:124
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:500
llvm::SmallVector< MachineInstr *, 4 >
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:543
LookThroughCOPY
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
Definition: MVETPAndVPTOptimisationsPass.cpp:93
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1418
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::ARMSubtarget::getInstrInfo
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:559
llvm::MachineInstr::getMF
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Definition: MachineInstr.cpp:663
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:109
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:288
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:133
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
GetCondCode
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:503
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::ARMSubtarget::hasLOB
bool hasLOB() const
Definition: ARMSubtarget.h:657
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:568
llvm::ARMCC::EQ
@ EQ
Definition: ARMBaseInfo.h:31
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
MachineLoopInfo.h
ARMBaseInfo.h
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::User
Definition: User.h:44
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::RevertLoopDec
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
Definition: MVETailPredUtils.h:129
llvm::findFirstVPTPredOperandIdx
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
Definition: Thumb2InstrInfo.cpp:761
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
llvm::RevertLoopEnd
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Definition: MVETailPredUtils.h:151
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
MVETailPredUtils.h
llvm::getVPTInstrPredicate
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
Definition: Thumb2InstrInfo.cpp:774
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
findLoopComponents
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
Definition: MVETPAndVPTOptimisationsPass.cpp:104
LoopDeletionResult::Modified
@ Modified
Copies
SI Lower i1 Copies
Definition: SILowerI1Copies.cpp:406
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::VCMPOpcodeToVPT
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
Definition: ARMBaseInstrInfo.h:585
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
MoveVPNOTBeforeFirstUser
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
Definition: MVETPAndVPTOptimisationsPass.cpp:582
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::ARMRI::RegLR
@ RegLR
Definition: ARMBaseRegisterInfo.h:39
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
RevertWhileLoopSetup
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETPAndVPTOptimisationsPass.cpp:180
llvm::cl::opt< bool >
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, "ARM MVE TailPred and VPT Optimisations pass", false, false) INITIALIZE_PASS_END(MVETPAndVPTOptimisations
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1565
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:321
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::RevertDoLoopStart
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETailPredUtils.h:119
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::createMVETPAndVPTOptimisationsPass
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
Definition: MVETPAndVPTOptimisationsPass.cpp:1017
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::addUnpredicatedMveVpredNOp
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Definition: ARMBaseInstrInfo.cpp:867
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
MachineFunctionPass.h
llvm::MachineRegisterInfo::getRegClassOrNull
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
Definition: MachineRegisterInfo.h:651
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:522
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::isVCTP
static bool isVCTP(const MachineInstr *MI)
Definition: MVETailPredUtils.h:58
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::ARMCC::getSwappedCondition
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
Definition: ARMBaseInfo.h:71
ARM.h
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1540
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:241
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
IsVCMP
static bool IsVCMP(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:476
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:863
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm::MachineRegisterInfo::setRegAllocationHint
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
Definition: MachineRegisterInfo.h:765
llvm::RevertWhileLoopStartLR
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
Definition: MVETailPredUtils.h:83
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition: MachineInstrBuilder.h:89
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MVETPAndVPTOptimisationsPass.cpp:36
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1509
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Thumb2InstrInfo.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::Thumb2InstrInfo
Definition: Thumb2InstrInfo.h:23
llvm::ARMCC::CondCodes
CondCodes
Definition: ARMBaseInfo.h:30
llvm::MachineLoopInfo::getBase
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
Definition: MachineLoopInfo.h:106
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:510
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MergeEndDec
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
CanHaveSwappedOperands
static bool CanHaveSwappedOperands(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:481
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:481
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ARMCC::getOppositeCondition
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
llvm::ARMSubtarget::isThumb2
bool isThumb2() const
Definition: ARMSubtarget.h:809
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
MachineFunction.h
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:313
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38