LLVM  16.0.0git
MVETPAndVPTOptimisationsPass.cpp
Go to the documentation of this file.
1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40  cl::desc("Enable merging Loop End and Dec instructions."),
41  cl::init(true));
42 
43 static cl::opt<bool>
44 SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
45  cl::desc("Enable setting lr as a predicate in tail predication regions."),
46  cl::init(true));
47 
48 namespace {
// Machine function pass that performs the tail-predication and VPT related
// optimisations described in the file header.
// NOTE(review): this listing skips embedded source lines 53, 60-64 and 77-78,
// so at least one member (the MRI pointer used by the method bodies), the body
// of getAnalysisUsage and the tail of ReplaceRegisterUseWithVPNOT's parameter
// list are not visible here - confirm against the full file.
49 class MVETPAndVPTOptimisations : public MachineFunctionPass {
50 public:
// Pass identifier handed to the MachineFunctionPass base class.
51  static char ID;
// Instruction info used by the methods below to build replacement instructions.
52  const Thumb2InstrInfo *TII;
54 
55  MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56 
// Pass entry point (defined outside the visible part of this listing).
57  bool runOnMachineFunction(MachineFunction &Fn) override;
58 
// NOTE(review): the body of this override (embedded lines 60-64) is missing
// from the listing.
59  void getAnalysisUsage(AnalysisUsage &AU) const override {
65  }
66 
// Human-readable pass name.
67  StringRef getPassName() const override {
68  return "ARM MVE TailPred and VPT Optimisation Pass";
69  }
70 
71 private:
// Loop-level transforms; each returns true if it changed the function.
72  bool LowerWhileLoopStart(MachineLoop *ML);
73  bool MergeLoopEnd(MachineLoop *ML);
74  bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
// Inserts a VPNOT and redirects one register use to its result.
// NOTE(review): the remaining parameters (embedded lines 77-78) are missing.
75  MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76  MachineInstr &Instr,
// Block-level VCCR/VPNOT transforms.
79  bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
80  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
81  bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82  bool ConvertVPSEL(MachineBasicBlock &MBB);
83  bool HintDoLoopStartReg(MachineBasicBlock &MBB);
// Walks predecessors of PreHeader looking for instructions that are invalid
// in a low overhead loop; may replace LoopStart and return the replacement.
84  MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85  MachineInstr *LoopStart);
86 };
87 
89 
90 } // end anonymous namespace
91 
// Pass registration under the DEBUG_TYPE name ("arm-mve-vpt-opts").
// NOTE(review): embedded lines 95-96 between the BEGIN and END macros are
// missing from this listing (presumably dependency declarations) - confirm.
92 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
93  "ARM MVE TailPred and VPT Optimisations pass", false,
94  false)
97 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
98  "ARM MVE TailPred and VPT Optimisations pass", false, false)
99 
// NOTE(review): the signature of this helper (embedded lines 100-101) is
// missing from this listing. It is presumably LookThroughCOPY, which later
// code calls as LookThroughCOPY(<MachineInstr *>, MRI) - confirm.
// Follows a chain of COPYs whose source is a virtual register back through the
// defining instructions, and returns the first instruction reached that is
// null, is not a COPY, or copies from a non-virtual register.
102  while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103  MI->getOperand(1).getReg().isVirtual())
104  MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105  return MI;
106 }
107 
108 // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
109 // corresponding PHI that make up a low overhead loop. On success LoopStart is
110 // the t2DoLoopStart, t2WhileLoopSetup or t2WhileLoopStartLR feeding the PHI.
// If the latch ends in a t2LoopEndDec, LoopDec and LoopEnd are set to that same
// instruction. Returns false (after a debug message) as soon as any component
// is missing or has an unexpected shape; the output pointers must not be relied
// upon in that case.
// NOTE(review): the first signature line (embedded line 111) and the right-hand
// sides of the LoopDec and LoopPhi assignments (embedded lines 153 and 162) are
// missing from this listing - confirm against the full file.
112  MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113  MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114  MachineBasicBlock *Header = ML->getHeader();
115  MachineBasicBlock *Latch = ML->getLoopLatch();
116  if (!Header || !Latch) {
117  LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
118  return false;
119  }
120 
121  // Find the loop end from the terminators.
// The branch target operand is index 1 for t2LoopEnd and index 2 for
// t2LoopEndDec; it must point back at the loop header.
122  LoopEnd = nullptr;
123  for (auto &T : Latch->terminators()) {
124  if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125  LoopEnd = &T;
126  break;
127  }
128  if (T.getOpcode() == ARM::t2LoopEndDec &&
129  T.getOperand(2).getMBB() == Header) {
130  LoopEnd = &T;
131  break;
132  }
133  }
134  if (!LoopEnd) {
135  LLVM_DEBUG(dbgs() << " no LoopEnd\n");
136  return false;
137  }
138  LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
139 
140  // Find the dec from the use of the end. There may be copies between
141  // instructions. We expect the loop to look like:
142  // $vs = t2DoLoopStart ...
143  // loop:
144  // $vp = phi [ $vs ], [ $vd ]
145  // ...
146  // $vd = t2LoopDec $vp
147  // ...
148  // t2LoopEnd $vd, loop
149  if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150  LoopDec = LoopEnd;
151  else {
152  LoopDec =
// NOTE(review): the initialiser expression (embedded line 153) is missing here.
154  if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155  LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
156  return false;
157  }
158  }
159  LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
160 
161  LoopPhi =
// NOTE(review): the initialiser expression (embedded line 162) is missing here.
// The PHI must have exactly two incoming (value, block) pairs - 5 operands
// including the def - and one of the incoming blocks must be the latch.
163  if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164  LoopPhi->getNumOperands() != 5 ||
165  (LoopPhi->getOperand(2).getMBB() != Latch &&
166  LoopPhi->getOperand(4).getMBB() != Latch)) {
167  LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
168  return false;
169  }
170  LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
171 
// The start value is the PHI input that does NOT come from the latch.
172  Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173  ? LoopPhi->getOperand(3).getReg()
174  : LoopPhi->getOperand(1).getReg();
175  LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176  if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177  LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178  LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179  LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
180  return false;
181  }
182  LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
183 
184  return true;
185 }
186 
// Reverts a t2WhileLoopSetup (MI) to a flag-setting t2SUBri with immediate 0,
// converts the first t2WhileLoopStart terminator found in the same block to a
// t2Bcc on EQ, and erases both pseudo instructions.
// NOTE(review): the signature (embedded line 187) is missing from this listing;
// the caller below invokes it as RevertWhileLoopSetup(LoopStart, TII).
188  MachineBasicBlock *MBB = MI->getParent();
// NOTE(review): the assert message names RevertWhileLoopStart although callers
// use the name RevertWhileLoopSetup - the message looks stale.
189  assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190  "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191 
192  // Subs
// Built before MI, reusing MI's def (operand 0) and source (operand 1); the
// trailing CPSR def makes the subtraction set the flags read by the t2Bcc.
193  MachineInstrBuilder MIB =
194  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195  MIB.add(MI->getOperand(0));
196  MIB.add(MI->getOperand(1));
197  MIB.addImm(0);
198  MIB.addImm(ARMCC::AL);
199  MIB.addReg(ARM::NoRegister);
200  MIB.addReg(ARM::CPSR, RegState::Define);
201 
202  // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203  for (MachineInstr &I : MBB->terminators()) {
204  if (I.getOpcode() == ARM::t2WhileLoopStart) {
205  MachineInstrBuilder MIB =
206  BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
// NOTE(review): this adds MI's operand 1, the same operand used as the t2SUBri
// source above, whereas LowerWhileLoopStart takes the branch target from the
// t2WhileLoopStart's operand 1. I.getOperand(1) may have been intended - confirm.
207  MIB.add(MI->getOperand(1)); // branch target
208  MIB.addImm(ARMCC::EQ);
209  MIB.addReg(ARM::CPSR);
210  I.eraseFromParent();
211  break;
212  }
213  }
214 
215  MI->eraseFromParent();
216 }
217 
218 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219 // start of a while loop:
220 // %a:gprlr = t2WhileLoopSetup %Cnt
221 // t2WhileLoopStart %a, %BB
222 // We want to convert those to a single instruction which, like t2LoopEndDec and
223 // t2DoLoopStartTP is both a terminator and produces a value:
224 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225 //
226 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227 // t2WhileLoopStart are not valid past regalloc.
228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229  LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230  << ML->getHeader()->getName() << "\n");
231 
232  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234  return false;
235 
236  if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237  return false;
238 
239  Register LR = LoopStart->getOperand(0).getReg();
240  auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241  return MI.getOpcode() == ARM::t2WhileLoopStart;
242  });
243  if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244  RevertWhileLoopSetup(LoopStart, TII);
245  RevertLoopDec(LoopStart, TII);
246  RevertLoopEnd(LoopStart, TII);
247  return true;
248  }
249 
251  BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252  TII->get(ARM::t2WhileLoopStartLR), LR)
253  .add(LoopStart->getOperand(1))
254  .add(WLSIt->getOperand(1));
255  (void)MI;
256  LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257 
258  WLSIt->eraseFromParent();
259  LoopStart->eraseFromParent();
260  return true;
261 }
262 
263 // Return true if this instruction is invalid in a low overhead loop, usually
264 // because it clobbers LR.
// Concretely: any call, and anything isLoopStart() accepts.
// NOTE(review): the signature (embedded line 265) is missing from this listing;
// MergeLoopEnd calls this helper as IsInvalidTPInstruction(MI).
266  return MI.isCall() || isLoopStart(MI);
267 }
268 
269 // Starting from PreHeader, search for invalid instructions back until the
270 // LoopStart block is reached. If invalid instructions are found, the loop start
271 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272 // return the new DLS LoopStart if updated.
// Otherwise the original LoopStart is returned unchanged.
// NOTE(review): embedded lines 275-276 (presumably the Worklist and Visited
// declarations) and 286 (the condition guarding the `continue` in the inner
// loop) are missing from this listing - confirm against the full file.
273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274  MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
277  Worklist.push_back(PreHeader);
// Pre-marking the LoopStart block as visited stops the backwards walk there.
278  Visited.insert(LoopStart->getParent());
279 
280  while (!Worklist.empty()) {
281  MachineBasicBlock *MBB = Worklist.pop_back_val();
282  if (Visited.count(MBB))
283  continue;
284 
285  for (MachineInstr &MI : *MBB) {
// NOTE(review): the `if (...)` this `continue` belongs to is missing here.
287  continue;
288 
289  LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290 
291  // Create a t2DoLoopStart at the end of the preheader.
292  MachineInstrBuilder MIB =
293  BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294  LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295  MIB.add(LoopStart->getOperand(0));
296  MIB.add(LoopStart->getOperand(1));
297 
298  // Make sure to remove the kill flags, to prevent them from being invalid.
299  LoopStart->getOperand(1).setIsKill(false);
300 
301  // Revert the t2WhileLoopStartLR to a CMP and Br.
302  RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
// The builder converts to the newly created t2DoLoopStart instruction.
303  return MIB;
304  }
305 
306  Visited.insert(MBB);
307  for (auto *Pred : MBB->predecessors())
308  Worklist.push_back(Pred);
309  }
310  return LoopStart;
311 }
312 
313 // This function converts loops with t2LoopDec and t2LoopEnd instructions into
314 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
315 // will be valid to be used for the low overhead loop, which means nothing else
316 // is using LR (especially calls) and there are no superfluous copies in the
317 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
318 // decrement) around the loop edge, which means we need to be careful that they
319 // will be valid to allocate without any spilling.
// Returns true if the function was modified (merged or reverted), false if
// merging is disabled, the loop components were not found, or unexpected users
// were found on a loop that does not start with a t2WhileLoopStartLR.
// NOTE(review): embedded lines 360 (presumably the Copies declaration), 363
// (end of the CheckUsers parameter list), 368 (the loop over users of Reg),
// 412 (presumably the MBBI declaration) and 419 (the declaration receiving the
// BuildMI result, named MI) are missing from this listing - confirm.
320 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
321  if (!MergeEndDec)
322  return false;
323 
324  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
325  << "\n");
326 
327  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
328  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
329  return false;
330 
331  // Check if there is an illegal instruction (a call) in the low overhead loop
332  // and if so revert it now before we get any further. While loops also need to
333  // check the preheaders, but can be reverted to a DLS loop if needed.
334  auto *PreHeader = ML->getLoopPreheader();
335  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336  LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
337 
338  for (MachineBasicBlock *MBB : ML->blocks()) {
339  for (MachineInstr &MI : *MBB) {
340  if (IsInvalidTPInstruction(MI)) {
341  LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
342  if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
343  RevertDoLoopStart(LoopStart, TII);
344  else
345  RevertWhileLoopStartLR(LoopStart, TII);
346  RevertLoopDec(LoopDec, TII);
347  RevertLoopEnd(LoopEnd, TII);
348  return true;
349  }
350  }
351  }
352 
353  // Remove any copies from the loop, to ensure the phi that remains is both
354  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
355  // that cannot spill, we need to be careful what remains in the loop.
356  Register PhiReg = LoopPhi->getOperand(0).getReg();
357  Register DecReg = LoopDec->getOperand(0).getReg();
358  Register StartReg = LoopStart->getOperand(0).getReg();
359  // Ensure the uses are expected, and collect any copies we want to remove.
// CheckUsers walks the users of BaseReg transitively through COPYs to virtual
// registers. Users listed in ExpectedUsers are accepted, COPYs are recorded in
// Copies and followed, and any other user makes it return false.
361  auto CheckUsers = [&Copies](Register BaseReg,
362  ArrayRef<MachineInstr *> ExpectedUsers,
364  SmallVector<Register, 4> Worklist;
365  Worklist.push_back(BaseReg);
366  while (!Worklist.empty()) {
367  Register Reg = Worklist.pop_back_val();
369  if (llvm::is_contained(ExpectedUsers, &MI))
370  continue;
371  if (MI.getOpcode() != TargetOpcode::COPY ||
372  !MI.getOperand(0).getReg().isVirtual()) {
373  LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
374  return false;
375  }
376  Worklist.push_back(MI.getOperand(0).getReg());
377  Copies.push_back(&MI);
378  }
379  }
380  return true;
381  };
382  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
383  !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
384  !CheckUsers(StartReg, {LoopPhi}, MRI)) {
385  // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
386  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
387  RevertWhileLoopStartLR(LoopStart, TII);
388  RevertLoopDec(LoopDec, TII);
389  RevertLoopEnd(LoopEnd, TII);
390  return true;
391  }
392  return false;
393  }
394 
395  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
398 
// Point the PHI straight at StartReg/DecReg (bypassing the copies collected
// above): the input paired with the latch block gets DecReg, the other gets
// StartReg.
399  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
400  LoopPhi->getOperand(3).setReg(StartReg);
401  LoopPhi->getOperand(1).setReg(DecReg);
402  } else {
403  LoopPhi->getOperand(1).setReg(StartReg);
404  LoopPhi->getOperand(3).setReg(DecReg);
405  }
406 
407  SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
408  MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
409  if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
410  // If the LoopEnd falls through, need to insert a t2B to the fall-through
411  // block so that the non-analyzable t2LoopEndDec doesn't fall through.
413  BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
414  .addMBB(&*MBBI)
415  .add(predOps(ARMCC::AL));
416  }
417 
418  // Replace the loop dec and loop end as a single instruction.
420  BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
421  TII->get(ARM::t2LoopEndDec), DecReg)
422  .addReg(PhiReg)
423  .add(LoopEnd->getOperand(1));
424  (void)MI;
425  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
426 
427  LoopDec->eraseFromParent();
428  LoopEnd->eraseFromParent();
429  for (auto *MI : Copies)
430  MI->eraseFromParent();
431  return true;
432 }
433 
434 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
435 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
436 // instruction, making the backend ARMLowOverheadLoops pass's job of finding the
437 // VCTP operand much simpler.
// A t2WhileLoopStartLR is converted to t2WhileLoopStartTP in the same way.
// Returns true if the loop start was replaced, false otherwise.
// NOTE(review): embedded lines 452-453 (presumably the VCTPs and MVEInstrs
// declarations), 509 (the InsertPt declaration) and 521 (the declaration
// receiving the BuildMI result, named MI) are missing from this listing.
438 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
439  MachineDominatorTree *DT) {
440  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
441  << ML->getHeader()->getName() << "\n");
442 
443  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
444  // in the loop.
445  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
446  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
447  return false;
// Only loops whose dec and end are the same instruction (findLoopComponents
// sets LoopDec = LoopEnd for a t2LoopEndDec) are handled.
448  if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
449  LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
450  return false;
451 
// Collect the VCTPs, and separately every other instruction that has a VPT
// predicate operand.
454  for (MachineBasicBlock *BB : ML->blocks()) {
455  for (MachineInstr &MI : *BB)
456  if (isVCTP(&MI))
457  VCTPs.push_back(&MI);
458  else if (findFirstVPTPredOperandIdx(MI) != -1)
459  MVEInstrs.push_back(&MI);
460  }
461 
462  if (VCTPs.empty()) {
463  LLVM_DEBUG(dbgs() << " no VCTPs\n");
464  return false;
465  }
466 
467  // Check all VCTPs are the same.
468  MachineInstr *FirstVCTP = *VCTPs.begin();
469  for (MachineInstr *VCTP : VCTPs) {
470  LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
471  if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
472  VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
473  LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
474  return false;
475  }
476  }
477 
478  // Check for the register being used can be setup before the loop. We expect
479  // this to be:
480  // $vx = ...
481  // loop:
482  // $vp = PHI [ $vx ], [ $vd ]
483  // ..
484  // $vpr = VCTP $vp
485  // ..
486  // $vd = t2SUBri $vp, #n
487  // ..
488  Register CountReg = FirstVCTP->getOperand(1).getReg();
489  if (!CountReg.isVirtual()) {
490  LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
491  return false;
492  }
493  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
494  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
495  Phi->getNumOperands() != 5 ||
496  (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
497  Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
498  LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
499  return false;
500  }
// From here on CountReg is the PHI input that does not come from the latch,
// i.e. the element count on entry to the loop.
501  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
502  ? Phi->getOperand(3).getReg()
503  : Phi->getOperand(1).getReg();
504 
505  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
506  // the preheader and add the new CountReg to it. We attempt to place it late
507  // in the preheader, but may need to move that earlier based on uses.
508  MachineBasicBlock *MBB = LoopStart->getParent();
// Give up if any user of the loop start value is not dominated by the chosen
// insertion point, or lies outside the blocks dominated by the loop header.
510  for (MachineInstr &Use :
511  MRI->use_instructions(LoopStart->getOperand(0).getReg()))
512  if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
513  !DT->dominates(ML->getHeader(), Use.getParent())) {
514  LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
515  return false;
516  }
517 
518  unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
519  ? ARM::t2DoLoopStartTP
520  : ARM::t2WhileLoopStartTP;
522  BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
523  .add(LoopStart->getOperand(0))
524  .add(LoopStart->getOperand(1))
525  .addReg(CountReg);
// The while form additionally carries operand 2 of the old loop start.
526  if (NewOpc == ARM::t2WhileLoopStartTP)
527  MI.add(LoopStart->getOperand(2));
528  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
529  << *MI.getInstr());
530  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
531  LoopStart->eraseFromParent();
532 
533  if (SetLRPredicate) {
534  // Each collected MVE instruction in the loop gets the LR value defined by
535  // the Phi set on the operand two past its first VPT predicate operand.
536  Register LR = LoopPhi->getOperand(0).getReg();
537  for (MachineInstr *MI : MVEInstrs) {
538  int Idx = findFirstVPTPredOperandIdx(*MI);
539  MI->getOperand(Idx + 2).setReg(LR);
540  }
541  }
542 
543  return true;
544 }
545 
546 // Returns true if Opcode is any VCMP Opcode.
547 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
548 
549 // Returns true if a VCMP with this Opcode can have its operands swapped.
550 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
551 // and VCMPr instructions (since the r is always on the right).
552 static bool CanHaveSwappedOperands(unsigned Opcode) {
553  switch (Opcode) {
554  default:
555  return true;
556  case ARM::MVE_VCMPf32:
557  case ARM::MVE_VCMPf16:
558  case ARM::MVE_VCMPf32r:
559  case ARM::MVE_VCMPf16r:
560  case ARM::MVE_VCMPi8r:
561  case ARM::MVE_VCMPi16r:
562  case ARM::MVE_VCMPi32r:
563  case ARM::MVE_VCMPu8r:
564  case ARM::MVE_VCMPu16r:
565  case ARM::MVE_VCMPu32r:
566  case ARM::MVE_VCMPs8r:
567  case ARM::MVE_VCMPs16r:
568  case ARM::MVE_VCMPs32r:
569  return false;
570  }
571 }
572 
573 // Returns the CondCode of a VCMP Instruction.
// The condition is read from the immediate in operand 3 of Instr.
// NOTE(review): the signature (embedded line 574) is missing from this listing;
// callers below use it as GetCondCode(<MachineInstr &>).
575  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
576  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
577 }
578 
579 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
580 // Prev. Cond and Prev must be VCMPs.
// That is the case when both have the same opcode and either
//  - the same operands 1 and 2, with opposite condition codes, or
//  - (only for opcodes accepted by CanHaveSwappedOperands) operands 1 and 2
//    exchanged, with Prev's condition being the swapped form of the opposite
//    of Cond's condition.
// NOTE(review): the signature (embedded line 581) is missing from this listing;
// the caller below invokes it as IsVPNOTEquivalent(Instr, *PrevVCMP).
582  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
583 
584  // Opcodes must match.
585  if (Cond.getOpcode() != Prev.getOpcode())
586  return false;
587 
588  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
589  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
590 
591  // If the VCMP has the opposite condition with the same operands, we can
592  // replace it with a VPNOT
593  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
594  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
595  if (ExpectedCode == GetCondCode(Prev))
596  if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
597  return true;
598  // Check again with operands swapped if possible
599  if (!CanHaveSwappedOperands(Cond.getOpcode()))
600  return false;
// ExpectedCode already holds the opposite condition; swap it to account for
// the exchanged operand order.
601  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
602  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
603  CondOP2.isIdenticalTo(PrevOP1);
604 }
605 
606 // Returns true if Instr writes to VCCR.
607 static bool IsWritingToVCCR(MachineInstr &Instr) {
608  if (Instr.getNumOperands() == 0)
609  return false;
610  MachineOperand &Dst = Instr.getOperand(0);
611  if (!Dst.isReg())
612  return false;
613  Register DstReg = Dst.getReg();
614  if (!DstReg.isVirtual())
615  return false;
616  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
617  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
618  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
619 }
620 
621 // Transforms
622 // <Instr that uses %A ('User' Operand)>
623 // Into
624 // %K = VPNOT %Target
625 // <Instr that uses %K ('User' Operand)>
626 // And returns the newly inserted VPNOT.
627 // This optimization is done in the hopes of preventing spills/reloads of VPR by
628 // reducing the number of VCCR values with overlapping lifetimes.
// The VPNOT is inserted immediately before Instr and is unpredicated.
// NOTE(review): embedded lines 630 (the remaining parameters, including User)
// and 632 (presumably the creation of NewResult) are missing from this listing.
629 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
631  Register Target) {
633 
634  MachineInstrBuilder MIBuilder =
635  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
636  .addDef(NewResult)
637  .addReg(Target);
638  addUnpredicatedMveVpredNOp(MIBuilder);
639 
640  // Make the user use NewResult instead, and clear its kill flag.
641  User.setReg(NewResult);
642  User.setIsKill(false);
643 
644  LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
645  MIBuilder.getInstr()->dump());
646 
647  return *MIBuilder.getInstr();
648 }
649 
650 // Moves a VPNOT before its first user if an instruction that uses Reg is found
651 // in-between the VPNOT and its user.
652 // Returns true if there is at least one user of the VPNOT in the block.
// NOTE(review): embedded lines 653-654 (the start of the signature) and 657
// (the start of the second assert) are missing from this listing; the caller
// invokes this as MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue).
655  Register Reg) {
656  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
658  "The VPNOT cannot be predicated");
659 
660  MachineInstr &VPNOT = *Iter;
661  Register VPNOTResult = VPNOT.getOperand(0).getReg();
662  Register VPNOTOperand = VPNOT.getOperand(1).getReg();
663 
664  // Whether the VPNOT will need to be moved, and whether we found a user of the
665  // VPNOT.
666  bool MustMove = false, HasUser = false;
667  MachineOperand *VPNOTOperandKiller = nullptr;
// NOTE(review): the scan starts at the VPNOT itself. When Reg is the VPNOT's
// own operand (as with the visible caller), the first iteration already sets
// MustMove - confirm this is intended.
668  for (; Iter != MBB.end(); ++Iter) {
669  if (MachineOperand *MO =
670  Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
671  // If we find the operand that kills the VPNOTOperand's result, save it.
672  VPNOTOperandKiller = MO;
673  }
674 
675  if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
676  MustMove = true;
677  continue;
678  }
679 
680  if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
681  continue;
682 
// Iter is now the first instruction using the VPNOT's result.
683  HasUser = true;
684  if (!MustMove)
685  break;
686 
687  // Move the VPNOT right before Iter
688  LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
689  Iter->dump());
690  MBB.splice(Iter, &MBB, VPNOT.getIterator());
691  // If we move the instr, and its operand was killed earlier, remove the kill
692  // flag.
693  if (VPNOTOperandKiller)
694  VPNOTOperandKiller->setIsKill(false);
695 
696  break;
697  }
698  return HasUser;
699 }
700 
701 // This optimisation attempts to reduce the number of overlapping lifetimes of
702 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
703 // this replaces
704 // %A:vccr = (something)
705 // %B:vccr = VPNOT %A
706 // %Foo = (some op that uses %B)
707 // %Bar = (some op that uses %A)
708 // With
709 // %A:vccr = (something)
710 // %B:vccr = VPNOT %A
711 // %Foo = (some op that uses %B)
712 // %TMP2:vccr = VPNOT %B
713 // %Bar = (some op that uses %TMP2)
// Returns true if the block was modified.
// NOTE(review): embedded lines 727 and 817 (the second halves of two
// conditions, presumably the "unpredicated" checks the surrounding comments
// mention) are missing from this listing - confirm against the full file.
714 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
715  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
// Instructions made redundant are only erased after the scan, so iterators
// stay valid while walking the block.
716  SmallVector<MachineInstr *, 4> DeadInstructions;
717  bool Modified = false;
718 
719  while (Iter != End) {
720  Register VCCRValue, OppositeVCCRValue;
721  // The first loop looks for 2 unpredicated instructions:
722  // %A:vccr = (instr) ; A is stored in VCCRValue
723  // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
724  for (; Iter != End; ++Iter) {
725  // We're only interested in unpredicated instructions that write to VCCR.
726  if (!IsWritingToVCCR(*Iter) ||
728  continue;
729  Register Dst = Iter->getOperand(0).getReg();
730 
731  // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
732  // found what we were looking for.
733  if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
734  Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
735  // Move the VPNOT closer to its first user if needed, and ignore if it
736  // has no users.
737  if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
738  continue;
739 
740  OppositeVCCRValue = Dst;
741  ++Iter;
742  break;
743  }
744 
745  // Else, just set VCCRValue.
746  VCCRValue = Dst;
747  }
748 
749  // If the first inner loop didn't find anything, stop here.
750  if (Iter == End)
751  break;
752 
753  assert(VCCRValue && OppositeVCCRValue &&
754  "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
755  "stopped before the end of the block!");
756  assert(VCCRValue != OppositeVCCRValue &&
757  "VCCRValue should not be equal to OppositeVCCRValue!");
758 
759  // LastVPNOTResult always contains the same value as OppositeVCCRValue.
760  Register LastVPNOTResult = OppositeVCCRValue;
761 
762  // This second loop tries to optimize the remaining instructions.
763  for (; Iter != End; ++Iter) {
764  bool IsInteresting = false;
765 
766  if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
767  IsInteresting = true;
768 
769  // - If the instruction is a VPNOT, it can be removed, and we can just
770  // replace its uses with LastVPNOTResult.
771  // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
772  if (Iter->getOpcode() == ARM::MVE_VPNOT) {
773  Register Result = Iter->getOperand(0).getReg();
774 
775  MRI->replaceRegWith(Result, LastVPNOTResult);
776  DeadInstructions.push_back(&*Iter);
777  Modified = true;
778 
779  LLVM_DEBUG(dbgs()
780  << "Replacing all uses of '" << printReg(Result)
781  << "' with '" << printReg(LastVPNOTResult) << "'\n");
782  } else {
783  MachineInstr &VPNOT =
784  ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
785  Modified = true;
786 
// The freshly inserted VPNOT flips which of the two tracked values is current,
// so the roles of VCCRValue and OppositeVCCRValue are exchanged.
787  LastVPNOTResult = VPNOT.getOperand(0).getReg();
788  std::swap(VCCRValue, OppositeVCCRValue);
789 
790  LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
791  << "' with '" << printReg(LastVPNOTResult)
792  << "' in instr: " << *Iter);
793  }
794  } else {
795  // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
796  // instead as they contain the same value.
797  if (MachineOperand *MO =
798  Iter->findRegisterUseOperand(OppositeVCCRValue)) {
799  IsInteresting = true;
800 
801  // This is pointless if LastVPNOTResult == OppositeVCCRValue.
802  if (LastVPNOTResult != OppositeVCCRValue) {
803  LLVM_DEBUG(dbgs() << "Replacing usage of '"
804  << printReg(OppositeVCCRValue) << "' with '"
805  << printReg(LastVPNOTResult) << " for instr: ";
806  Iter->dump());
807  MO->setReg(LastVPNOTResult);
808  Modified = true;
809  }
810 
811  MO->setIsKill(false);
812  }
813 
814  // If this is an unpredicated VPNOT on
815  // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
816  if (Iter->getOpcode() == ARM::MVE_VPNOT &&
818  Register VPNOTOperand = Iter->getOperand(1).getReg();
819  if (VPNOTOperand == LastVPNOTResult ||
820  VPNOTOperand == OppositeVCCRValue) {
821  IsInteresting = true;
822 
823  std::swap(VCCRValue, OppositeVCCRValue);
824  LastVPNOTResult = Iter->getOperand(0).getReg();
825  }
826  }
827  }
828 
829  // If this instruction was not interesting, and it writes to VCCR, stop.
830  if (!IsInteresting && IsWritingToVCCR(*Iter))
831  break;
832  }
833  }
834 
835  for (MachineInstr *DeadInstruction : DeadInstructions)
836  DeadInstruction->eraseFromParent();
837 
838  return Modified;
839 }
840 
841 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
842 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
843  SmallVector<MachineInstr *, 4> DeadInstructions;
844 
845  // The last VCMP that we have seen and that couldn't be replaced.
846  // This is reset when an instruction that writes to VCCR/VPR is found, or when
847  // a VCMP is replaced with a VPNOT.
848  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
849  // current VCMP is the opposite of PrevVCMP.
850  MachineInstr *PrevVCMP = nullptr;
851  // If we find an instruction that kills the result of PrevVCMP, we save the
852  // operand here to remove the kill flag in case we need to use PrevVCMP's
853  // result.
854  MachineOperand *PrevVCMPResultKiller = nullptr;
855 
856  for (MachineInstr &Instr : MBB.instrs()) {
857  if (PrevVCMP) {
858  if (MachineOperand *MO = Instr.findRegisterUseOperand(
859  PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
860  // If we come accross the instr that kills PrevVCMP's result, record it
861  // so we can remove the kill flag later if we need to.
862  PrevVCMPResultKiller = MO;
863  }
864  }
865 
866  // Ignore predicated instructions.
867  if (getVPTInstrPredicate(Instr) != ARMVCC::None)
868  continue;
869 
870  // Only look at VCMPs
871  if (!IsVCMP(Instr.getOpcode())) {
872  // If the instruction writes to VCCR, forget the previous VCMP.
873  if (IsWritingToVCCR(Instr))
874  PrevVCMP = nullptr;
875  continue;
876  }
877 
878  if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
879  PrevVCMP = &Instr;
880  continue;
881  }
882 
883  // The register containing the result of the VCMP that we're going to
884  // replace.
885  Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
886 
887  // Build a VPNOT to replace the VCMP, reusing its operands.
888  MachineInstrBuilder MIBuilder =
889  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
890  .add(Instr.getOperand(0))
891  .addReg(PrevVCMPResultReg);
892  addUnpredicatedMveVpredNOp(MIBuilder);
893  LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
894  MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
895  Instr.dump());
896 
897  // If we found an instruction that uses, and kills PrevVCMP's result,
898  // remove the kill flag.
899  if (PrevVCMPResultKiller)
900  PrevVCMPResultKiller->setIsKill(false);
901 
902  // Finally, mark the old VCMP for removal and reset
903  // PrevVCMP/PrevVCMPResultKiller.
904  DeadInstructions.push_back(&Instr);
905  PrevVCMP = nullptr;
906  PrevVCMPResultKiller = nullptr;
907  }
908 
909  for (MachineInstr *DeadInstruction : DeadInstructions)
910  DeadInstruction->eraseFromParent();
911 
912  return !DeadInstructions.empty();
913 }
914 
915 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
916  MachineDominatorTree *DT) {
917  // Scan through the block, looking for instructions that use constants moves
918  // into VPR that are the negative of one another. These are expected to be
919  // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
920  // mask is kept it or and VPNOT's of it are added or reused as we scan through
921  // the function.
922  unsigned LastVPTImm = 0;
923  Register LastVPTReg = 0;
924  SmallSet<MachineInstr *, 4> DeadInstructions;
925 
926  for (MachineInstr &Instr : MBB.instrs()) {
927  // Look for predicated MVE instructions.
928  int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
929  if (PIdx == -1)
930  continue;
931  Register VPR = Instr.getOperand(PIdx + 1).getReg();
932  if (!VPR.isVirtual())
933  continue;
934 
935  // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
936  MachineInstr *Copy = MRI->getVRegDef(VPR);
937  if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
938  !Copy->getOperand(1).getReg().isVirtual() ||
939  MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
940  LastVPTReg = 0;
941  continue;
942  }
943  Register GPR = Copy->getOperand(1).getReg();
944 
945  // Find the Immediate used by the copy.
946  auto getImm = [&](Register GPR) -> unsigned {
947  MachineInstr *Def = MRI->getVRegDef(GPR);
948  if (Def && (Def->getOpcode() == ARM::t2MOVi ||
949  Def->getOpcode() == ARM::t2MOVi16))
950  return Def->getOperand(1).getImm();
951  return -1U;
952  };
953  unsigned Imm = getImm(GPR);
954  if (Imm == -1U) {
955  LastVPTReg = 0;
956  continue;
957  }
958 
959  unsigned NotImm = ~Imm & 0xffff;
960  if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
961  Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
962  if (MRI->use_empty(VPR)) {
963  DeadInstructions.insert(Copy);
964  if (MRI->hasOneUse(GPR))
965  DeadInstructions.insert(MRI->getVRegDef(GPR));
966  }
967  LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
968  } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
969  // We have found the not of a previous constant. Create a VPNot of the
970  // earlier predicate reg and use it instead of the copy.
971  Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
972  auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
973  TII->get(ARM::MVE_VPNOT), NewVPR)
974  .addReg(LastVPTReg);
976 
977  // Use the new register and check if the def is now dead.
978  Instr.getOperand(PIdx + 1).setReg(NewVPR);
979  if (MRI->use_empty(VPR)) {
980  DeadInstructions.insert(Copy);
981  if (MRI->hasOneUse(GPR))
982  DeadInstructions.insert(MRI->getVRegDef(GPR));
983  }
984  LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
985  << Instr);
986  VPR = NewVPR;
987  }
988 
989  LastVPTImm = Imm;
990  LastVPTReg = VPR;
991  }
992 
993  for (MachineInstr *DI : DeadInstructions)
994  DI->eraseFromParent();
995 
996  return !DeadInstructions.empty();
997 }
998 
999 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1000 // somewhat blunt approximation to allow tail predicated with vpsel
1001 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1002 // different semantics under tail predication. Until that is modelled we just
1003 // convert to a VMOVT (via a predicated VORR) instead.
1004 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1005  bool HasVCTP = false;
1006  SmallVector<MachineInstr *, 4> DeadInstructions;
1007 
1008  for (MachineInstr &MI : MBB.instrs()) {
1009  if (isVCTP(&MI)) {
1010  HasVCTP = true;
1011  continue;
1012  }
1013 
1014  if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1015  continue;
1016 
1017  MachineInstrBuilder MIBuilder =
1018  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1019  .add(MI.getOperand(0))
1020  .add(MI.getOperand(1))
1021  .add(MI.getOperand(1))
1023  .add(MI.getOperand(4))
1024  .add(MI.getOperand(5))
1025  .add(MI.getOperand(2));
1026  // Silence unused variable warning in release builds.
1027  (void)MIBuilder;
1028  LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1029  dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1030  DeadInstructions.push_back(&MI);
1031  }
1032 
1033  for (MachineInstr *DeadInstruction : DeadInstructions)
1034  DeadInstruction->eraseFromParent();
1035 
1036  return !DeadInstructions.empty();
1037 }
1038 
1039 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1040 // the instruction may be removable as a noop.
1041 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1042  bool Changed = false;
1043  for (MachineInstr &MI : MBB.instrs()) {
1044  if (MI.getOpcode() != ARM::t2DoLoopStart)
1045  continue;
1046  Register R = MI.getOperand(1).getReg();
1047  MachineFunction *MF = MI.getParent()->getParent();
1049  Changed = true;
1050  }
1051  return Changed;
1052 }
1053 
1054 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1055  const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1056 
1057  if (!STI.isThumb2() || !STI.hasLOB())
1058  return false;
1059 
1060  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1061  MRI = &Fn.getRegInfo();
1062  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1063  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1064 
1065  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1066  << "********** Function: " << Fn.getName() << '\n');
1067 
1068  bool Modified = false;
1069  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1070  Modified |= LowerWhileLoopStart(ML);
1071  Modified |= MergeLoopEnd(ML);
1072  Modified |= ConvertTailPredLoop(ML, DT);
1073  }
1074 
1075  for (MachineBasicBlock &MBB : Fn) {
1076  Modified |= HintDoLoopStartReg(MBB);
1077  Modified |= ReplaceConstByVPNOTs(MBB, DT);
1078  Modified |= ReplaceVCMPsByVPNOTs(MBB);
1079  Modified |= ReduceOldVCCRValueUses(MBB);
1080  Modified |= ConvertVPSEL(MBB);
1081  }
1082 
1083  LLVM_DEBUG(dbgs() << "**************************************\n");
1084  return Modified;
1085 }
1086 
1087 /// createMVETPAndVPTOptimisationsPass
1089  return new MVETPAndVPTOptimisations();
1090 }
ARMSubtarget.h
pass
ARM MVE TailPred and VPT Optimisations pass
Definition: MVETPAndVPTOptimisationsPass.cpp:98
IsVPNOTEquivalent
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
Definition: MVETPAndVPTOptimisationsPass.cpp:581
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::TargetRegisterClass::getID
unsigned getID() const
Return the register class ID number.
Definition: TargetRegisterInfo.h:74
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::MachineRegisterInfo::use_instr_nodbg_end
static use_instr_nodbg_iterator use_instr_nodbg_end()
Definition: MachineRegisterInfo.h:546
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::ARMSubtarget
Definition: ARMSubtarget.h:47
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
T
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
IsWritingToVCCR
static bool IsWritingToVCCR(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:607
llvm::HexagonInstrInfo::analyzeBranch
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: HexagonInstrInfo.cpp:434
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:300
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:150
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:509
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:551
LookThroughCOPY
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
Definition: MVETPAndVPTOptimisationsPass.cpp:100
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:493
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1453
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
llvm::ARMSubtarget::getInstrInfo
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:262
llvm::MachineInstr::getMF
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Definition: MachineInstr.cpp:678
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:114
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:325
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
SetLRPredicate
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
GetCondCode
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:574
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::ARMCC::EQ
@ EQ
Definition: ARMBaseInfo.h:31
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
MachineLoopInfo.h
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:146
ARMBaseInfo.h
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::User
Definition: User.h:44
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::RevertLoopDec
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
Definition: MVETailPredUtils.h:145
llvm::findFirstVPTPredOperandIdx
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
Definition: Thumb2InstrInfo.cpp:774
llvm::RevertLoopEnd
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Definition: MVETailPredUtils.h:167
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
TBB
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
Definition: RISCVRedundantCopyElimination.cpp:76
false
Definition: StackSlotColoring.cpp:141
MVETailPredUtils.h
llvm::getVPTInstrPredicate
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
Definition: Thumb2InstrInfo.cpp:787
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
findLoopComponents
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
Definition: MVETPAndVPTOptimisationsPass.cpp:111
LoopDeletionResult::Modified
@ Modified
Copies
SI Lower i1 Copies
Definition: SILowerI1Copies.cpp:397
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::VCMPOpcodeToVPT
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
Definition: ARMBaseInstrInfo.h:586
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:514
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
MoveVPNOTBeforeFirstUser
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
Definition: MVETPAndVPTOptimisationsPass.cpp:653
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
RevertWhileLoopSetup
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETPAndVPTOptimisationsPass.cpp:187
llvm::cl::opt< bool >
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, "ARM MVE TailPred and VPT Optimisations pass", false, false) INITIALIZE_PASS_END(MVETPAndVPTOptimisations
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::RevertDoLoopStart
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETailPredUtils.h:135
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::createMVETPAndVPTOptimisationsPass
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
Definition: MVETPAndVPTOptimisationsPass.cpp:1088
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::addUnpredicatedMveVpredNOp
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Definition: ARMBaseInstrInfo.cpp:866
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
MachineFunctionPass.h
llvm::MachineRegisterInfo::getRegClassOrNull
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
Definition: MachineRegisterInfo.h:664
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::isVCTP
static bool isVCTP(const MachineInstr *MI)
Definition: MVETailPredUtils.h:58
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
InlinePriorityMode::ML
@ ML
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
llvm::ARMCC::getSwappedCondition
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
Definition: ARMBaseInfo.h:71
ARM.h
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1526
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
IsVCMP
static bool IsVCMP(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:547
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1009
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm::MachineRegisterInfo::setRegAllocationHint
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
Definition: MachineRegisterInfo.h:778
llvm::RevertWhileLoopStartLR
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
Definition: MVETailPredUtils.h:98
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:136
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition: MachineInstrBuilder.h:89
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MVETPAndVPTOptimisationsPass.cpp:36
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1761
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Thumb2InstrInfo.h
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::Thumb2InstrInfo
Definition: Thumb2InstrInfo.h:22
llvm::ARMCC::CondCodes
CondCodes
Definition: ARMBaseInfo.h:30
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::RISCVMatInt::Imm
@ Imm
Definition: RISCVMatInt.h:23
llvm::MachineLoopInfo::getBase
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
Definition: MachineLoopInfo.h:105
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:518
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
MergeEndDec
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
CanHaveSwappedOperands
static bool CanHaveSwappedOperands(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:552
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:56
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:519
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:82
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::predOps
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Definition: ARMBaseInstrInfo.h:541
llvm::ARMCC::getOppositeCondition
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
llvm::ARMSubtarget::isThumb2
bool isThumb2() const
Definition: ARMSubtarget.h:421
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::ARMRI::RegLR
@ RegLR
Definition: ARMBaseRegisterInfo.h:39
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:412
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
MachineFunction.h
IsInvalidTPInstruction
static bool IsInvalidTPInstruction(MachineInstr &MI)
Definition: MVETPAndVPTOptimisationsPass.cpp:265
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:111
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:692
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:288
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
MachineDominators.h
llvm::isLoopStart
static bool isLoopStart(const MachineInstr &MI)
Definition: MVETailPredUtils.h:82
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365