LLVM  14.0.0git
MVETPAndVPTOptimisationsPass.cpp
Go to the documentation of this file.
1 //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass does a few optimisations related to Tail predicated loops
10 /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11 /// the goal is to maximize the sizes of the blocks that will be created by the
12 /// MVE VPT Block Insertion pass (which runs after register allocation). For
13 /// tail predicated loops we transform the loop into something that will
14 /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15 ///
16 //===----------------------------------------------------------------------===//
17 
18 #include "ARM.h"
19 #include "ARMSubtarget.h"
21 #include "MVETailPredUtils.h"
22 #include "Thumb2InstrInfo.h"
23 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Support/Debug.h"
32 #include <cassert>
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "arm-mve-vpt-opts"
37 
38 static cl::opt<bool>
39 MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40  cl::desc("Enable merging Loop End and Dec instructions."),
41  cl::init(true));
42 
43 static cl::opt<bool>
44 SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
45  cl::desc("Enable setting lr as a predicate in tail predication regions."),
46  cl::init(true));
47 
48 namespace {
// Machine function pass performing the tail-predicated-loop and VPT block
// optimisations described in the file header. Only the members whose
// definitions are visible in this listing are documented below.
49 class MVETPAndVPTOptimisations : public MachineFunctionPass {
50 public:
    // Pass identifier handed to the MachineFunctionPass constructor.
51  static char ID;
    // Instruction info used to look up opcode descriptions when building
    // replacement instructions.
52  const Thumb2InstrInfo *TII;
54 
55  MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56 
57  bool runOnMachineFunction(MachineFunction &Fn) override;
58 
59  void getAnalysisUsage(AnalysisUsage &AU) const override {
65  }
66 
67  StringRef getPassName() const override {
68  return "ARM MVE TailPred and VPT Optimisation Pass";
69  }
70 
71 private:
    // Merges t2WhileLoopSetup + t2WhileLoopStart into t2WhileLoopStartLR, or
    // reverts the loop when that is not possible.
72  bool LowerWhileLoopStart(MachineLoop *ML);
    // Merges the loop decrement and loop end into a single t2LoopEndDec, or
    // reverts the loop when an invalid instruction is found.
73  bool MergeLoopEnd(MachineLoop *ML);
    // Replaces the loop start with its tail-predicated (TP) form when the loop
    // contains VCTP instructions.
74  bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
    // Inserts a VPNOT before Instr and makes the given use operand read its
    // result; returns the new VPNOT.
75  MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76  MachineInstr &Instr,
    // Replaces uses of older VCCR values with VPNOTs to reduce overlapping
    // VCCR lifetimes. Returns true if the block was modified.
79  bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
    // Replaces a VCMP by a VPNOT of the previous VCMP when they are opposites.
80  bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
    // Looks for constant predicate masks that are the bitwise NOT of one
    // another and reuses or VPNOTs the earlier one.
81  bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82  bool ConvertVPSEL(MachineBasicBlock &MBB);
83  bool HintDoLoopStartReg(MachineBasicBlock &MBB);
    // Scans the blocks leading to PreHeader for instructions that are invalid
    // in a low overhead loop; returns the (possibly replaced) loop start.
84  MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85  MachineInstr *LoopStart);
86 };
87 
89 
90 } // end anonymous namespace
91 
92 INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
93  "ARM MVE TailPred and VPT Optimisations pass", false,
94  false)
97 INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
98  "ARM MVE TailPred and VPT Optimisations pass", false, false)
99 
// Walks backwards through a chain of COPY instructions whose source operand is
// a virtual register, and returns the first defining instruction that is not
// such a COPY. Returns null if MI is null or a definition lookup yields none.
102  while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103  MI->getOperand(1).getReg().isVirtual())
104  MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105  return MI;
106 }
107 
108 // Given a loop ML, this attempts to find the t2LoopEnd (or t2LoopEndDec),
109 // t2LoopDec and corresponding PHI that make up a low overhead loop. LoopStart
110 // is set to the t2DoLoopStart, t2WhileLoopSetup or t2WhileLoopStartLR found.
112  MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113  MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114  MachineBasicBlock *Header = ML->getHeader();
115  MachineBasicBlock *Latch = ML->getLoopLatch();
116  if (!Header || !Latch) {
117  LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
118  return false;
119  }
120 
121  // Find the loop end from the terminators.
122  LoopEnd = nullptr;
123  for (auto &T : Latch->terminators()) {
124  if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125  LoopEnd = &T;
126  break;
127  }
128  if (T.getOpcode() == ARM::t2LoopEndDec &&
129  T.getOperand(2).getMBB() == Header) {
130  LoopEnd = &T;
131  break;
132  }
133  }
134  if (!LoopEnd) {
135  LLVM_DEBUG(dbgs() << " no LoopEnd\n");
136  return false;
137  }
138  LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
139 
140  // Find the dec from the use of the end. There may be copies between
141  // instructions. We expect the loop to loop like:
142  // $vs = t2DoLoopStart ...
143  // loop:
144  // $vp = phi [ $vs ], [ $vd ]
145  // ...
146  // $vd = t2LoopDec $vp
147  // ...
148  // t2LoopEnd $vd, loop
149  if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150  LoopDec = LoopEnd;
151  else {
152  LoopDec =
154  if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155  LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
156  return false;
157  }
158  }
159  LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
160 
161  LoopPhi =
163  if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164  LoopPhi->getNumOperands() != 5 ||
165  (LoopPhi->getOperand(2).getMBB() != Latch &&
166  LoopPhi->getOperand(4).getMBB() != Latch)) {
167  LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
168  return false;
169  }
170  LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
171 
172  Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173  ? LoopPhi->getOperand(3).getReg()
174  : LoopPhi->getOperand(1).getReg();
175  LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176  if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177  LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178  LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179  LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
180  return false;
181  }
182  LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
183 
184  return true;
185 }
186 
// Reverts a t2WhileLoopSetup (MI) into a flag-setting t2SUBri and, if the same
// block has a t2WhileLoopStart terminator, replaces that terminator with a
// conditional t2Bcc. MI is erased before returning.
188  MachineBasicBlock *MBB = MI->getParent();
189  assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190  "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191 
192  // Subs: t2SUBri of MI's two operands with immediate 0, predicate AL, defining CPSR.
193  MachineInstrBuilder MIB =
194  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195  MIB.add(MI->getOperand(0));
196  MIB.add(MI->getOperand(1));
197  MIB.addImm(0);
198  MIB.addImm(ARMCC::AL);
199  MIB.addReg(ARM::NoRegister);
200  MIB.addReg(ARM::CPSR, RegState::Define);
201 
202  // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203  for (MachineInstr &I : MBB->terminators()) {
204  if (I.getOpcode() == ARM::t2WhileLoopStart) {
205  MachineInstrBuilder MIB =
206  BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
    // NOTE(review): MI is the t2WhileLoopSetup here. The comment above
    // LowerWhileLoopStart shows the setup's operand 1 as the count (%Cnt) and
    // the branch target (%BB) as operand 1 of the t2WhileLoopStart, i.e. `I`.
    // This looks like it should be I.getOperand(1) - confirm.
207  MIB.add(MI->getOperand(1)); // branch target
208  MIB.addImm(ARMCC::EQ);
209  MIB.addReg(ARM::CPSR);
210  I.eraseFromParent();
    // Stop after the first match; the terminator being iterated was erased.
211  break;
212  }
213  }
214 
215  MI->eraseFromParent();
216 }
217 
218 // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219 // start of a while loop:
220 // %a:gprlr = t2WhileLoopSetup %Cnt
221 // t2WhileLoopStart %a, %BB
222 // We want to convert those to a single instruction which, like t2LoopEndDec and
223 // t2DoLoopStartTP is both a terminator and produces a value:
224 // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225 //
226 // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227 // t2WhileLoopStart are not valid past regalloc.
228 bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229  LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230  << ML->getHeader()->getName() << "\n");
231 
232  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234  return false;
235 
236  if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237  return false;
238 
239  Register LR = LoopStart->getOperand(0).getReg();
240  auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241  return MI.getOpcode() == ARM::t2WhileLoopStart;
242  });
243  if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244  RevertWhileLoopSetup(LoopStart, TII);
245  RevertLoopDec(LoopStart, TII);
246  RevertLoopEnd(LoopStart, TII);
247  return true;
248  }
249 
251  BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252  TII->get(ARM::t2WhileLoopStartLR), LR)
253  .add(LoopStart->getOperand(1))
254  .add(WLSIt->getOperand(1));
255  (void)MI;
256  LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257 
258  WLSIt->eraseFromParent();
259  LoopStart->eraseFromParent();
260  return true;
261 }
262 
263 // Return true if this instruction is invalid in a low overhead loop, usually
264 // because it clobbers LR.
266  return MI.isCall() || isLoopStart(MI);
267 }
268 
269 // Starting from PreHeader, search for invalid instructions back until the
270 // LoopStart block is reached. If invalid instructions are found, the loop start
271 // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272 // return the new DLS LoopStart if updated.
273 MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274  MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
277  Worklist.push_back(PreHeader);
278  Visited.insert(LoopStart->getParent());
279 
280  while (!Worklist.empty()) {
281  MachineBasicBlock *MBB = Worklist.pop_back_val();
282  if (Visited.count(MBB))
283  continue;
284 
285  for (MachineInstr &MI : *MBB) {
287  continue;
288 
289  LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290 
291  // Create a t2DoLoopStart at the end of the preheader.
292  MachineInstrBuilder MIB =
293  BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294  LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295  MIB.add(LoopStart->getOperand(0));
296  MIB.add(LoopStart->getOperand(1));
297 
298  // Make sure to remove the kill flags, to prevent them from being invalid.
299  LoopStart->getOperand(1).setIsKill(false);
300 
301  // Revert the t2WhileLoopStartLR to a CMP and Br.
302  RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
303  return MIB;
304  }
305 
306  Visited.insert(MBB);
307  for (auto *Pred : MBB->predecessors())
308  Worklist.push_back(Pred);
309  }
310  return LoopStart;
311 }
312 
313 // This function converts loops with t2LoopDec and t2LoopEnd instructions into
314 // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
315 // will be valid to be used for the low overhead loop, which means nothing else
316 // is using LR (especially calls) and there are no superfluous copies in the
317 // loop. The t2LoopEndDec is a branching terminator that produces a value (the
318 // decrement) around the loop edge, which means we need to be careful that they
319 // will be valid to allocate without any spilling.
320 bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
321  if (!MergeEndDec)
322  return false;
323 
324  LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
325  << "\n");
326 
327  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
328  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
329  return false;
330 
331  // Check if there is an illegal instruction (a call) in the low overhead loop
332  // and if so revert it now before we get any further. While loops also need to
333  // check the preheaders, but can be reverted to a DLS loop if needed.
334  auto *PreHeader = ML->getLoopPreheader();
335  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336  LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
337 
338  for (MachineBasicBlock *MBB : ML->blocks()) {
339  for (MachineInstr &MI : *MBB) {
340  if (IsInvalidTPInstruction(MI)) {
341  LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
342  if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
343  RevertDoLoopStart(LoopStart, TII);
344  else
345  RevertWhileLoopStartLR(LoopStart, TII);
346  RevertLoopDec(LoopDec, TII);
347  RevertLoopEnd(LoopEnd, TII);
348  return true;
349  }
350  }
351  }
352 
353  // Remove any copies from the loop, to ensure the phi that remains is both
354  // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
355  // that cannot spill, we need to be careful what remains in the loop.
356  Register PhiReg = LoopPhi->getOperand(0).getReg();
357  Register DecReg = LoopDec->getOperand(0).getReg();
358  Register StartReg = LoopStart->getOperand(0).getReg();
359  // Ensure the uses are expected, and collect any copies we want to remove.
361  auto CheckUsers = [&Copies](Register BaseReg,
362  ArrayRef<MachineInstr *> ExpectedUsers,
364  SmallVector<Register, 4> Worklist;
365  Worklist.push_back(BaseReg);
366  while (!Worklist.empty()) {
367  Register Reg = Worklist.pop_back_val();
369  if (count(ExpectedUsers, &MI))
370  continue;
371  if (MI.getOpcode() != TargetOpcode::COPY ||
372  !MI.getOperand(0).getReg().isVirtual()) {
373  LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
374  return false;
375  }
376  Worklist.push_back(MI.getOperand(0).getReg());
377  Copies.push_back(&MI);
378  }
379  }
380  return true;
381  };
382  if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
383  !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
384  !CheckUsers(StartReg, {LoopPhi}, MRI)) {
385  // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
386  if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
387  RevertWhileLoopStartLR(LoopStart, TII);
388  RevertLoopDec(LoopDec, TII);
389  RevertLoopEnd(LoopEnd, TII);
390  return true;
391  }
392  return false;
393  }
394 
395  MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396  MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397  MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
398 
399  if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
400  LoopPhi->getOperand(3).setReg(StartReg);
401  LoopPhi->getOperand(1).setReg(DecReg);
402  } else {
403  LoopPhi->getOperand(1).setReg(StartReg);
404  LoopPhi->getOperand(3).setReg(DecReg);
405  }
406 
407  // Replace the loop dec and loop end as a single instruction.
409  BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
410  TII->get(ARM::t2LoopEndDec), DecReg)
411  .addReg(PhiReg)
412  .add(LoopEnd->getOperand(1));
413  (void)MI;
414  LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
415 
416  LoopDec->eraseFromParent();
417  LoopEnd->eraseFromParent();
418  for (auto *MI : Copies)
419  MI->eraseFromParent();
420  return true;
421 }
422 
423 // Convert t2DoLoopStart to t2DoLoopStartTP (or t2WhileLoopStartLR to
424 // t2WhileLoopStartTP) if the loop contains VCTP instructions. This keeps the
425 // VCTP count reg operand on the new loop start instruction, making the backend
426 // ARMLowOverheadLoops pass's job of finding the VCTP operand much simpler.
427 bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
428  MachineDominatorTree *DT) {
429  LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
430  << ML->getHeader()->getName() << "\n");
431 
432  // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
433  // in the loop.
434  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
435  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
436  return false;
437  if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
438  LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
439  return false;
440 
443  for (MachineBasicBlock *BB : ML->blocks()) {
444  for (MachineInstr &MI : *BB)
445  if (isVCTP(&MI))
446  VCTPs.push_back(&MI);
447  else if (findFirstVPTPredOperandIdx(MI) != -1)
448  MVEInstrs.push_back(&MI);
449  }
450 
451  if (VCTPs.empty()) {
452  LLVM_DEBUG(dbgs() << " no VCTPs\n");
453  return false;
454  }
455 
456  // Check all VCTPs are the same.
457  MachineInstr *FirstVCTP = *VCTPs.begin();
458  for (MachineInstr *VCTP : VCTPs) {
459  LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
460  if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
461  VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
462  LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
463  return false;
464  }
465  }
466 
467  // Check for the register being used can be setup before the loop. We expect
468  // this to be:
469  // $vx = ...
470  // loop:
471  // $vp = PHI [ $vx ], [ $vd ]
472  // ..
473  // $vpr = VCTP $vp
474  // ..
475  // $vd = t2SUBri $vp, #n
476  // ..
477  Register CountReg = FirstVCTP->getOperand(1).getReg();
478  if (!CountReg.isVirtual()) {
479  LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
480  return false;
481  }
482  MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
483  if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
484  Phi->getNumOperands() != 5 ||
485  (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
486  Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
487  LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
488  return false;
489  }
490  CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
491  ? Phi->getOperand(3).getReg()
492  : Phi->getOperand(1).getReg();
493 
494  // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
495  // the preheader and add the new CountReg to it. We attempt to place it late
496  // in the preheader, but may need to move that earlier based on uses.
497  MachineBasicBlock *MBB = LoopStart->getParent();
499  for (MachineInstr &Use :
500  MRI->use_instructions(LoopStart->getOperand(0).getReg()))
501  if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
502  !DT->dominates(ML->getHeader(), Use.getParent())) {
503  LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
504  return false;
505  }
506 
507  unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
508  ? ARM::t2DoLoopStartTP
509  : ARM::t2WhileLoopStartTP;
511  BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
512  .add(LoopStart->getOperand(0))
513  .add(LoopStart->getOperand(1))
514  .addReg(CountReg);
515  if (NewOpc == ARM::t2WhileLoopStartTP)
516  MI.add(LoopStart->getOperand(2));
517  LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
518  << *MI.getInstr());
519  MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
520  LoopStart->eraseFromParent();
521 
522  if (SetLRPredicate) {
523  // Each instruction in the loop needs to be using LR as the predicate from
524  // the Phi as the predicate.
525  Register LR = LoopPhi->getOperand(0).getReg();
526  for (MachineInstr *MI : MVEInstrs) {
527  int Idx = findFirstVPTPredOperandIdx(*MI);
528  MI->getOperand(Idx + 2).setReg(LR);
529  }
530  }
531 
532  return true;
533 }
534 
535 // Returns true if Opcode is any VCMP Opcode.
536 static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
537 
538 // Returns true if a VCMP with this Opcode can have its operands swapped.
539 // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
540 // and VCMPr instructions (since the r is always on the right).
541 static bool CanHaveSwappedOperands(unsigned Opcode) {
542  switch (Opcode) {
543  default:
544  return true;
545  case ARM::MVE_VCMPf32:
546  case ARM::MVE_VCMPf16:
547  case ARM::MVE_VCMPf32r:
548  case ARM::MVE_VCMPf16r:
549  case ARM::MVE_VCMPi8r:
550  case ARM::MVE_VCMPi16r:
551  case ARM::MVE_VCMPi32r:
552  case ARM::MVE_VCMPu8r:
553  case ARM::MVE_VCMPu16r:
554  case ARM::MVE_VCMPu32r:
555  case ARM::MVE_VCMPs8r:
556  case ARM::MVE_VCMPs16r:
557  case ARM::MVE_VCMPs32r:
558  return false;
559  }
560 }
561 
562 // Returns the CondCode of a VCMP Instruction.
564  assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
565  return ARMCC::CondCodes(Instr.getOperand(3).getImm());
566 }
567 
568 // Returns true if Cond is equivalent to a VPNOT instruction on the result of
569 // Prev. Cond and Prev must be VCMPs.
571  assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
572 
573  // Opcodes must match.
574  if (Cond.getOpcode() != Prev.getOpcode())
575  return false;
576 
577  MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
578  MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
579 
580  // If the VCMP has the opposite condition with the same operands, we can
581  // replace it with a VPNOT
582  ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
583  ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
584  if (ExpectedCode == GetCondCode(Prev))
585  if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
586  return true;
587  // Check again with operands swapped if possible
588  if (!CanHaveSwappedOperands(Cond.getOpcode()))
589  return false;
590  ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
591  return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
592  CondOP2.isIdenticalTo(PrevOP1);
593 }
594 
595 // Returns true if Instr writes to VCCR.
596 static bool IsWritingToVCCR(MachineInstr &Instr) {
597  if (Instr.getNumOperands() == 0)
598  return false;
599  MachineOperand &Dst = Instr.getOperand(0);
600  if (!Dst.isReg())
601  return false;
602  Register DstReg = Dst.getReg();
603  if (!DstReg.isVirtual())
604  return false;
605  MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
606  const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
607  return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
608 }
609 
610 // Transforms
611 // <Instr that uses %A ('User' Operand)>
612 // Into
613 // %K = VPNOT %Target
614 // <Instr that uses %K ('User' Operand)>
615 // And returns the newly inserted VPNOT.
616 // This optimization is done in the hopes of preventing spills/reloads of VPR by
617 // reducing the number of VCCR values with overlapping lifetimes.
618 MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
620  Register Target) {
622 
623  MachineInstrBuilder MIBuilder =
624  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
625  .addDef(NewResult)
626  .addReg(Target);
627  addUnpredicatedMveVpredNOp(MIBuilder);
628 
629  // Make the user use NewResult instead, and clear its kill flag.
630  User.setReg(NewResult);
631  User.setIsKill(false);
632 
633  LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
634  MIBuilder.getInstr()->dump());
635 
636  return *MIBuilder.getInstr();
637 }
638 
639 // Moves a VPNOT before its first user if an instruction that uses Reg is found
640 // in-between the VPNOT and its user.
641 // Returns true if there is at least one user of the VPNOT in the block.
644  Register Reg) {
645  assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
647  "The VPNOT cannot be predicated");
648 
649  MachineInstr &VPNOT = *Iter;
650  Register VPNOTResult = VPNOT.getOperand(0).getReg();
651  Register VPNOTOperand = VPNOT.getOperand(1).getReg();
652 
653  // Whether the VPNOT will need to be moved, and whether we found a user of the
654  // VPNOT.
655  bool MustMove = false, HasUser = false;
656  MachineOperand *VPNOTOperandKiller = nullptr;
657  for (; Iter != MBB.end(); ++Iter) {
658  if (MachineOperand *MO =
659  Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
660  // If we find the operand that kills the VPNOTOperand's result, save it.
661  VPNOTOperandKiller = MO;
662  }
663 
664  if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
665  MustMove = true;
666  continue;
667  }
668 
669  if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
670  continue;
671 
672  HasUser = true;
673  if (!MustMove)
674  break;
675 
676  // Move the VPNOT right before Iter
677  LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
678  Iter->dump());
679  MBB.splice(Iter, &MBB, VPNOT.getIterator());
680  // If we move the instr, and its operand was killed earlier, remove the kill
681  // flag.
682  if (VPNOTOperandKiller)
683  VPNOTOperandKiller->setIsKill(false);
684 
685  break;
686  }
687  return HasUser;
688 }
689 
690 // This optimisation attempts to reduce the number of overlapping lifetimes of
691 // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
692 // this replaces
693 // %A:vccr = (something)
694 // %B:vccr = VPNOT %A
695 // %Foo = (some op that uses %B)
696 // %Bar = (some op that uses %A)
697 // With
698 // %A:vccr = (something)
699 // %B:vccr = VPNOT %A
700 // %Foo = (some op that uses %B)
701 // %TMP2:vccr = VPNOT %B
702 // %Bar = (some op that uses %TMP2)
703 bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
704  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
705  SmallVector<MachineInstr *, 4> DeadInstructions;
706  bool Modified = false;
707 
708  while (Iter != End) {
709  Register VCCRValue, OppositeVCCRValue;
710  // The first loop looks for 2 unpredicated instructions:
711  // %A:vccr = (instr) ; A is stored in VCCRValue
712  // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
713  for (; Iter != End; ++Iter) {
714  // We're only interested in unpredicated instructions that write to VCCR.
715  if (!IsWritingToVCCR(*Iter) ||
717  continue;
718  Register Dst = Iter->getOperand(0).getReg();
719 
720  // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
721  // found what we were looking for.
722  if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
723  Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
724  // Move the VPNOT closer to its first user if needed, and ignore if it
725  // has no users.
726  if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
727  continue;
728 
729  OppositeVCCRValue = Dst;
730  ++Iter;
731  break;
732  }
733 
734  // Else, just set VCCRValue.
735  VCCRValue = Dst;
736  }
737 
738  // If the first inner loop didn't find anything, stop here.
739  if (Iter == End)
740  break;
741 
742  assert(VCCRValue && OppositeVCCRValue &&
743  "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
744  "stopped before the end of the block!");
745  assert(VCCRValue != OppositeVCCRValue &&
746  "VCCRValue should not be equal to OppositeVCCRValue!");
747 
748  // LastVPNOTResult always contains the same value as OppositeVCCRValue.
749  Register LastVPNOTResult = OppositeVCCRValue;
750 
751  // This second loop tries to optimize the remaining instructions.
752  for (; Iter != End; ++Iter) {
753  bool IsInteresting = false;
754 
755  if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
756  IsInteresting = true;
757 
758  // - If the instruction is a VPNOT, it can be removed, and we can just
759  // replace its uses with LastVPNOTResult.
760  // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
761  if (Iter->getOpcode() == ARM::MVE_VPNOT) {
762  Register Result = Iter->getOperand(0).getReg();
763 
764  MRI->replaceRegWith(Result, LastVPNOTResult);
765  DeadInstructions.push_back(&*Iter);
766  Modified = true;
767 
768  LLVM_DEBUG(dbgs()
769  << "Replacing all uses of '" << printReg(Result)
770  << "' with '" << printReg(LastVPNOTResult) << "'\n");
771  } else {
772  MachineInstr &VPNOT =
773  ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
774  Modified = true;
775 
776  LastVPNOTResult = VPNOT.getOperand(0).getReg();
777  std::swap(VCCRValue, OppositeVCCRValue);
778 
779  LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
780  << "' with '" << printReg(LastVPNOTResult)
781  << "' in instr: " << *Iter);
782  }
783  } else {
784  // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
785  // instead as they contain the same value.
786  if (MachineOperand *MO =
787  Iter->findRegisterUseOperand(OppositeVCCRValue)) {
788  IsInteresting = true;
789 
790  // This is pointless if LastVPNOTResult == OppositeVCCRValue.
791  if (LastVPNOTResult != OppositeVCCRValue) {
792  LLVM_DEBUG(dbgs() << "Replacing usage of '"
793  << printReg(OppositeVCCRValue) << "' with '"
794  << printReg(LastVPNOTResult) << " for instr: ";
795  Iter->dump());
796  MO->setReg(LastVPNOTResult);
797  Modified = true;
798  }
799 
800  MO->setIsKill(false);
801  }
802 
803  // If this is an unpredicated VPNOT on
804  // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
805  if (Iter->getOpcode() == ARM::MVE_VPNOT &&
807  Register VPNOTOperand = Iter->getOperand(1).getReg();
808  if (VPNOTOperand == LastVPNOTResult ||
809  VPNOTOperand == OppositeVCCRValue) {
810  IsInteresting = true;
811 
812  std::swap(VCCRValue, OppositeVCCRValue);
813  LastVPNOTResult = Iter->getOperand(0).getReg();
814  }
815  }
816  }
817 
818  // If this instruction was not interesting, and it writes to VCCR, stop.
819  if (!IsInteresting && IsWritingToVCCR(*Iter))
820  break;
821  }
822  }
823 
824  for (MachineInstr *DeadInstruction : DeadInstructions)
825  DeadInstruction->eraseFromParent();
826 
827  return Modified;
828 }
829 
830 // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
831 bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
832  SmallVector<MachineInstr *, 4> DeadInstructions;
833 
834  // The last VCMP that we have seen and that couldn't be replaced.
835  // This is reset when an instruction that writes to VCCR/VPR is found, or when
836  // a VCMP is replaced with a VPNOT.
837  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
838  // current VCMP is the opposite of PrevVCMP.
839  MachineInstr *PrevVCMP = nullptr;
840  // If we find an instruction that kills the result of PrevVCMP, we save the
841  // operand here to remove the kill flag in case we need to use PrevVCMP's
842  // result.
843  MachineOperand *PrevVCMPResultKiller = nullptr;
844 
845  for (MachineInstr &Instr : MBB.instrs()) {
846  if (PrevVCMP) {
847  if (MachineOperand *MO = Instr.findRegisterUseOperand(
848  PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
849  // If we come accross the instr that kills PrevVCMP's result, record it
850  // so we can remove the kill flag later if we need to.
851  PrevVCMPResultKiller = MO;
852  }
853  }
854 
855  // Ignore predicated instructions.
856  if (getVPTInstrPredicate(Instr) != ARMVCC::None)
857  continue;
858 
859  // Only look at VCMPs
860  if (!IsVCMP(Instr.getOpcode())) {
861  // If the instruction writes to VCCR, forget the previous VCMP.
862  if (IsWritingToVCCR(Instr))
863  PrevVCMP = nullptr;
864  continue;
865  }
866 
867  if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
868  PrevVCMP = &Instr;
869  continue;
870  }
871 
872  // The register containing the result of the VCMP that we're going to
873  // replace.
874  Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
875 
876  // Build a VPNOT to replace the VCMP, reusing its operands.
877  MachineInstrBuilder MIBuilder =
878  BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
879  .add(Instr.getOperand(0))
880  .addReg(PrevVCMPResultReg);
881  addUnpredicatedMveVpredNOp(MIBuilder);
882  LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
883  MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
884  Instr.dump());
885 
886  // If we found an instruction that uses, and kills PrevVCMP's result,
887  // remove the kill flag.
888  if (PrevVCMPResultKiller)
889  PrevVCMPResultKiller->setIsKill(false);
890 
891  // Finally, mark the old VCMP for removal and reset
892  // PrevVCMP/PrevVCMPResultKiller.
893  DeadInstructions.push_back(&Instr);
894  PrevVCMP = nullptr;
895  PrevVCMPResultKiller = nullptr;
896  }
897 
898  for (MachineInstr *DeadInstruction : DeadInstructions)
899  DeadInstruction->eraseFromParent();
900 
901  return !DeadInstructions.empty();
902 }
903 
904 bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
905  MachineDominatorTree *DT) {
906  // Scan through the block, looking for instructions that use constants moves
907  // into VPR that are the negative of one another. These are expected to be
908  // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
909  // mask is kept it or and VPNOT's of it are added or reused as we scan through
910  // the function.
911  unsigned LastVPTImm = 0;
912  Register LastVPTReg = 0;
913  SmallSet<MachineInstr *, 4> DeadInstructions;
914 
915  for (MachineInstr &Instr : MBB.instrs()) {
916  // Look for predicated MVE instructions.
917  int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
918  if (PIdx == -1)
919  continue;
920  Register VPR = Instr.getOperand(PIdx + 1).getReg();
921  if (!VPR.isVirtual())
922  continue;
923 
924  // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
925  MachineInstr *Copy = MRI->getVRegDef(VPR);
926  if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
927  !Copy->getOperand(1).getReg().isVirtual() ||
928  MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
929  LastVPTReg = 0;
930  continue;
931  }
932  Register GPR = Copy->getOperand(1).getReg();
933 
934  // Find the Immediate used by the copy.
935  auto getImm = [&](Register GPR) -> unsigned {
936  MachineInstr *Def = MRI->getVRegDef(GPR);
937  if (Def && (Def->getOpcode() == ARM::t2MOVi ||
938  Def->getOpcode() == ARM::t2MOVi16))
939  return Def->getOperand(1).getImm();
940  return -1U;
941  };
942  unsigned Imm = getImm(GPR);
943  if (Imm == -1U) {
944  LastVPTReg = 0;
945  continue;
946  }
947 
948  unsigned NotImm = ~Imm & 0xffff;
949  if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
950  Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
951  if (MRI->use_empty(VPR)) {
952  DeadInstructions.insert(Copy);
953  if (MRI->hasOneUse(GPR))
954  DeadInstructions.insert(MRI->getVRegDef(GPR));
955  }
956  LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
957  } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
958  // We have found the not of a previous constant. Create a VPNot of the
959  // earlier predicate reg and use it instead of the copy.
960  Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
961  auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
962  TII->get(ARM::MVE_VPNOT), NewVPR)
963  .addReg(LastVPTReg);
965 
966  // Use the new register and check if the def is now dead.
967  Instr.getOperand(PIdx + 1).setReg(NewVPR);
968  if (MRI->use_empty(VPR)) {
969  DeadInstructions.insert(Copy);
970  if (MRI->hasOneUse(GPR))
971  DeadInstructions.insert(MRI->getVRegDef(GPR));
972  }
973  LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
974  << Instr);
975  VPR = NewVPR;
976  }
977 
978  LastVPTImm = Imm;
979  LastVPTReg = VPR;
980  }
981 
982  for (MachineInstr *DI : DeadInstructions)
983  DI->eraseFromParent();
984 
985  return !DeadInstructions.empty();
986 }
987 
988 // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
989 // somewhat blunt approximation to allow tail predicated with vpsel
990 // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
991 // different semantics under tail predication. Until that is modelled we just
992 // convert to a VMOVT (via a predicated VORR) instead.
993 bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
994  bool HasVCTP = false;
995  SmallVector<MachineInstr *, 4> DeadInstructions;
996 
997  for (MachineInstr &MI : MBB.instrs()) {
998  if (isVCTP(&MI)) {
999  HasVCTP = true;
1000  continue;
1001  }
1002 
1003  if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1004  continue;
1005 
1006  MachineInstrBuilder MIBuilder =
1007  BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1008  .add(MI.getOperand(0))
1009  .add(MI.getOperand(1))
1010  .add(MI.getOperand(1))
1012  .add(MI.getOperand(4))
1013  .add(MI.getOperand(5))
1014  .add(MI.getOperand(2));
1015  // Silence unused variable warning in release builds.
1016  (void)MIBuilder;
1017  LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1018  dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1019  DeadInstructions.push_back(&MI);
1020  }
1021 
1022  for (MachineInstr *DeadInstruction : DeadInstructions)
1023  DeadInstruction->eraseFromParent();
1024 
1025  return !DeadInstructions.empty();
1026 }
1027 
1028 // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1029 // the instruction may be removable as a noop.
1030 bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1031  bool Changed = false;
1032  for (MachineInstr &MI : MBB.instrs()) {
1033  if (MI.getOpcode() != ARM::t2DoLoopStart)
1034  continue;
1035  Register R = MI.getOperand(1).getReg();
1036  MachineFunction *MF = MI.getParent()->getParent();
1038  Changed = true;
1039  }
1040  return Changed;
1041 }
1042 
1043 bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1044  const ARMSubtarget &STI =
1045  static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1046 
1047  if (!STI.isThumb2() || !STI.hasLOB())
1048  return false;
1049 
1050  TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1051  MRI = &Fn.getRegInfo();
1052  MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1053  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
1054 
1055  LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1056  << "********** Function: " << Fn.getName() << '\n');
1057 
1058  bool Modified = false;
1059  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1060  Modified |= LowerWhileLoopStart(ML);
1061  Modified |= MergeLoopEnd(ML);
1062  Modified |= ConvertTailPredLoop(ML, DT);
1063  }
1064 
1065  for (MachineBasicBlock &MBB : Fn) {
1066  Modified |= HintDoLoopStartReg(MBB);
1067  Modified |= ReplaceConstByVPNOTs(MBB, DT);
1068  Modified |= ReplaceVCMPsByVPNOTs(MBB);
1069  Modified |= ReduceOldVCCRValueUses(MBB);
1070  Modified |= ConvertVPSEL(MBB);
1071  }
1072 
1073  LLVM_DEBUG(dbgs() << "**************************************\n");
1074  return Modified;
1075 }
1076 
1077 /// createMVETPAndVPTOptimisationsPass
1079  return new MVETPAndVPTOptimisations();
1080 }
ARMSubtarget.h
pass
ARM MVE TailPred and VPT Optimisations pass
Definition: MVETPAndVPTOptimisationsPass.cpp:98
IsVPNOTEquivalent
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
Definition: MVETPAndVPTOptimisationsPass.cpp:570
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm::TargetRegisterClass::getID
unsigned getID() const
Return the register class ID number.
Definition: TargetRegisterInfo.h:71
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MachineRegisterInfo::use_instr_nodbg_end
static use_instr_nodbg_iterator use_instr_nodbg_end()
Definition: MachineRegisterInfo.h:538
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ARMSubtarget
Definition: ARMSubtarget.h:46
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
IsWritingToVCCR
static bool IsWritingToVCCR(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:596
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:263
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:137
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:500
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::MachineRegisterInfo::use_nodbg_instructions
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:543
LookThroughCOPY
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
Definition: MVETPAndVPTOptimisationsPass.cpp:100
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1431
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
llvm::ARMSubtarget::getInstrInfo
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:562
llvm::MachineInstr::getMF
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Definition: MachineInstr.cpp:663
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ARMVCC::Then
@ Then
Definition: ARMBaseInfo.h:91
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
llvm::MachineDominatorTree::dominates
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
Definition: MachineDominators.h:109
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:288
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
SetLRPredicate
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:193
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
GetCondCode
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
Definition: MVETPAndVPTOptimisationsPass.cpp:563
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineInstrBuilder::addDef
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Definition: MachineInstrBuilder.h:116
llvm::ARMSubtarget::hasLOB
bool hasLOB() const
Definition: ARMSubtarget.h:660
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::ARMCC::EQ
@ EQ
Definition: ARMBaseInfo.h:31
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
MachineLoopInfo.h
ARMBaseInfo.h
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::User
Definition: User.h:44
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::RevertLoopDec
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
Definition: MVETailPredUtils.h:145
llvm::findFirstVPTPredOperandIdx
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
Definition: Thumb2InstrInfo.cpp:761
llvm::RevertLoopEnd
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Definition: MVETailPredUtils.h:167
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
false
Definition: StackSlotColoring.cpp:142
MVETailPredUtils.h
llvm::getVPTInstrPredicate
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
Definition: Thumb2InstrInfo.cpp:774
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
findLoopComponents
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
Definition: MVETPAndVPTOptimisationsPass.cpp:111
LoopDeletionResult::Modified
@ Modified
Copies
SI Lower i1 Copies
Definition: SILowerI1Copies.cpp:406
llvm::RegState::Define
@ Define
Register definition.
Definition: MachineInstrBuilder.h:44
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::VCMPOpcodeToVPT
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
Definition: ARMBaseInstrInfo.h:586
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
MoveVPNOTBeforeFirstUser
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
Definition: MVETPAndVPTOptimisationsPass.cpp:642
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ARMCC::AL
@ AL
Definition: ARMBaseInfo.h:45
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
RevertWhileLoopSetup
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETPAndVPTOptimisationsPass.cpp:187
llvm::cl::opt< bool >
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, "ARM MVE TailPred and VPT Optimisations pass", false, false) INITIALIZE_PASS_END(MVETPAndVPTOptimisations
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1630
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::RevertDoLoopStart
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
Definition: MVETailPredUtils.h:135
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::createMVETPAndVPTOptimisationsPass
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
Definition: MVETPAndVPTOptimisationsPass.cpp:1078
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::addUnpredicatedMveVpredNOp
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Definition: ARMBaseInstrInfo.cpp:867
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
MachineFunctionPass.h
llvm::MachineRegisterInfo::getRegClassOrNull
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
Definition: MachineRegisterInfo.h:651
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:541
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::isVCTP
static bool isVCTP(const MachineInstr *MI)
Definition: MVETailPredUtils.h:58
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::ARMVCC::None
@ None
Definition: ARMBaseInfo.h:90
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::ARMCC::getSwappedCondition
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
Definition: ARMBaseInfo.h:71
ARM.h
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1540
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:242
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
IsVCMP
static bool IsVCMP(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:536
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:950
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm::MachineRegisterInfo::setRegAllocationHint
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
Definition: MachineRegisterInfo.h:765
llvm::RevertWhileLoopStartLR
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
Definition: MVETailPredUtils.h:98
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::MachineInstrBuilder::getInstr
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Definition: MachineInstrBuilder.h:89
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MVETPAndVPTOptimisationsPass.cpp:36
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1574
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Thumb2InstrInfo.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::Thumb2InstrInfo
Definition: Thumb2InstrInfo.h:23
llvm::ARMCC::CondCodes
CondCodes
Definition: ARMBaseInfo.h:30
llvm::MachineLoopInfo::getBase
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
Definition: MachineLoopInfo.h:106
llvm::MachineRegisterInfo::hasOneUse
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
Definition: MachineRegisterInfo.h:510
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MergeEndDec
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
CanHaveSwappedOperands
static bool CanHaveSwappedOperands(unsigned Opcode)
Definition: MVETPAndVPTOptimisationsPass.cpp:541
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:492
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:85
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::ARMCC::getOppositeCondition
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
llvm::ARMSubtarget::isThumb2
bool isThumb2() const
Definition: ARMSubtarget.h:812
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
MachineFunction.h
IsInvalidTPInstruction
static bool IsInvalidTPInstruction(MachineInstr &MI)
Definition: MVETPAndVPTOptimisationsPass.cpp:265
llvm::printReg
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition: TargetRegisterInfo.cpp:110
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::ARMRI::RegLR
@ RegLR
Definition: ARMBaseRegisterInfo.h:39
InitializePasses.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:314
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
MachineDominators.h
llvm::isLoopStart
static bool isLoopStart(const MachineInstr &MI)
Definition: MVETailPredUtils.h:82
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37