LLVM 22.0.0git
MVETPAndVPTOptimisationsPass.cpp
Go to the documentation of this file.
1//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass does a few optimisations related to Tail predicated loops
10/// and MVE VPT blocks before register allocation is performed. For VPT blocks
11/// the goal is to maximize the sizes of the blocks that will be created by the
12/// MVE VPT Block Insertion pass (which runs after register allocation). For
13/// tail predicated loops we transform the loop into something that will
14/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15///
16//===----------------------------------------------------------------------===//
17
18#include "ARM.h"
19#include "ARMSubtarget.h"
20#include "MVETailPredUtils.h"
21#include "Thumb2InstrInfo.h"
30#include "llvm/Support/Debug.h"
31#include <cassert>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "arm-mve-vpt-opts"
36
// Hidden command-line flag, enabled by default. When it is off, MergeLoopEnd
// returns immediately without changing anything and LowerWhileLoopStart takes
// its revert path instead of building a t2WhileLoopStartLR.
37static cl::opt<bool>
38MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
39 cl::desc("Enable merging Loop End and Dec instructions."),
40 cl::init(true));
41
// Hidden command-line flag, enabled by default. When it is on,
// ConvertTailPredLoop rewrites one register operand of every collected
// VPT-predicable MVE instruction in the loop to the loop PHI's result register.
42static cl::opt<bool>
43SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
44 cl::desc("Enable setting lr as a predicate in tail predication regions."),
45 cl::init(true));
46
47namespace {
// MachineFunctionPass that hosts the tail-predication and VPT transformations
// declared in the private section below. It requires and preserves both the
// machine loop info and the machine dominator tree analyses.
// NOTE(review): the embedded line numbers jump 51 -> 53 and 62 -> 64, so this
// listing appears to have lost two lines. In particular, the `MRI` member that
// the out-of-line method definitions use is not declared in the visible text.
48class MVETPAndVPTOptimisations : public MachineFunctionPass {
49public:
 // Pass identifier; its address is what is handed to MachineFunctionPass.
50 static char ID;
 // Target instruction info, used by the methods to build new instructions.
51 const Thumb2InstrInfo *TII;
53
54 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
55
56 bool runOnMachineFunction(MachineFunction &Fn) override;
57
 // Declares the loop info and dominator tree as both required and preserved.
58 void getAnalysisUsage(AnalysisUsage &AU) const override {
59 AU.addRequired<MachineLoopInfoWrapperPass>();
60 AU.addPreserved<MachineLoopInfoWrapperPass>();
61 AU.addRequired<MachineDominatorTreeWrapperPass>();
62 AU.addPreserved<MachineDominatorTreeWrapperPass>();
64 }
65
66 StringRef getPassName() const override {
67 return "ARM MVE TailPred and VPT Optimisation Pass";
68 }
69
70private:
 // Per-loop transformations; each returns true when it changed the function.
71 bool LowerWhileLoopStart(MachineLoop *ML);
72 bool MergeLoopEnd(MachineLoop *ML);
73 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
 // Inserts a VPNOT of Target before Instr and redirects User to its result.
74 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
75 MachineInstr &Instr,
76 MachineOperand &User,
77 Register Target);
 // Per-block transformations.
78 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
79 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
80 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
81 bool ConvertVPSEL(MachineBasicBlock &MBB);
82 bool HintDoLoopStartReg(MachineBasicBlock &MBB);
 // Returns the loop start to use after checking the blocks leading to
 // PreHeader; this may be a freshly built t2DoLoopStart.
83 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
84 MachineInstr *LoopStart);
85};
86
87char MVETPAndVPTOptimisations::ID = 0;
88
89} // end anonymous namespace
90
91INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
92 "ARM MVE TailPred and VPT Optimisations pass", false,
93 false)
96INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
97 "ARM MVE TailPred and VPT Optimisations pass", false, false)
98
101 while (MI && MI->getOpcode() == TargetOpcode::COPY &&
102 MI->getOperand(1).getReg().isVirtual())
103 MI = MRI->getVRegDef(MI->getOperand(1).getReg());
104 return MI;
105}
106
107// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
108// corresponding PHI that make up a low overhead loop. On success, LoopStart is
109// the t2DoLoopStart, t2WhileLoopSetup or t2WhileLoopStartLR feeding that PHI.
111 MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
112 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
113 MachineBasicBlock *Header = ML->getHeader();
114 MachineBasicBlock *Latch = ML->getLoopLatch();
115 if (!Header || !Latch) {
116 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
117 return false;
118 }
119
120 // Find the loop end from the terminators.
121 LoopEnd = nullptr;
122 for (auto &T : Latch->terminators()) {
123 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
124 LoopEnd = &T;
125 break;
126 }
127 if (T.getOpcode() == ARM::t2LoopEndDec &&
128 T.getOperand(2).getMBB() == Header) {
129 LoopEnd = &T;
130 break;
131 }
132 }
133 if (!LoopEnd) {
134 LLVM_DEBUG(dbgs() << " no LoopEnd\n");
135 return false;
136 }
137 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
138
139 // Find the dec from the use of the end. There may be copies between
140 // instructions. We expect the loop to look like:
141 // $vs = t2DoLoopStart ...
142 // loop:
143 // $vp = phi [ $vs ], [ $vd ]
144 // ...
145 // $vd = t2LoopDec $vp
146 // ...
147 // t2LoopEnd $vd, loop
148 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
149 LoopDec = LoopEnd;
150 else {
151 LoopDec =
152 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
153 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
154 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
155 return false;
156 }
157 }
158 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
159
160 LoopPhi =
161 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
162 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
163 LoopPhi->getNumOperands() != 5 ||
164 (LoopPhi->getOperand(2).getMBB() != Latch &&
165 LoopPhi->getOperand(4).getMBB() != Latch)) {
166 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
167 return false;
168 }
169 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
170
171 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
172 ? LoopPhi->getOperand(3).getReg()
173 : LoopPhi->getOperand(1).getReg();
174 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
175 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
176 LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
177 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
178 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
179 return false;
180 }
181 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
182
183 return true;
184}
185
187 MachineBasicBlock *MBB = MI->getParent();
188 assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
189 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
190
191 // Subs
193 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
194 MIB.add(MI->getOperand(0));
195 MIB.add(MI->getOperand(1));
196 MIB.addImm(0);
197 MIB.addImm(ARMCC::AL);
198 MIB.addReg(ARM::NoRegister);
199 MIB.addReg(ARM::CPSR, RegState::Define);
200
201 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
202 for (MachineInstr &I : MBB->terminators()) {
203 if (I.getOpcode() == ARM::t2WhileLoopStart) {
205 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
206 MIB.add(MI->getOperand(1)); // branch target
207 MIB.addImm(ARMCC::EQ);
208 MIB.addReg(ARM::CPSR);
209 I.eraseFromParent();
210 break;
211 }
212 }
213
214 MI->eraseFromParent();
215}
216
217// The Hardware Loop insertion and ISel Lowering produce the pseudos for the
218// start of a while loop:
219// %a:gprlr = t2WhileLoopSetup %Cnt
220// t2WhileLoopStart %a, %BB
221// We want to convert those to a single instruction which, like t2LoopEndDec and
222// t2DoLoopStartTP is both a terminator and produces a value:
223// %a:grplr: t2WhileLoopStartLR %Cnt, %BB
224//
225// Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
226// t2WhileLoopStart are not valid past regalloc.
227bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
228 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
229 << ML->getHeader()->getName() << "\n");
230
231 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
232 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
233 return false;
234
235 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
236 return false;
237
238 Register LR = LoopStart->getOperand(0).getReg();
239 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
240 return MI.getOpcode() == ARM::t2WhileLoopStart;
241 });
242 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
243 RevertWhileLoopSetup(LoopStart, TII);
244 RevertLoopDec(LoopStart, TII);
245 RevertLoopEnd(LoopStart, TII);
246 return true;
247 }
248
249 MachineInstrBuilder MI =
250 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
251 TII->get(ARM::t2WhileLoopStartLR), LR)
252 .add(LoopStart->getOperand(1))
253 .add(WLSIt->getOperand(1));
254 (void)MI;
255 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
256
257 WLSIt->eraseFromParent();
258 LoopStart->eraseFromParent();
259 return true;
260}
261
262// Return true if this instruction is invalid in a low overhead loop, usually
263// because it clobbers LR.
265 return MI.isCall() || isLoopStart(MI);
266}
267
268// Starting from PreHeader, search for invalid instructions back until the
269// LoopStart block is reached. If invalid instructions are found, the loop start
270// is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
271// return the new DLS LoopStart if updated.
// If nothing invalid is found, the original LoopStart is returned unchanged.
// NOTE(review): the embedded line numbers skip 274 and 285, so this listing
// appears to have lost two lines: the declaration of `Worklist` and the `if`
// condition that guards the `continue` at the top of the instruction loop.
272MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
273 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
275 SmallPtrSet<MachineBasicBlock *, 4> Visited;
276 Worklist.push_back(PreHeader);
 // Pre-mark LoopStart's block as visited so the backwards walk stops there.
277 Visited.insert(LoopStart->getParent());
278
279 while (!Worklist.empty()) {
280 MachineBasicBlock *MBB = Worklist.pop_back_val();
281 if (Visited.count(MBB))
282 continue;
283
284 for (MachineInstr &MI : *MBB) {
286 continue;
287
288 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
289
290 // Create a t2DoLoopStart at the end of the preheader.
291 MachineInstrBuilder MIB =
292 BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
293 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
 // Reuse the original loop start's first two operands on the new
 // instruction.
294 MIB.add(LoopStart->getOperand(0));
295 MIB.add(LoopStart->getOperand(1));
296
297 // Make sure to remove the kill flags, to prevent them from being invalid.
298 LoopStart->getOperand(1).setIsKill(false);
299
300 // Revert the t2WhileLoopStartLR to a CMP and Br.
301 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
 // Hand the newly created t2DoLoopStart back to the caller.
302 return MIB;
303 }
304
 // Nothing invalid in this block: keep walking through its predecessors.
305 Visited.insert(MBB);
306 llvm::append_range(Worklist, MBB->predecessors());
307 }
308 return LoopStart;
309}
310
311// This function converts loops with t2LoopDec and t2LoopEnd instructions into
312// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
313// will be valid to be used for the low overhead loop, which means nothing else
314// is using LR (especially calls) and there are no superfluous copies in the
315// loop. The t2LoopEndDec is a branching terminator that produces a value (the
316// decrement) around the loop edge, which means we need to be careful that they
317// will be valid to allocate without any spilling.
// Returns true if the function was changed, either by merging or by reverting
// the low overhead loop; returns false if nothing was done.
// NOTE(review): the embedded line numbers skip 338, 362, 410 and 413, so this
// listing appears to have lost four lines: the `if` opening the revert block
// inside the instruction scan, the declaration of the lambda's `Worklist`, the
// definition of `MBBI`, and the tail of the t2B BuildMI statement.
318bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
319 if (!MergeEndDec)
320 return false;
321
322 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
323 << "\n");
324
325 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
326 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
327 return false;
328
329 // Check if there is an illegal instruction (a call) in the low overhead loop
330 // and if so revert it now before we get any further. While loops also need to
331 // check the preheaders, but can be reverted to a DLS loop if needed.
332 auto *PreHeader = ML->getLoopPreheader();
333 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
334 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
335
336 for (MachineBasicBlock *MBB : ML->blocks()) {
337 for (MachineInstr &MI : *MBB) {
339 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
 // Revert all three pieces of the low overhead loop: start, dec and end.
340 if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
341 RevertDoLoopStart(LoopStart, TII);
342 else
343 RevertWhileLoopStartLR(LoopStart, TII);
344 RevertLoopDec(LoopDec, TII);
345 RevertLoopEnd(LoopEnd, TII);
346 return true;
347 }
348 }
349 }
350
351 // Remove any copies from the loop, to ensure the phi that remains is both
352 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
353 // that cannot spill, we need to be careful what remains in the loop.
354 Register PhiReg = LoopPhi->getOperand(0).getReg();
355 Register DecReg = LoopDec->getOperand(0).getReg();
356 Register StartReg = LoopStart->getOperand(0).getReg();
357 // Ensure the uses are expected, and collect any copies we want to remove.
358 SmallVector<MachineInstr *, 4> Copies;
 // CheckUsers follows BaseReg through COPYs into virtual registers. It returns
 // false as soon as it meets a non-debug user that is neither in ExpectedUsers
 // nor such a COPY; every COPY it walks through is appended to Copies.
359 auto CheckUsers = [&Copies](Register BaseReg,
360 ArrayRef<MachineInstr *> ExpectedUsers,
361 MachineRegisterInfo *MRI) {
363 Worklist.push_back(BaseReg);
364 while (!Worklist.empty()) {
365 Register Reg = Worklist.pop_back_val();
366 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
367 if (llvm::is_contained(ExpectedUsers, &MI))
368 continue;
369 if (MI.getOpcode() != TargetOpcode::COPY ||
370 !MI.getOperand(0).getReg().isVirtual()) {
371 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
372 return false;
373 }
374 Worklist.push_back(MI.getOperand(0).getReg());
375 Copies.push_back(&MI);
376 }
377 }
378 return true;
379 };
380 if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
381 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
382 !CheckUsers(StartReg, {LoopPhi}, MRI)) {
383 // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
384 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
385 RevertWhileLoopStartLR(LoopStart, TII);
386 RevertLoopDec(LoopDec, TII);
387 RevertLoopEnd(LoopEnd, TII);
388 return true;
389 }
390 return false;
391 }
392
 // Constrain the three registers forming the loop counter chain to GPRlr.
393 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
394 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
395 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
396
 // Point the PHI directly at StartReg and DecReg, bypassing the copies. Which
 // incoming slot gets which register depends on which one belongs to the
 // latch.
397 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
398 LoopPhi->getOperand(3).setReg(StartReg);
399 LoopPhi->getOperand(1).setReg(DecReg);
400 } else {
401 LoopPhi->getOperand(1).setReg(StartReg);
402 LoopPhi->getOperand(3).setReg(DecReg);
403 }
404
405 SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
406 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
407 if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
408 // If the LoopEnd falls through, need to insert a t2B to the fall-through
409 // block so that the non-analyzable t2LoopEndDec doesn't fall through.
411 BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
412 .addMBB(&*MBBI)
414 }
415
416 // Replace the loop dec and loop end as a single instruction.
417 MachineInstrBuilder MI =
418 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
419 TII->get(ARM::t2LoopEndDec), DecReg)
420 .addReg(PhiReg)
421 .add(LoopEnd->getOperand(1));
422 (void)MI;
423 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
424
 // The originals and the copies collected by CheckUsers are no longer needed.
425 LoopDec->eraseFromParent();
426 LoopEnd->eraseFromParent();
427 for (auto *MI : Copies)
428 MI->eraseFromParent();
429 return true;
430}
431
432// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
433// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
434// instruction, making the backend ARMLowOverheadLoops passes job of finding the
435// VCTP operand much simpler.
// A t2WhileLoopStartLR is handled the same way and becomes a
// t2WhileLoopStartTP. Returns true if the loop start was replaced, false if the
// loop does not have the expected shape.
// NOTE(review): the embedded line numbers skip 507, so this listing appears to
// have lost the line that defines `InsertPt`.
436bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
437 MachineDominatorTree *DT) {
438 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
439 << ML->getHeader()->getName() << "\n");
440
441 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
442 // in the loop.
443 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
444 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
445 return false;
 // Only continue when the dec and end are one and the same instruction and the
 // loop start is a t2DoLoopStart or t2WhileLoopStartLR.
446 if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
447 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
448 return false;
449
 // Collect the VCTPs and, separately, every other instruction that has a VPT
 // predicate operand.
450 SmallVector<MachineInstr *, 4> VCTPs;
451 SmallVector<MachineInstr *, 4> MVEInstrs;
452 for (MachineBasicBlock *BB : ML->blocks()) {
453 for (MachineInstr &MI : *BB)
454 if (isVCTP(&MI))
455 VCTPs.push_back(&MI);
456 else if (findFirstVPTPredOperandIdx(MI) != -1)
457 MVEInstrs.push_back(&MI);
458 }
459
460 if (VCTPs.empty()) {
461 LLVM_DEBUG(dbgs() << " no VCTPs\n");
462 return false;
463 }
464
465 // Check all VCTPs are the same.
466 MachineInstr *FirstVCTP = *VCTPs.begin();
467 for (MachineInstr *VCTP : VCTPs) {
468 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
469 if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
470 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
471 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
472 return false;
473 }
474 }
475
476 // Check for the register being used can be setup before the loop. We expect
477 // this to be:
478 // $vx = ...
479 // loop:
480 // $vp = PHI [ $vx ], [ $vd ]
481 // ..
482 // $vpr = VCTP $vp
483 // ..
484 // $vd = t2SUBri $vp, #n
485 // ..
486 Register CountReg = FirstVCTP->getOperand(1).getReg();
487 if (!CountReg.isVirtual()) {
488 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
489 return false;
490 }
491 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
492 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
493 Phi->getNumOperands() != 5 ||
494 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
495 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
496 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
497 return false;
498 }
 // Take the PHI's incoming value that does not come from the latch, i.e. the
 // count as it is on entry to the loop.
499 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
500 ? Phi->getOperand(3).getReg()
501 : Phi->getOperand(1).getReg();
502
503 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
504 // the preheader and add the new CountReg to it. We attempt to place it late
505 // in the preheader, but may need to move that earlier based on uses.
506 MachineBasicBlock *MBB = LoopStart->getParent();
 // Give up if any user of the loop start's result is not dominated by the
 // insertion point (when there is one) or lies outside the region dominated by
 // the loop header.
508 for (MachineInstr &Use :
509 MRI->use_instructions(LoopStart->getOperand(0).getReg()))
510 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
511 !DT->dominates(ML->getHeader(), Use.getParent())) {
512 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
513 return false;
514 }
515
516 unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
517 ? ARM::t2DoLoopStartTP
518 : ARM::t2WhileLoopStartTP;
 // The TP form carries the original operands 0 and 1, then CountReg, and for
 // the while variant also the original operand 2.
519 MachineInstrBuilder MI =
520 BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
521 .add(LoopStart->getOperand(0))
522 .add(LoopStart->getOperand(1))
523 .addReg(CountReg);
524 if (NewOpc == ARM::t2WhileLoopStartTP)
525 MI.add(LoopStart->getOperand(2));
526 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
527 << *MI.getInstr());
528 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
529 LoopStart->eraseFromParent();
530
531 if (SetLRPredicate) {
532 // Each predicable MVE instruction collected from the loop is made to use
533 // LR, as defined by the loop Phi, in its predicate operands.
534 Register LR = LoopPhi->getOperand(0).getReg();
535 for (MachineInstr *MI : MVEInstrs) {
536 int Idx = findFirstVPTPredOperandIdx(*MI);
 // NOTE(review): the operand two slots after the first VPT predicate
 // operand is presumably the tail-predicate register slot; confirm
 // against the MVE predicate operand layout.
537 MI->getOperand(Idx + 2).setReg(LR);
538 }
539 }
540
541 return true;
542}
543
544// Returns true if Opcode is any VCMP Opcode.
545static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
546
547// Returns true if a VCMP with this Opcode can have its operands swapped.
548// There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
549// and VCMPr instructions (since the r is always on the right).
550static bool CanHaveSwappedOperands(unsigned Opcode) {
551 switch (Opcode) {
552 default:
553 return true;
554 case ARM::MVE_VCMPf32:
555 case ARM::MVE_VCMPf16:
556 case ARM::MVE_VCMPf32r:
557 case ARM::MVE_VCMPf16r:
558 case ARM::MVE_VCMPi8r:
559 case ARM::MVE_VCMPi16r:
560 case ARM::MVE_VCMPi32r:
561 case ARM::MVE_VCMPu8r:
562 case ARM::MVE_VCMPu16r:
563 case ARM::MVE_VCMPu32r:
564 case ARM::MVE_VCMPs8r:
565 case ARM::MVE_VCMPs16r:
566 case ARM::MVE_VCMPs32r:
567 return false;
568 }
569}
570
571// Returns the CondCode of a VCMP Instruction.
573 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
574 return ARMCC::CondCodes(Instr.getOperand(3).getImm());
575}
576
577// Returns true if Cond is equivalent to a VPNOT instruction on the result of
578// Prev. Cond and Prev must be VCMPs.
580 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
581
582 // Opcodes must match.
583 if (Cond.getOpcode() != Prev.getOpcode())
584 return false;
585
586 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
587 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
588
589 // If the VCMP has the opposite condition with the same operands, we can
590 // replace it with a VPNOT
591 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
592 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
593 if (ExpectedCode == GetCondCode(Prev))
594 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
595 return true;
596 // Check again with operands swapped if possible
597 if (!CanHaveSwappedOperands(Cond.getOpcode()))
598 return false;
599 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
600 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
601 CondOP2.isIdenticalTo(PrevOP1);
602}
603
604// Returns true if Instr writes to VCCR.
605static bool IsWritingToVCCR(MachineInstr &Instr) {
606 if (Instr.getNumOperands() == 0)
607 return false;
608 MachineOperand &Dst = Instr.getOperand(0);
609 if (!Dst.isReg())
610 return false;
611 Register DstReg = Dst.getReg();
612 if (!DstReg.isVirtual())
613 return false;
614 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
615 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
616 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
617}
618
619// Transforms
620// <Instr that uses %A ('User' Operand)>
621// Into
622// %K = VPNOT %Target
623// <Instr that uses %K ('User' Operand)>
624// And returns the newly inserted VPNOT.
625// This optimization is done in the hopes of preventing spills/reloads of VPR by
626// reducing the number of VCCR values with overlapping lifetimes.
// MBB is the block containing Instr, the VPNOT is inserted immediately before
// Instr, and User is the operand of Instr that gets redirected.
// NOTE(review): the embedded line numbers skip 636, so this listing appears to
// have lost one line right after the BuildMI statement.
627MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
628 MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
629 Register Target) {
 // The VPNOT result gets a fresh virtual register in Target's register class.
630 Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
631
632 MachineInstrBuilder MIBuilder =
633 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
634 .addDef(NewResult)
635 .addReg(Target);
637
638 // Make the user use NewResult instead, and clear its kill flag.
639 User.setReg(NewResult);
640 User.setIsKill(false);
641
642 LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
643 MIBuilder.getInstr()->dump());
644
645 return *MIBuilder.getInstr();
646}
647
648// Moves a VPNOT before its first user if an instruction that uses Reg is found
649// in-between the VPNOT and its user.
650// Returns true if there is at least one user of the VPNOT in the block.
653 Register Reg) {
654 assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
656 "The VPNOT cannot be predicated");
657
658 MachineInstr &VPNOT = *Iter;
659 Register VPNOTResult = VPNOT.getOperand(0).getReg();
660 Register VPNOTOperand = VPNOT.getOperand(1).getReg();
661
662 // Whether the VPNOT will need to be moved, and whether we found a user of the
663 // VPNOT.
664 bool MustMove = false, HasUser = false;
665 MachineOperand *VPNOTOperandKiller = nullptr;
666 for (; Iter != MBB.end(); ++Iter) {
667 if (MachineOperand *MO =
668 Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
669 /*isKill*/ true)) {
670 // If we find the operand that kills the VPNOTOperand's result, save it.
671 VPNOTOperandKiller = MO;
672 }
673
674 if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
675 MustMove = true;
676 continue;
677 }
678
679 if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
680 continue;
681
682 HasUser = true;
683 if (!MustMove)
684 break;
685
686 // Move the VPNOT right before Iter
687 LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
688 Iter->dump());
689 MBB.splice(Iter, &MBB, VPNOT.getIterator());
690 // If we move the instr, and its operand was killed earlier, remove the kill
691 // flag.
692 if (VPNOTOperandKiller)
693 VPNOTOperandKiller->setIsKill(false);
694
695 break;
696 }
697 return HasUser;
698}
699
700// This optimisation attempts to reduce the number of overlapping lifetimes of
701// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
702// this replaces
703// %A:vccr = (something)
704// %B:vccr = VPNOT %A
705// %Foo = (some op that uses %B)
706// %Bar = (some op that uses %A)
707// With
708// %A:vccr = (something)
709// %B:vccr = VPNOT %A
710// %Foo = (some op that uses %B)
711// %TMP2:vccr = VPNOT %B
712// %Bar = (some op that uses %TMP2)
// Returns true if any instruction in MBB was changed.
// NOTE(review): the embedded line numbers skip 726, 779 and 817, so this
// listing appears to have lost three lines: the second half of the
// "unpredicated" condition in the first inner loop, the opening of an
// LLVM_DEBUG statement, and the second half of the "unpredicated VPNOT"
// condition in the second inner loop.
713bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
714 MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
 // Erasure of redundant VPNOTs is deferred until the whole block is scanned.
715 SmallVector<MachineInstr *, 4> DeadInstructions;
716 bool Modified = false;
717
718 while (Iter != End) {
719 Register VCCRValue, OppositeVCCRValue;
720 // The first loop looks for 2 unpredicated instructions:
721 // %A:vccr = (instr) ; A is stored in VCCRValue
722 // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
723 for (; Iter != End; ++Iter) {
724 // We're only interested in unpredicated instructions that write to VCCR.
725 if (!IsWritingToVCCR(*Iter) ||
727 continue;
728 Register Dst = Iter->getOperand(0).getReg();
729
730 // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
731 // found what we were looking for.
732 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
733 Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
734 // Move the VPNOT closer to its first user if needed, and ignore if it
735 // has no users.
736 if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
737 continue;
738
739 OppositeVCCRValue = Dst;
740 ++Iter;
741 break;
742 }
743
744 // Else, just set VCCRValue.
745 VCCRValue = Dst;
746 }
747
748 // If the first inner loop didn't find anything, stop here.
749 if (Iter == End)
750 break;
751
752 assert(VCCRValue && OppositeVCCRValue &&
753 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
754 "stopped before the end of the block!");
755 assert(VCCRValue != OppositeVCCRValue &&
756 "VCCRValue should not be equal to OppositeVCCRValue!");
757
758 // LastVPNOTResult always contains the same value as OppositeVCCRValue.
759 Register LastVPNOTResult = OppositeVCCRValue;
760
761 // This second loop tries to optimize the remaining instructions.
762 for (; Iter != End; ++Iter) {
 // Set when the instruction touches one of the tracked values; an
 // uninteresting VCCR write ends this inner loop (see the bottom).
763 bool IsInteresting = false;
764
765 if (MachineOperand *MO =
766 Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
767 IsInteresting = true;
768
769 // - If the instruction is a VPNOT, it can be removed, and we can just
770 // replace its uses with LastVPNOTResult.
771 // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
772 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
773 Register Result = Iter->getOperand(0).getReg();
774
775 MRI->replaceRegWith(Result, LastVPNOTResult);
776 DeadInstructions.push_back(&*Iter);
777 Modified = true;
778
780 << "Replacing all uses of '" << printReg(Result)
781 << "' with '" << printReg(LastVPNOTResult) << "'\n");
782 } else {
783 MachineInstr &VPNOT =
784 ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
785 Modified = true;
786
 // The newly inserted VPNOT is now the most recent value, and the roles
 // of the two tracked registers flip.
787 LastVPNOTResult = VPNOT.getOperand(0).getReg();
788 std::swap(VCCRValue, OppositeVCCRValue);
789
790 LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
791 << "' with '" << printReg(LastVPNOTResult)
792 << "' in instr: " << *Iter);
793 }
794 } else {
795 // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
796 // instead as they contain the same value.
797 if (MachineOperand *MO = Iter->findRegisterUseOperand(
798 OppositeVCCRValue, /*TRI=*/nullptr)) {
799 IsInteresting = true;
800
801 // This is pointless if LastVPNOTResult == OppositeVCCRValue.
802 if (LastVPNOTResult != OppositeVCCRValue) {
803 LLVM_DEBUG(dbgs() << "Replacing usage of '"
804 << printReg(OppositeVCCRValue) << "' with '"
805 << printReg(LastVPNOTResult) << " for instr: ";
806 Iter->dump());
807 MO->setReg(LastVPNOTResult);
808 Modified = true;
809 }
810
 // The kill flag is cleared whether or not the register was changed.
811 MO->setIsKill(false);
812 }
813
814 // If this is an unpredicated VPNOT on
815 // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
816 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
818 Register VPNOTOperand = Iter->getOperand(1).getReg();
819 if (VPNOTOperand == LastVPNOTResult ||
820 VPNOTOperand == OppositeVCCRValue) {
821 IsInteresting = true;
822
823 std::swap(VCCRValue, OppositeVCCRValue);
824 LastVPNOTResult = Iter->getOperand(0).getReg();
825 }
826 }
827 }
828
829 // If this instruction was not interesting, and it writes to VCCR, stop.
830 if (!IsInteresting && IsWritingToVCCR(*Iter))
831 break;
832 }
833 }
834
835 for (MachineInstr *DeadInstruction : DeadInstructions)
836 DeadInstruction->eraseFromParent();
837
838 return Modified;
839}
840
841// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
// A VCMP is replaced when IsVPNOTEquivalent reports it as the negation of the
// previously seen VCMP in MBB. Returns true if at least one VCMP was replaced.
// NOTE(review): the embedded line numbers skip 868 and 893, so this listing
// appears to have lost two lines: the condition guarding the "ignore
// predicated instructions" `continue`, and one line right after the VPNOT
// BuildMI statement.
842bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
 // Replaced VCMPs are erased only after the scan, since the scan is still
 // iterating over the block's instructions.
843 SmallVector<MachineInstr *, 4> DeadInstructions;
844
845 // The last VCMP that we have seen and that couldn't be replaced.
846 // This is reset when an instruction that writes to VCCR/VPR is found, or when
847 // a VCMP is replaced with a VPNOT.
848 // We'll only replace VCMPs with VPNOTs when this is not null, and when the
849 // current VCMP is the opposite of PrevVCMP.
850 MachineInstr *PrevVCMP = nullptr;
851 // If we find an instruction that kills the result of PrevVCMP, we save the
852 // operand here to remove the kill flag in case we need to use PrevVCMP's
853 // result.
854 MachineOperand *PrevVCMPResultKiller = nullptr;
855
856 for (MachineInstr &Instr : MBB.instrs()) {
857 if (PrevVCMP) {
858 if (MachineOperand *MO =
859 Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
860 /*TRI=*/nullptr, /*isKill*/ true)) {
861 // If we come across the instr that kills PrevVCMP's result, record it
862 // so we can remove the kill flag later if we need to.
863 PrevVCMPResultKiller = MO;
864 }
865 }
866
867 // Ignore predicated instructions.
869 continue;
870
871 // Only look at VCMPs
872 if (!IsVCMP(Instr.getOpcode())) {
873 // If the instruction writes to VCCR, forget the previous VCMP.
874 if (IsWritingToVCCR(Instr))
875 PrevVCMP = nullptr;
876 continue;
877 }
878
 // No usable previous VCMP, or this one is not its negation: remember this
 // VCMP as the new candidate and move on.
879 if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
880 PrevVCMP = &Instr;
881 continue;
882 }
883
884 // The register containing the result of the VCMP that we're going to
885 // replace.
886 Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
887
888 // Build a VPNOT to replace the VCMP, reusing its operands.
889 MachineInstrBuilder MIBuilder =
890 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
891 .add(Instr.getOperand(0))
892 .addReg(PrevVCMPResultReg);
894 LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
895 MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
896 Instr.dump());
897
898 // If we found an instruction that uses, and kills PrevVCMP's result,
899 // remove the kill flag.
900 if (PrevVCMPResultKiller)
901 PrevVCMPResultKiller->setIsKill(false);
902
903 // Finally, mark the old VCMP for removal and reset
904 // PrevVCMP/PrevVCMPResultKiller.
905 DeadInstructions.push_back(&Instr);
906 PrevVCMP = nullptr;
907 PrevVCMPResultKiller = nullptr;
908 }
909
910 for (MachineInstr *DeadInstruction : DeadInstructions)
911 DeadInstruction->eraseFromParent();
912
913 return !DeadInstructions.empty();
914}
915
916bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
917 MachineDominatorTree *DT) {
918 // Scan through the block, looking for instructions that use constants moves
919 // into VPR that are the negative of one another. These are expected to be
920 // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
921 // mask is kept it or and VPNOT's of it are added or reused as we scan through
922 // the function.
923 unsigned LastVPTImm = 0;
924 Register LastVPTReg = 0;
925 SmallPtrSet<MachineInstr *, 4> DeadInstructions;
926
927 for (MachineInstr &Instr : MBB.instrs()) {
928 // Look for predicated MVE instructions.
929 int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
930 if (PIdx == -1)
931 continue;
932 Register VPR = Instr.getOperand(PIdx + 1).getReg();
933 if (!VPR.isVirtual())
934 continue;
935
936 // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
937 MachineInstr *Copy = MRI->getVRegDef(VPR);
938 if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
939 !Copy->getOperand(1).getReg().isVirtual() ||
940 MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
941 LastVPTReg = 0;
942 continue;
943 }
944 Register GPR = Copy->getOperand(1).getReg();
945
946 // Find the Immediate used by the copy.
947 auto getImm = [&](Register GPR) -> unsigned {
948 MachineInstr *Def = MRI->getVRegDef(GPR);
949 if (Def && (Def->getOpcode() == ARM::t2MOVi ||
950 Def->getOpcode() == ARM::t2MOVi16))
951 return Def->getOperand(1).getImm();
952 return -1U;
953 };
954 unsigned Imm = getImm(GPR);
955 if (Imm == -1U) {
956 LastVPTReg = 0;
957 continue;
958 }
959
960 unsigned NotImm = ~Imm & 0xffff;
961 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
962 MRI->clearKillFlags(LastVPTReg);
963 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
964 if (MRI->use_empty(VPR)) {
965 DeadInstructions.insert(Copy);
966 if (MRI->hasOneUse(GPR))
967 DeadInstructions.insert(MRI->getVRegDef(GPR));
968 }
969 LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
970 VPR = LastVPTReg;
971 } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
972 // We have found the not of a previous constant. Create a VPNot of the
973 // earlier predicate reg and use it instead of the copy.
974 Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
975 auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
976 TII->get(ARM::MVE_VPNOT), NewVPR)
977 .addReg(LastVPTReg);
979
980 // Use the new register and check if the def is now dead.
981 Instr.getOperand(PIdx + 1).setReg(NewVPR);
982 if (MRI->use_empty(VPR)) {
983 DeadInstructions.insert(Copy);
984 if (MRI->hasOneUse(GPR))
985 DeadInstructions.insert(MRI->getVRegDef(GPR));
986 }
987 MRI->clearKillFlags(LastVPTReg);
988 LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
989 << Instr);
990 VPR = NewVPR;
991 }
992
993 LastVPTImm = Imm;
994 LastVPTReg = VPR;
995 }
996
997 for (MachineInstr *DI : DeadInstructions)
998 DI->eraseFromParent();
999
1000 return !DeadInstructions.empty();
1001}
1002
1003// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1004// somewhat blunt approximation to allow tail predicated with vpsel
1005// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1006// different semantics under tail predication. Until that is modelled we just
1007// convert to a VMOVT (via a predicated VORR) instead.
1008bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1009 bool HasVCTP = false;
1010 SmallVector<MachineInstr *, 4> DeadInstructions;
1011
1012 for (MachineInstr &MI : MBB.instrs()) {
1013 if (isVCTP(&MI)) {
1014 HasVCTP = true;
1015 continue;
1016 }
1017
1018 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1019 continue;
1020
1021 MachineInstrBuilder MIBuilder =
1022 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1023 .add(MI.getOperand(0))
1024 .add(MI.getOperand(1))
1025 .add(MI.getOperand(1))
1027 .add(MI.getOperand(4))
1028 .add(MI.getOperand(5))
1029 .add(MI.getOperand(2));
1030 // Silence unused variable warning in release builds.
1031 (void)MIBuilder;
1032 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1033 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1034 DeadInstructions.push_back(&MI);
1035 }
1036
1037 for (MachineInstr *DeadInstruction : DeadInstructions)
1038 DeadInstruction->eraseFromParent();
1039
1040 return !DeadInstructions.empty();
1041}
1042
1043// Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1044// the instruction may be removable as a noop.
1045bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1046 bool Changed = false;
1047 for (MachineInstr &MI : MBB.instrs()) {
1048 if (MI.getOpcode() != ARM::t2DoLoopStart)
1049 continue;
1050 Register R = MI.getOperand(1).getReg();
1051 MachineFunction *MF = MI.getParent()->getParent();
1053 Changed = true;
1054 }
1055 return Changed;
1056}
1057
1058bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1059 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1060
1061 if (!STI.isThumb2() || !STI.hasLOB())
1062 return false;
1063
1064 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1065 MRI = &Fn.getRegInfo();
1066 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1067 MachineDominatorTree *DT =
1068 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1069
1070 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1071 << "********** Function: " << Fn.getName() << '\n');
1072
1073 bool Modified = false;
1074 for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
1075 Modified |= LowerWhileLoopStart(ML);
1076 Modified |= MergeLoopEnd(ML);
1077 Modified |= ConvertTailPredLoop(ML, DT);
1078 }
1079
1080 for (MachineBasicBlock &MBB : Fn) {
1081 Modified |= HintDoLoopStartReg(MBB);
1082 Modified |= ReplaceConstByVPNOTs(MBB, DT);
1083 Modified |= ReplaceVCMPsByVPNOTs(MBB);
1084 Modified |= ReduceOldVCCRValueUses(MBB);
1085 Modified |= ConvertVPSEL(MBB);
1086 }
1087
1088 LLVM_DEBUG(dbgs() << "**************************************\n");
1089 return Modified;
1090}
1091
1092/// createMVETPAndVPTOptimisationsPass
1094 return new MVETPAndVPTOptimisations();
1095}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
static bool IsInvalidTPInstruction(MachineInstr &MI)
static bool IsVCMP(unsigned Opcode)
static bool IsWritingToVCCR(MachineInstr &Instr)
static bool CanHaveSwappedOperands(unsigned Opcode)
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
SI Lower i1 Copies
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
const ARMBaseInstrInfo * getInstrInfo() const override
bool isThumb2() const
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > terminators()
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
BasicBlockListType::iterator iterator
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
unsigned getNumOperands() const
Retuns the total number of operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
void dump() const
Definition Pass.cpp:146
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getID() const
Return the register class ID number.
self_iterator getIterator()
Definition ilist_node.h:130
Changed
static CondCodes getOppositeCondition(CondCodes CC)
Definition ARMBaseInfo.h:48
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
Definition ARMBaseInfo.h:71
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static bool isLoopStart(const MachineInstr &MI)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
ArrayRef(const T &OneElt) -> ArrayRef< T >
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1877
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853