LLVM 19.0.0git
MVETPAndVPTOptimisationsPass.cpp
Go to the documentation of this file.
1//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass does a few optimisations related to Tail predicated loops
10/// and MVE VPT blocks before register allocation is performed. For VPT blocks
11/// the goal is to maximize the sizes of the blocks that will be created by the
12/// MVE VPT Block Insertion pass (which runs after register allocation). For
13/// tail predicated loops we transform the loop into something that will
14/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15///
16//===----------------------------------------------------------------------===//
17
18#include "ARM.h"
19#include "ARMSubtarget.h"
21#include "MVETailPredUtils.h"
22#include "Thumb2InstrInfo.h"
31#include "llvm/Support/Debug.h"
32#include <cassert>
33
34using namespace llvm;
35
36#define DEBUG_TYPE "arm-mve-vpt-opts"
37
38static cl::opt<bool>
39MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
40 cl::desc("Enable merging Loop End and Dec instructions."),
41 cl::init(true));
42
43static cl::opt<bool>
44SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
45 cl::desc("Enable setting lr as a predicate in tail predication regions."),
46 cl::init(true));
47
48namespace {
49class MVETPAndVPTOptimisations : public MachineFunctionPass {
50public:
51 static char ID;
52 const Thumb2InstrInfo *TII;
54
55 MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56
57 bool runOnMachineFunction(MachineFunction &Fn) override;
58
59 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 }
66
67 StringRef getPassName() const override {
68 return "ARM MVE TailPred and VPT Optimisation Pass";
69 }
70
71private:
72 bool LowerWhileLoopStart(MachineLoop *ML);
73 bool MergeLoopEnd(MachineLoop *ML);
74 bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
75 MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76 MachineInstr &Instr,
79 bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
80 bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
81 bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82 bool ConvertVPSEL(MachineBasicBlock &MBB);
83 bool HintDoLoopStartReg(MachineBasicBlock &MBB);
84 MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85 MachineInstr *LoopStart);
86};
87
88char MVETPAndVPTOptimisations::ID = 0;
89
90} // end anonymous namespace
91
92INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
93 "ARM MVE TailPred and VPT Optimisations pass", false,
94 false)
97INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
98 "ARM MVE TailPred and VPT Optimisations pass", false, false)
99
102 while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103 MI->getOperand(1).getReg().isVirtual())
104 MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105 return MI;
106}
107
108// Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
109// corresponding PHI that make up a low overhead loop. Only handles 'do' loops
110// at the moment, returning a t2DoLoopStart in LoopStart.
112 MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113 MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114 MachineBasicBlock *Header = ML->getHeader();
115 MachineBasicBlock *Latch = ML->getLoopLatch();
116 if (!Header || !Latch) {
117 LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
118 return false;
119 }
120
121 // Find the loop end from the terminators.
122 LoopEnd = nullptr;
123 for (auto &T : Latch->terminators()) {
124 if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125 LoopEnd = &T;
126 break;
127 }
128 if (T.getOpcode() == ARM::t2LoopEndDec &&
129 T.getOperand(2).getMBB() == Header) {
130 LoopEnd = &T;
131 break;
132 }
133 }
134 if (!LoopEnd) {
135 LLVM_DEBUG(dbgs() << " no LoopEnd\n");
136 return false;
137 }
138 LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
139
140 // Find the dec from the use of the end. There may be copies between
141 // instructions. We expect the loop to loop like:
142 // $vs = t2DoLoopStart ...
143 // loop:
144 // $vp = phi [ $vs ], [ $vd ]
145 // ...
146 // $vd = t2LoopDec $vp
147 // ...
148 // t2LoopEnd $vd, loop
149 if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150 LoopDec = LoopEnd;
151 else {
152 LoopDec =
153 LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
154 if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155 LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
156 return false;
157 }
158 }
159 LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
160
161 LoopPhi =
162 LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
163 if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164 LoopPhi->getNumOperands() != 5 ||
165 (LoopPhi->getOperand(2).getMBB() != Latch &&
166 LoopPhi->getOperand(4).getMBB() != Latch)) {
167 LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
168 return false;
169 }
170 LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
171
172 Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173 ? LoopPhi->getOperand(3).getReg()
174 : LoopPhi->getOperand(1).getReg();
175 LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176 if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177 LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179 LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
180 return false;
181 }
182 LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
183
184 return true;
185}
186
188 MachineBasicBlock *MBB = MI->getParent();
189 assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190 "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191
192 // Subs
194 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195 MIB.add(MI->getOperand(0));
196 MIB.add(MI->getOperand(1));
197 MIB.addImm(0);
198 MIB.addImm(ARMCC::AL);
199 MIB.addReg(ARM::NoRegister);
200 MIB.addReg(ARM::CPSR, RegState::Define);
201
202 // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203 for (MachineInstr &I : MBB->terminators()) {
204 if (I.getOpcode() == ARM::t2WhileLoopStart) {
206 BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
207 MIB.add(MI->getOperand(1)); // branch target
208 MIB.addImm(ARMCC::EQ);
209 MIB.addReg(ARM::CPSR);
210 I.eraseFromParent();
211 break;
212 }
213 }
214
215 MI->eraseFromParent();
216}
217
218// The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219// start of a while loop:
220// %a:gprlr = t2WhileLoopSetup %Cnt
221// t2WhileLoopStart %a, %BB
222// We want to convert those to a single instruction which, like t2LoopEndDec and
223// t2DoLoopStartTP is both a terminator and produces a value:
224// %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225//
226// Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227// t2WhileLoopStart are not valid past regalloc.
228bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229 LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230 << ML->getHeader()->getName() << "\n");
231
232 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234 return false;
235
236 if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237 return false;
238
239 Register LR = LoopStart->getOperand(0).getReg();
240 auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241 return MI.getOpcode() == ARM::t2WhileLoopStart;
242 });
243 if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244 RevertWhileLoopSetup(LoopStart, TII);
245 RevertLoopDec(LoopStart, TII);
246 RevertLoopEnd(LoopStart, TII);
247 return true;
248 }
249
251 BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252 TII->get(ARM::t2WhileLoopStartLR), LR)
253 .add(LoopStart->getOperand(1))
254 .add(WLSIt->getOperand(1));
255 (void)MI;
256 LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257
258 WLSIt->eraseFromParent();
259 LoopStart->eraseFromParent();
260 return true;
261}
262
263// Return true if this instruction is invalid in a low overhead loop, usually
264// because it clobbers LR.
266 return MI.isCall() || isLoopStart(MI);
267}
268
269// Starting from PreHeader, search for invalid instructions back until the
270// LoopStart block is reached. If invalid instructions are found, the loop start
271// is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272// return the new DLS LoopStart if updated.
273MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274 MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
277 Worklist.push_back(PreHeader);
278 Visited.insert(LoopStart->getParent());
279
280 while (!Worklist.empty()) {
281 MachineBasicBlock *MBB = Worklist.pop_back_val();
282 if (Visited.count(MBB))
283 continue;
284
285 for (MachineInstr &MI : *MBB) {
287 continue;
288
289 LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290
291 // Create a t2DoLoopStart at the end of the preheader.
293 BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294 LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295 MIB.add(LoopStart->getOperand(0));
296 MIB.add(LoopStart->getOperand(1));
297
298 // Make sure to remove the kill flags, to prevent them from being invalid.
299 LoopStart->getOperand(1).setIsKill(false);
300
301 // Revert the t2WhileLoopStartLR to a CMP and Br.
302 RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
303 return MIB;
304 }
305
306 Visited.insert(MBB);
307 for (auto *Pred : MBB->predecessors())
308 Worklist.push_back(Pred);
309 }
310 return LoopStart;
311}
312
313// This function converts loops with t2LoopEnd and t2LoopEnd instructions into
314// a single t2LoopEndDec instruction. To do that it needs to make sure that LR
315// will be valid to be used for the low overhead loop, which means nothing else
316// is using LR (especially calls) and there are no superfluous copies in the
317// loop. The t2LoopEndDec is a branching terminator that produces a value (the
318// decrement) around the loop edge, which means we need to be careful that they
319// will be valid to allocate without any spilling.
320bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
321 if (!MergeEndDec)
322 return false;
323
324 LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
325 << "\n");
326
327 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
328 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
329 return false;
330
331 // Check if there is an illegal instruction (a call) in the low overhead loop
332 // and if so revert it now before we get any further. While loops also need to
333 // check the preheaders, but can be reverted to a DLS loop if needed.
334 auto *PreHeader = ML->getLoopPreheader();
335 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336 LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
337
338 for (MachineBasicBlock *MBB : ML->blocks()) {
339 for (MachineInstr &MI : *MBB) {
341 LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
342 if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
343 RevertDoLoopStart(LoopStart, TII);
344 else
345 RevertWhileLoopStartLR(LoopStart, TII);
346 RevertLoopDec(LoopDec, TII);
347 RevertLoopEnd(LoopEnd, TII);
348 return true;
349 }
350 }
351 }
352
353 // Remove any copies from the loop, to ensure the phi that remains is both
354 // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
355 // that cannot spill, we need to be careful what remains in the loop.
356 Register PhiReg = LoopPhi->getOperand(0).getReg();
357 Register DecReg = LoopDec->getOperand(0).getReg();
358 Register StartReg = LoopStart->getOperand(0).getReg();
359 // Ensure the uses are expected, and collect any copies we want to remove.
361 auto CheckUsers = [&Copies](Register BaseReg,
362 ArrayRef<MachineInstr *> ExpectedUsers,
365 Worklist.push_back(BaseReg);
366 while (!Worklist.empty()) {
367 Register Reg = Worklist.pop_back_val();
368 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
369 if (llvm::is_contained(ExpectedUsers, &MI))
370 continue;
371 if (MI.getOpcode() != TargetOpcode::COPY ||
372 !MI.getOperand(0).getReg().isVirtual()) {
373 LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
374 return false;
375 }
376 Worklist.push_back(MI.getOperand(0).getReg());
377 Copies.push_back(&MI);
378 }
379 }
380 return true;
381 };
382 if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
383 !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
384 !CheckUsers(StartReg, {LoopPhi}, MRI)) {
385 // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
386 if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
387 RevertWhileLoopStartLR(LoopStart, TII);
388 RevertLoopDec(LoopDec, TII);
389 RevertLoopEnd(LoopEnd, TII);
390 return true;
391 }
392 return false;
393 }
394
395 MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396 MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397 MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
398
399 if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
400 LoopPhi->getOperand(3).setReg(StartReg);
401 LoopPhi->getOperand(1).setReg(DecReg);
402 } else {
403 LoopPhi->getOperand(1).setReg(StartReg);
404 LoopPhi->getOperand(3).setReg(DecReg);
405 }
406
407 SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
408 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
409 if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
410 // If the LoopEnd falls through, need to insert a t2B to the fall-through
411 // block so that the non-analyzable t2LoopEndDec doesn't fall through.
413 BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
414 .addMBB(&*MBBI)
416 }
417
418 // Replace the loop dec and loop end as a single instruction.
420 BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
421 TII->get(ARM::t2LoopEndDec), DecReg)
422 .addReg(PhiReg)
423 .add(LoopEnd->getOperand(1));
424 (void)MI;
425 LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
426
427 LoopDec->eraseFromParent();
428 LoopEnd->eraseFromParent();
429 for (auto *MI : Copies)
430 MI->eraseFromParent();
431 return true;
432}
433
434// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
435// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
436// instruction, making the backend ARMLowOverheadLoops passes job of finding the
437// VCTP operand much simpler.
438bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
440 LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
441 << ML->getHeader()->getName() << "\n");
442
443 // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
444 // in the loop.
445 MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
446 if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
447 return false;
448 if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
449 LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
450 return false;
451
454 for (MachineBasicBlock *BB : ML->blocks()) {
455 for (MachineInstr &MI : *BB)
456 if (isVCTP(&MI))
457 VCTPs.push_back(&MI);
458 else if (findFirstVPTPredOperandIdx(MI) != -1)
459 MVEInstrs.push_back(&MI);
460 }
461
462 if (VCTPs.empty()) {
463 LLVM_DEBUG(dbgs() << " no VCTPs\n");
464 return false;
465 }
466
467 // Check all VCTPs are the same.
468 MachineInstr *FirstVCTP = *VCTPs.begin();
469 for (MachineInstr *VCTP : VCTPs) {
470 LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
471 if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
472 VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
473 LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
474 return false;
475 }
476 }
477
478 // Check for the register being used can be setup before the loop. We expect
479 // this to be:
480 // $vx = ...
481 // loop:
482 // $vp = PHI [ $vx ], [ $vd ]
483 // ..
484 // $vpr = VCTP $vp
485 // ..
486 // $vd = t2SUBri $vp, #n
487 // ..
488 Register CountReg = FirstVCTP->getOperand(1).getReg();
489 if (!CountReg.isVirtual()) {
490 LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
491 return false;
492 }
493 MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
494 if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
495 Phi->getNumOperands() != 5 ||
496 (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
497 Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
498 LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
499 return false;
500 }
501 CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
502 ? Phi->getOperand(3).getReg()
503 : Phi->getOperand(1).getReg();
504
505 // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
506 // the preheader and add the new CountReg to it. We attempt to place it late
507 // in the preheader, but may need to move that earlier based on uses.
508 MachineBasicBlock *MBB = LoopStart->getParent();
510 for (MachineInstr &Use :
511 MRI->use_instructions(LoopStart->getOperand(0).getReg()))
512 if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
513 !DT->dominates(ML->getHeader(), Use.getParent())) {
514 LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
515 return false;
516 }
517
518 unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
519 ? ARM::t2DoLoopStartTP
520 : ARM::t2WhileLoopStartTP;
522 BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
523 .add(LoopStart->getOperand(0))
524 .add(LoopStart->getOperand(1))
525 .addReg(CountReg);
526 if (NewOpc == ARM::t2WhileLoopStartTP)
527 MI.add(LoopStart->getOperand(2));
528 LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
529 << *MI.getInstr());
530 MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
531 LoopStart->eraseFromParent();
532
533 if (SetLRPredicate) {
534 // Each instruction in the loop needs to be using LR as the predicate from
535 // the Phi as the predicate.
536 Register LR = LoopPhi->getOperand(0).getReg();
537 for (MachineInstr *MI : MVEInstrs) {
539 MI->getOperand(Idx + 2).setReg(LR);
540 }
541 }
542
543 return true;
544}
545
546// Returns true if Opcode is any VCMP Opcode.
547static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
548
549// Returns true if a VCMP with this Opcode can have its operands swapped.
550// There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
551// and VCMPr instructions (since the r is always on the right).
552static bool CanHaveSwappedOperands(unsigned Opcode) {
553 switch (Opcode) {
554 default:
555 return true;
556 case ARM::MVE_VCMPf32:
557 case ARM::MVE_VCMPf16:
558 case ARM::MVE_VCMPf32r:
559 case ARM::MVE_VCMPf16r:
560 case ARM::MVE_VCMPi8r:
561 case ARM::MVE_VCMPi16r:
562 case ARM::MVE_VCMPi32r:
563 case ARM::MVE_VCMPu8r:
564 case ARM::MVE_VCMPu16r:
565 case ARM::MVE_VCMPu32r:
566 case ARM::MVE_VCMPs8r:
567 case ARM::MVE_VCMPs16r:
568 case ARM::MVE_VCMPs32r:
569 return false;
570 }
571}
572
573// Returns the CondCode of a VCMP Instruction.
575 assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
576 return ARMCC::CondCodes(Instr.getOperand(3).getImm());
577}
578
579// Returns true if Cond is equivalent to a VPNOT instruction on the result of
580// Prev. Cond and Prev must be VCMPs.
582 assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
583
584 // Opcodes must match.
585 if (Cond.getOpcode() != Prev.getOpcode())
586 return false;
587
588 MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
589 MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
590
591 // If the VCMP has the opposite condition with the same operands, we can
592 // replace it with a VPNOT
593 ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
594 ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
595 if (ExpectedCode == GetCondCode(Prev))
596 if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
597 return true;
598 // Check again with operands swapped if possible
599 if (!CanHaveSwappedOperands(Cond.getOpcode()))
600 return false;
601 ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
602 return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
603 CondOP2.isIdenticalTo(PrevOP1);
604}
605
606// Returns true if Instr writes to VCCR.
607static bool IsWritingToVCCR(MachineInstr &Instr) {
608 if (Instr.getNumOperands() == 0)
609 return false;
610 MachineOperand &Dst = Instr.getOperand(0);
611 if (!Dst.isReg())
612 return false;
613 Register DstReg = Dst.getReg();
614 if (!DstReg.isVirtual())
615 return false;
616 MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
617 const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
618 return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
619}
620
621// Transforms
622// <Instr that uses %A ('User' Operand)>
623// Into
624// %K = VPNOT %Target
625// <Instr that uses %K ('User' Operand)>
626// And returns the newly inserted VPNOT.
627// This optimization is done in the hopes of preventing spills/reloads of VPR by
628// reducing the number of VCCR values with overlapping lifetimes.
629MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
632 Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
633
634 MachineInstrBuilder MIBuilder =
635 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
636 .addDef(NewResult)
637 .addReg(Target);
639
640 // Make the user use NewResult instead, and clear its kill flag.
641 User.setReg(NewResult);
642 User.setIsKill(false);
643
644 LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
645 MIBuilder.getInstr()->dump());
646
647 return *MIBuilder.getInstr();
648}
649
650// Moves a VPNOT before its first user if an instruction that uses Reg is found
651// in-between the VPNOT and its user.
652// Returns true if there is at least one user of the VPNOT in the block.
655 Register Reg) {
656 assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
658 "The VPNOT cannot be predicated");
659
660 MachineInstr &VPNOT = *Iter;
661 Register VPNOTResult = VPNOT.getOperand(0).getReg();
662 Register VPNOTOperand = VPNOT.getOperand(1).getReg();
663
664 // Whether the VPNOT will need to be moved, and whether we found a user of the
665 // VPNOT.
666 bool MustMove = false, HasUser = false;
667 MachineOperand *VPNOTOperandKiller = nullptr;
668 for (; Iter != MBB.end(); ++Iter) {
669 if (MachineOperand *MO =
670 Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
671 /*isKill*/ true)) {
672 // If we find the operand that kills the VPNOTOperand's result, save it.
673 VPNOTOperandKiller = MO;
674 }
675
676 if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
677 MustMove = true;
678 continue;
679 }
680
681 if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
682 continue;
683
684 HasUser = true;
685 if (!MustMove)
686 break;
687
688 // Move the VPNOT right before Iter
689 LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
690 Iter->dump());
691 MBB.splice(Iter, &MBB, VPNOT.getIterator());
692 // If we move the instr, and its operand was killed earlier, remove the kill
693 // flag.
694 if (VPNOTOperandKiller)
695 VPNOTOperandKiller->setIsKill(false);
696
697 break;
698 }
699 return HasUser;
700}
701
702// This optimisation attempts to reduce the number of overlapping lifetimes of
703// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
704// this replaces
705// %A:vccr = (something)
706// %B:vccr = VPNOT %A
707// %Foo = (some op that uses %B)
708// %Bar = (some op that uses %A)
709// With
710// %A:vccr = (something)
711// %B:vccr = VPNOT %A
712// %Foo = (some op that uses %B)
713// %TMP2:vccr = VPNOT %B
714// %Bar = (some op that uses %A)
715bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
717 SmallVector<MachineInstr *, 4> DeadInstructions;
718 bool Modified = false;
719
720 while (Iter != End) {
721 Register VCCRValue, OppositeVCCRValue;
722 // The first loop looks for 2 unpredicated instructions:
723 // %A:vccr = (instr) ; A is stored in VCCRValue
724 // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue
725 for (; Iter != End; ++Iter) {
726 // We're only interested in unpredicated instructions that write to VCCR.
727 if (!IsWritingToVCCR(*Iter) ||
729 continue;
730 Register Dst = Iter->getOperand(0).getReg();
731
732 // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
733 // found what we were looking for.
734 if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
735 Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
736 // Move the VPNOT closer to its first user if needed, and ignore if it
737 // has no users.
738 if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
739 continue;
740
741 OppositeVCCRValue = Dst;
742 ++Iter;
743 break;
744 }
745
746 // Else, just set VCCRValue.
747 VCCRValue = Dst;
748 }
749
750 // If the first inner loop didn't find anything, stop here.
751 if (Iter == End)
752 break;
753
754 assert(VCCRValue && OppositeVCCRValue &&
755 "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
756 "stopped before the end of the block!");
757 assert(VCCRValue != OppositeVCCRValue &&
758 "VCCRValue should not be equal to OppositeVCCRValue!");
759
760 // LastVPNOTResult always contains the same value as OppositeVCCRValue.
761 Register LastVPNOTResult = OppositeVCCRValue;
762
763 // This second loop tries to optimize the remaining instructions.
764 for (; Iter != End; ++Iter) {
765 bool IsInteresting = false;
766
767 if (MachineOperand *MO =
768 Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
769 IsInteresting = true;
770
771 // - If the instruction is a VPNOT, it can be removed, and we can just
772 // replace its uses with LastVPNOTResult.
773 // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
774 if (Iter->getOpcode() == ARM::MVE_VPNOT) {
775 Register Result = Iter->getOperand(0).getReg();
776
777 MRI->replaceRegWith(Result, LastVPNOTResult);
778 DeadInstructions.push_back(&*Iter);
779 Modified = true;
780
782 << "Replacing all uses of '" << printReg(Result)
783 << "' with '" << printReg(LastVPNOTResult) << "'\n");
784 } else {
785 MachineInstr &VPNOT =
786 ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
787 Modified = true;
788
789 LastVPNOTResult = VPNOT.getOperand(0).getReg();
790 std::swap(VCCRValue, OppositeVCCRValue);
791
792 LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
793 << "' with '" << printReg(LastVPNOTResult)
794 << "' in instr: " << *Iter);
795 }
796 } else {
797 // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
798 // instead as they contain the same value.
799 if (MachineOperand *MO = Iter->findRegisterUseOperand(
800 OppositeVCCRValue, /*TRI=*/nullptr)) {
801 IsInteresting = true;
802
803 // This is pointless if LastVPNOTResult == OppositeVCCRValue.
804 if (LastVPNOTResult != OppositeVCCRValue) {
805 LLVM_DEBUG(dbgs() << "Replacing usage of '"
806 << printReg(OppositeVCCRValue) << "' with '"
807 << printReg(LastVPNOTResult) << " for instr: ";
808 Iter->dump());
809 MO->setReg(LastVPNOTResult);
810 Modified = true;
811 }
812
813 MO->setIsKill(false);
814 }
815
816 // If this is an unpredicated VPNOT on
817 // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
818 if (Iter->getOpcode() == ARM::MVE_VPNOT &&
820 Register VPNOTOperand = Iter->getOperand(1).getReg();
821 if (VPNOTOperand == LastVPNOTResult ||
822 VPNOTOperand == OppositeVCCRValue) {
823 IsInteresting = true;
824
825 std::swap(VCCRValue, OppositeVCCRValue);
826 LastVPNOTResult = Iter->getOperand(0).getReg();
827 }
828 }
829 }
830
831 // If this instruction was not interesting, and it writes to VCCR, stop.
832 if (!IsInteresting && IsWritingToVCCR(*Iter))
833 break;
834 }
835 }
836
837 for (MachineInstr *DeadInstruction : DeadInstructions)
838 DeadInstruction->eraseFromParent();
839
840 return Modified;
841}
842
843// This optimisation replaces VCMPs with VPNOTs when they are equivalent.
844bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
845 SmallVector<MachineInstr *, 4> DeadInstructions;
846
847 // The last VCMP that we have seen and that couldn't be replaced.
848 // This is reset when an instruction that writes to VCCR/VPR is found, or when
849 // a VCMP is replaced with a VPNOT.
850 // We'll only replace VCMPs with VPNOTs when this is not null, and when the
851 // current VCMP is the opposite of PrevVCMP.
852 MachineInstr *PrevVCMP = nullptr;
853 // If we find an instruction that kills the result of PrevVCMP, we save the
854 // operand here to remove the kill flag in case we need to use PrevVCMP's
855 // result.
856 MachineOperand *PrevVCMPResultKiller = nullptr;
857
858 for (MachineInstr &Instr : MBB.instrs()) {
859 if (PrevVCMP) {
860 if (MachineOperand *MO =
861 Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
862 /*TRI=*/nullptr, /*isKill*/ true)) {
863 // If we come accross the instr that kills PrevVCMP's result, record it
864 // so we can remove the kill flag later if we need to.
865 PrevVCMPResultKiller = MO;
866 }
867 }
868
869 // Ignore predicated instructions.
871 continue;
872
873 // Only look at VCMPs
874 if (!IsVCMP(Instr.getOpcode())) {
875 // If the instruction writes to VCCR, forget the previous VCMP.
876 if (IsWritingToVCCR(Instr))
877 PrevVCMP = nullptr;
878 continue;
879 }
880
881 if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
882 PrevVCMP = &Instr;
883 continue;
884 }
885
886 // The register containing the result of the VCMP that we're going to
887 // replace.
888 Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
889
890 // Build a VPNOT to replace the VCMP, reusing its operands.
891 MachineInstrBuilder MIBuilder =
892 BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
893 .add(Instr.getOperand(0))
894 .addReg(PrevVCMPResultReg);
896 LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
897 MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
898 Instr.dump());
899
900 // If we found an instruction that uses, and kills PrevVCMP's result,
901 // remove the kill flag.
902 if (PrevVCMPResultKiller)
903 PrevVCMPResultKiller->setIsKill(false);
904
905 // Finally, mark the old VCMP for removal and reset
906 // PrevVCMP/PrevVCMPResultKiller.
907 DeadInstructions.push_back(&Instr);
908 PrevVCMP = nullptr;
909 PrevVCMPResultKiller = nullptr;
910 }
911
912 for (MachineInstr *DeadInstruction : DeadInstructions)
913 DeadInstruction->eraseFromParent();
914
915 return !DeadInstructions.empty();
916}
917
918bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
920 // Scan through the block, looking for instructions that use constants moves
921 // into VPR that are the negative of one another. These are expected to be
922 // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
923 // mask is kept it or and VPNOT's of it are added or reused as we scan through
924 // the function.
925 unsigned LastVPTImm = 0;
926 Register LastVPTReg = 0;
927 SmallSet<MachineInstr *, 4> DeadInstructions;
928
929 for (MachineInstr &Instr : MBB.instrs()) {
930 // Look for predicated MVE instructions.
931 int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
932 if (PIdx == -1)
933 continue;
934 Register VPR = Instr.getOperand(PIdx + 1).getReg();
935 if (!VPR.isVirtual())
936 continue;
937
938 // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
939 MachineInstr *Copy = MRI->getVRegDef(VPR);
940 if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
941 !Copy->getOperand(1).getReg().isVirtual() ||
942 MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
943 LastVPTReg = 0;
944 continue;
945 }
946 Register GPR = Copy->getOperand(1).getReg();
947
948 // Find the Immediate used by the copy.
949 auto getImm = [&](Register GPR) -> unsigned {
950 MachineInstr *Def = MRI->getVRegDef(GPR);
951 if (Def && (Def->getOpcode() == ARM::t2MOVi ||
952 Def->getOpcode() == ARM::t2MOVi16))
953 return Def->getOperand(1).getImm();
954 return -1U;
955 };
956 unsigned Imm = getImm(GPR);
957 if (Imm == -1U) {
958 LastVPTReg = 0;
959 continue;
960 }
961
962 unsigned NotImm = ~Imm & 0xffff;
963 if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
964 MRI->clearKillFlags(LastVPTReg);
965 Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
966 if (MRI->use_empty(VPR)) {
967 DeadInstructions.insert(Copy);
968 if (MRI->hasOneUse(GPR))
969 DeadInstructions.insert(MRI->getVRegDef(GPR));
970 }
971 LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
972 VPR = LastVPTReg;
973 } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
974 // We have found the not of a previous constant. Create a VPNot of the
975 // earlier predicate reg and use it instead of the copy.
976 Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
977 auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
978 TII->get(ARM::MVE_VPNOT), NewVPR)
979 .addReg(LastVPTReg);
981
982 // Use the new register and check if the def is now dead.
983 Instr.getOperand(PIdx + 1).setReg(NewVPR);
984 if (MRI->use_empty(VPR)) {
985 DeadInstructions.insert(Copy);
986 if (MRI->hasOneUse(GPR))
987 DeadInstructions.insert(MRI->getVRegDef(GPR));
988 }
989 LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
990 << Instr);
991 VPR = NewVPR;
992 }
993
994 LastVPTImm = Imm;
995 LastVPTReg = VPR;
996 }
997
998 for (MachineInstr *DI : DeadInstructions)
999 DI->eraseFromParent();
1000
1001 return !DeadInstructions.empty();
1002}
1003
1004// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1005// somewhat blunt approximation to allow tail predicated with vpsel
1006// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1007// different semantics under tail predication. Until that is modelled we just
1008// convert to a VMOVT (via a predicated VORR) instead.
1009bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1010 bool HasVCTP = false;
1011 SmallVector<MachineInstr *, 4> DeadInstructions;
1012
1013 for (MachineInstr &MI : MBB.instrs()) {
1014 if (isVCTP(&MI)) {
1015 HasVCTP = true;
1016 continue;
1017 }
1018
1019 if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1020 continue;
1021
1022 MachineInstrBuilder MIBuilder =
1023 BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1024 .add(MI.getOperand(0))
1025 .add(MI.getOperand(1))
1026 .add(MI.getOperand(1))
1028 .add(MI.getOperand(4))
1029 .add(MI.getOperand(5))
1030 .add(MI.getOperand(2));
1031 // Silence unused variable warning in release builds.
1032 (void)MIBuilder;
1033 LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1034 dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
1035 DeadInstructions.push_back(&MI);
1036 }
1037
1038 for (MachineInstr *DeadInstruction : DeadInstructions)
1039 DeadInstruction->eraseFromParent();
1040
1041 return !DeadInstructions.empty();
1042}
1043
1044// Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1045// the instruction may be removable as a noop.
1046bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1047 bool Changed = false;
1048 for (MachineInstr &MI : MBB.instrs()) {
1049 if (MI.getOpcode() != ARM::t2DoLoopStart)
1050 continue;
1051 Register R = MI.getOperand(1).getReg();
1052 MachineFunction *MF = MI.getParent()->getParent();
1054 Changed = true;
1055 }
1056 return Changed;
1057}
1058
1059bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
1060 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1061
1062 if (!STI.isThumb2() || !STI.hasLOB())
1063 return false;
1064
1065 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1066 MRI = &Fn.getRegInfo();
1067 MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
1069 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1070
1071 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1072 << "********** Function: " << Fn.getName() << '\n');
1073
1074 bool Modified = false;
1075 for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
1076 Modified |= LowerWhileLoopStart(ML);
1077 Modified |= MergeLoopEnd(ML);
1078 Modified |= ConvertTailPredLoop(ML, DT);
1079 }
1080
1081 for (MachineBasicBlock &MBB : Fn) {
1082 Modified |= HintDoLoopStartReg(MBB);
1083 Modified |= ReplaceConstByVPNOTs(MBB, DT);
1084 Modified |= ReplaceVCMPsByVPNOTs(MBB);
1085 Modified |= ReduceOldVCCRValueUses(MBB);
1086 Modified |= ConvertVPSEL(MBB);
1087 }
1088
1089 LLVM_DEBUG(dbgs() << "**************************************\n");
1090 return Modified;
1091}
1092
1093/// createMVETPAndVPTOptimisationsPass
1095 return new MVETPAndVPTOptimisations();
1096}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator MBBI
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
ARM MVE TailPred and VPT Optimisations static false MachineInstr * LookThroughCOPY(MachineInstr *MI, MachineRegisterInfo *MRI)
static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII)
static cl::opt< bool > SetLRPredicate("arm-set-lr-predicate", cl::Hidden, cl::desc("Enable setting lr as a predicate in tail predication regions."), cl::init(true))
static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, MachineInstr *&LoopStart, MachineInstr *&LoopPhi, MachineInstr *&LoopDec, MachineInstr *&LoopEnd)
static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, Register Reg)
static cl::opt< bool > MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, cl::desc("Enable merging Loop End and Dec instructions."), cl::init(true))
static ARMCC::CondCodes GetCondCode(MachineInstr &Instr)
static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev)
static bool IsInvalidTPInstruction(MachineInstr &MI)
static bool IsVCMP(unsigned Opcode)
ARM MVE TailPred and VPT Optimisations pass
static bool IsWritingToVCCR(MachineInstr &Instr)
#define DEBUG_TYPE
static bool CanHaveSwappedOperands(unsigned Opcode)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
This file defines the SmallVector class.
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isThumb2() const
Definition: ARMSubtarget.h:365
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< iterator > terminators()
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:566
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:343
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:569
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:495
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:576
LoopInfoBase< MachineBasicBlock, MachineLoop > & getBase()
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
void dump() const
Definition: Pass.cpp:136
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
unsigned getID() const
Return the register class ID number.
Target - Wrapper for Target specific information.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
self_iterator getIterator()
Definition: ilist_node.h:132
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC)
getSwappedCondition - assume the flags are set by MI(a,b), return the condition code if we modify the...
Definition: ARMBaseInfo.h:71
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
NodeAddr< InstrNode * > Instr
Definition: RDFGraph.h:389
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
FunctionPass * createMVETPAndVPTOptimisationsPass()
createMVETPAndVPTOptimisationsPass
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860