LLVM 22.0.0git
AArch64RedundantCopyElimination.cpp
Go to the documentation of this file.
1//=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This pass removes unnecessary copies/moves in BBs based on a dominating
8// condition.
9//
10// We handle three cases:
11// 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of
12// the CBZ/CBNZ source register is zero on the taken/not-taken path. For
13// instance, the copy instruction in the code below can be removed because
14// the CBZW jumps to %bb.2 when w0 is zero.
15//
16// %bb.1:
17// cbz w0, .LBB0_2
18// .LBB0_2:
19// mov w0, wzr ; <-- redundant
20//
21// 2. If the flag setting instruction defines a register other than WZR/XZR, we
22// can remove a zero copy in some cases.
23//
24// %bb.0:
25// subs w0, w1, w2
26// str w0, [x1]
27// b.ne .LBB0_2
28// %bb.1:
29// mov w0, wzr ; <-- redundant
30// str w0, [x2]
31// .LBB0_2
32//
33// 3. Finally, if the flag setting instruction is a comparison against a
34// constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate
35// in some cases.
36//
37// %bb.0:
38// subs xzr, x0, #1
39// b.eq .LBB0_1
40// .LBB0_1:
41// orr x0, xzr, #0x1 ; <-- redundant
42//
43// This pass should be run after register allocation.
44//
45// FIXME: This could also be extended to check the whole dominance subtree below
46// the comparison if the compile time regression is acceptable.
47//
48// FIXME: Add support for handling CCMP instructions.
49// FIXME: If the known register value is zero, we should be able to rewrite uses
50// to use WZR/XZR directly in some cases.
51//===----------------------------------------------------------------------===//
52#include "AArch64.h"
53#include "llvm/ADT/SetVector.h"
54#include "llvm/ADT/Statistic.h"
59#include "llvm/Support/Debug.h"
60
61using namespace llvm;
62
63#define DEBUG_TYPE "aarch64-copyelim"
64
65STATISTIC(NumCopiesRemoved, "Number of copies removed.");
66
67namespace {
68class AArch64RedundantCopyElimination : public MachineFunctionPass {
71
72 // DomBBClobberedRegs is used when computing known values in the dominating
73 // BB.
74 LiveRegUnits DomBBClobberedRegs, DomBBUsedRegs;
75
76 // OptBBClobberedRegs is used when optimizing away redundant copies/moves.
77 LiveRegUnits OptBBClobberedRegs, OptBBUsedRegs;
78
79public:
80 static char ID;
81 AArch64RedundantCopyElimination() : MachineFunctionPass(ID) {}
82
83 struct RegImm {
84 MCPhysReg Reg;
85 int32_t Imm;
86 RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
87 };
88
89 bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB,
90 SmallVectorImpl<RegImm> &KnownRegs,
92 bool optimizeBlock(MachineBasicBlock *MBB);
93 bool runOnMachineFunction(MachineFunction &MF) override;
94 MachineFunctionProperties getRequiredProperties() const override {
95 return MachineFunctionProperties().setNoVRegs();
96 }
97 StringRef getPassName() const override {
98 return "AArch64 Redundant Copy Elimination";
99 }
100};
101char AArch64RedundantCopyElimination::ID = 0;
102}
103
104INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
105 "AArch64 redundant copy elimination pass", false, false)
106
107/// It's possible to determine the value of a register based on a dominating
108/// condition. To do so, this function checks to see if the basic block \p MBB
109/// is the target of a conditional branch \p CondBr with an equality comparison.
110/// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero
111/// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting
112/// instruction (e.g., SUBS, ADDS). If this instruction defines a register
113/// other than WZR/XZR, we know the value of the destination register is zero in
114/// \p MMB for some cases. In addition, if the NZCV setting instruction is
115/// comparing against a constant we know the other source register is equal to
116/// the constant in \p MBB for some cases. If we find any constant values, push
117/// a physical register and constant value pair onto the KnownRegs vector and
118/// return true. Otherwise, return false if no known values were found.
119bool AArch64RedundantCopyElimination::knownRegValInBlock(
121 SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) {
122 unsigned Opc = CondBr.getOpcode();
123
124 // Check if the current basic block is the target block to which the
125 // CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
126 if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
127 MBB == CondBr.getOperand(1).getMBB()) ||
128 ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
129 MBB != CondBr.getOperand(1).getMBB())) {
130 FirstUse = CondBr;
131 KnownRegs.push_back(RegImm(CondBr.getOperand(0).getReg(), 0));
132 return true;
133 }
134
135 // Otherwise, must be a conditional branch.
136 if (Opc != AArch64::Bcc)
137 return false;
138
139 // Must be an equality check (i.e., == or !=).
140 AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
141 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
142 return false;
143
144 MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
145 if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
146 (CC == AArch64CC::NE && BrTarget == MBB))
147 return false;
148
149 // Stop if we get to the beginning of PredMBB.
150 MachineBasicBlock *PredMBB = *MBB->pred_begin();
151 assert(PredMBB == CondBr.getParent() &&
152 "Conditional branch not in predecessor block!");
153 if (CondBr == PredMBB->begin())
154 return false;
155
156 // Registers clobbered in PredMBB between CondBr instruction and current
157 // instruction being checked in loop.
158 DomBBClobberedRegs.clear();
159 DomBBUsedRegs.clear();
160
161 // Find compare instruction that sets NZCV used by CondBr.
162 MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
163 for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
164
165 bool IsCMN = false;
166 switch (PredI.getOpcode()) {
167 default:
168 break;
169
170 // CMN is an alias for ADDS with a dead destination register.
171 case AArch64::ADDSWri:
172 case AArch64::ADDSXri:
173 IsCMN = true;
174 [[fallthrough]];
175 // CMP is an alias for SUBS with a dead destination register.
176 case AArch64::SUBSWri:
177 case AArch64::SUBSXri: {
178 // Sometimes the first operand is a FrameIndex. Bail if tht happens.
179 if (!PredI.getOperand(1).isReg())
180 return false;
181 MCPhysReg DstReg = PredI.getOperand(0).getReg();
182 MCPhysReg SrcReg = PredI.getOperand(1).getReg();
183
184 bool Res = false;
185 // If we're comparing against a non-symbolic immediate and the source
186 // register of the compare is not modified (including a self-clobbering
187 // compare) between the compare and conditional branch we known the value
188 // of the 1st source operand.
189 if (PredI.getOperand(2).isImm() && DomBBClobberedRegs.available(SrcReg) &&
190 SrcReg != DstReg) {
191 // We've found the instruction that sets NZCV.
192 int32_t KnownImm = PredI.getOperand(2).getImm();
193 int32_t Shift = PredI.getOperand(3).getImm();
194 KnownImm <<= Shift;
195 if (IsCMN)
196 KnownImm = -KnownImm;
197 FirstUse = PredI;
198 KnownRegs.push_back(RegImm(SrcReg, KnownImm));
199 Res = true;
200 }
201
202 // If this instructions defines something other than WZR/XZR, we know it's
203 // result is zero in some cases.
204 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
205 return Res;
206
207 // The destination register must not be modified between the NZCV setting
208 // instruction and the conditional branch.
209 if (!DomBBClobberedRegs.available(DstReg))
210 return Res;
211
212 FirstUse = PredI;
213 KnownRegs.push_back(RegImm(DstReg, 0));
214 return true;
215 }
216
217 // Look for NZCV setting instructions that define something other than
218 // WZR/XZR.
219 case AArch64::ADCSWr:
220 case AArch64::ADCSXr:
221 case AArch64::ADDSWrr:
222 case AArch64::ADDSWrs:
223 case AArch64::ADDSWrx:
224 case AArch64::ADDSXrr:
225 case AArch64::ADDSXrs:
226 case AArch64::ADDSXrx:
227 case AArch64::ADDSXrx64:
228 case AArch64::ANDSWri:
229 case AArch64::ANDSWrr:
230 case AArch64::ANDSWrs:
231 case AArch64::ANDSXri:
232 case AArch64::ANDSXrr:
233 case AArch64::ANDSXrs:
234 case AArch64::BICSWrr:
235 case AArch64::BICSWrs:
236 case AArch64::BICSXrs:
237 case AArch64::BICSXrr:
238 case AArch64::SBCSWr:
239 case AArch64::SBCSXr:
240 case AArch64::SUBSWrr:
241 case AArch64::SUBSWrs:
242 case AArch64::SUBSWrx:
243 case AArch64::SUBSXrr:
244 case AArch64::SUBSXrs:
245 case AArch64::SUBSXrx:
246 case AArch64::SUBSXrx64: {
247 MCPhysReg DstReg = PredI.getOperand(0).getReg();
248 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
249 return false;
250
251 // The destination register of the NZCV setting instruction must not be
252 // modified before the conditional branch.
253 if (!DomBBClobberedRegs.available(DstReg))
254 return false;
255
256 // We've found the instruction that sets NZCV whose DstReg == 0.
257 FirstUse = PredI;
258 KnownRegs.push_back(RegImm(DstReg, 0));
259 return true;
260 }
261 }
262
263 // Bail if we see an instruction that defines NZCV that we don't handle.
264 if (PredI.definesRegister(AArch64::NZCV, /*TRI=*/nullptr))
265 return false;
266
267 // Track clobbered and used registers.
268 LiveRegUnits::accumulateUsedDefed(PredI, DomBBClobberedRegs, DomBBUsedRegs,
269 TRI);
270 }
271 return false;
272}
273
274bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
275 // Check if the current basic block has a single predecessor.
276 if (MBB->pred_size() != 1)
277 return false;
278
279 // Check if the predecessor has two successors, implying the block ends in a
280 // conditional branch.
281 MachineBasicBlock *PredMBB = *MBB->pred_begin();
282 if (PredMBB->succ_size() != 2)
283 return false;
284
286 if (CondBr == PredMBB->end())
287 return false;
288
289 // Keep track of the earliest point in the PredMBB block where kill markers
290 // need to be removed if a COPY is removed.
292 // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
293 // or a compare (i.e., SUBS). In the latter case, we must take care when
294 // updating FirstUse when scanning for COPY instructions. In particular, if
295 // there's a COPY in between the compare and branch the COPY should not
296 // update FirstUse.
297 bool SeenFirstUse = false;
298 // Registers that contain a known value at the start of MBB.
299 SmallVector<RegImm, 4> KnownRegs;
300
301 MachineBasicBlock::iterator Itr = std::next(CondBr);
302 do {
303 --Itr;
304
305 if (!knownRegValInBlock(*Itr, MBB, KnownRegs, FirstUse))
306 continue;
307
308 // Reset the clobbered and used register units.
309 OptBBClobberedRegs.clear();
310 OptBBUsedRegs.clear();
311
312 // Look backward in PredMBB for COPYs from the known reg to find other
313 // registers that are known to be a constant value.
314 for (auto PredI = Itr;; --PredI) {
315 if (FirstUse == PredI)
316 SeenFirstUse = true;
317
318 if (PredI->isCopy()) {
319 MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
320 MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
321 for (auto &KnownReg : KnownRegs) {
322 if (!OptBBClobberedRegs.available(KnownReg.Reg))
323 continue;
324 // If we have X = COPY Y, and Y is known to be zero, then now X is
325 // known to be zero.
326 if (CopySrcReg == KnownReg.Reg &&
327 OptBBClobberedRegs.available(CopyDstReg)) {
328 KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
329 if (SeenFirstUse)
330 FirstUse = PredI;
331 break;
332 }
333 // If we have X = COPY Y, and X is known to be zero, then now Y is
334 // known to be zero.
335 if (CopyDstReg == KnownReg.Reg &&
336 OptBBClobberedRegs.available(CopySrcReg)) {
337 KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
338 if (SeenFirstUse)
339 FirstUse = PredI;
340 break;
341 }
342 }
343 }
344
345 // Stop if we get to the beginning of PredMBB.
346 if (PredI == PredMBB->begin())
347 break;
348
349 LiveRegUnits::accumulateUsedDefed(*PredI, OptBBClobberedRegs,
350 OptBBUsedRegs, TRI);
351 // Stop if all of the known-zero regs have been clobbered.
352 if (all_of(KnownRegs, [&](RegImm KnownReg) {
353 return !OptBBClobberedRegs.available(KnownReg.Reg);
354 }))
355 break;
356 }
357 break;
358
359 } while (Itr != PredMBB->begin() && Itr->isTerminator());
360
361 // We've not found a registers with a known value, time to bail out.
362 if (KnownRegs.empty())
363 return false;
364
365 bool Changed = false;
366 // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
367 SmallSetVector<unsigned, 4> UsedKnownRegs;
368 MachineBasicBlock::iterator LastChange = MBB->begin();
369 // Remove redundant copy/move instructions unless KnownReg is modified.
370 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
371 MachineInstr *MI = &*I;
372 ++I;
373 bool RemovedMI = false;
374 bool IsCopy = MI->isCopy();
375 bool IsMoveImm = MI->isMoveImmediate();
376 if (IsCopy || IsMoveImm) {
377 Register DefReg = MI->getOperand(0).getReg();
378 Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register();
379 int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
380 if (!MRI->isReserved(DefReg) &&
381 ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
382 IsMoveImm)) {
383 for (RegImm &KnownReg : KnownRegs) {
384 if (KnownReg.Reg != DefReg &&
385 !TRI->isSuperRegister(DefReg, KnownReg.Reg))
386 continue;
387
388 // For a copy, the known value must be a zero.
389 if (IsCopy && KnownReg.Imm != 0)
390 continue;
391
392 if (IsMoveImm) {
393 // For a move immediate, the known immediate must match the source
394 // immediate.
395 if (KnownReg.Imm != SrcImm)
396 continue;
397
398 // Don't remove a move immediate that implicitly defines the upper
399 // bits when only the lower 32 bits are known.
400 MCPhysReg CmpReg = KnownReg.Reg;
401 if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) {
402 return !O.isDead() && O.isReg() && O.isDef() &&
403 O.getReg() != CmpReg;
404 }))
405 continue;
406
407 // Don't remove a move immediate that implicitly defines the upper
408 // bits as different.
409 if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0)
410 continue;
411 }
412
413 if (IsCopy)
414 LLVM_DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
415 else
416 LLVM_DEBUG(dbgs() << "Remove redundant Move : " << *MI);
417
418 MI->eraseFromParent();
419 Changed = true;
420 LastChange = I;
421 NumCopiesRemoved++;
422 UsedKnownRegs.insert(KnownReg.Reg);
423 RemovedMI = true;
424 break;
425 }
426 }
427 }
428
429 // Skip to the next instruction if we removed the COPY/MovImm.
430 if (RemovedMI)
431 continue;
432
433 // Remove any regs the MI clobbers from the KnownConstRegs set.
434 for (unsigned RI = 0; RI < KnownRegs.size();)
435 if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) {
436 std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]);
437 KnownRegs.pop_back();
438 // Don't increment RI since we need to now check the swapped-in
439 // KnownRegs[RI].
440 } else {
441 ++RI;
442 }
443
444 // Continue until the KnownRegs set is empty.
445 if (KnownRegs.empty())
446 break;
447 }
448
449 if (!Changed)
450 return false;
451
452 // Add newly used regs to the block's live-in list if they aren't there
453 // already.
454 for (MCPhysReg KnownReg : UsedKnownRegs)
455 if (!MBB->isLiveIn(KnownReg))
456 MBB->addLiveIn(KnownReg);
457
458 // Clear kills in the range where changes were made. This is conservative,
459 // but should be okay since kill markers are being phased out.
460 LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
461 << "\tLastChange: ";
462 if (LastChange == MBB->end()) dbgs() << "<end>\n";
463 else dbgs() << *LastChange);
464 for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
465 MMI.clearKillInfo();
466 for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
467 MMI.clearKillInfo();
468
469 return true;
470}
471
472bool AArch64RedundantCopyElimination::runOnMachineFunction(
473 MachineFunction &MF) {
474 if (skipFunction(MF.getFunction()))
475 return false;
477 MRI = &MF.getRegInfo();
478
479 // Resize the clobbered and used register unit trackers. We do this once per
480 // function.
481 DomBBClobberedRegs.init(*TRI);
482 DomBBUsedRegs.init(*TRI);
483 OptBBClobberedRegs.init(*TRI);
484 OptBBUsedRegs.init(*TRI);
485
486 bool Changed = false;
487 for (MachineBasicBlock &MBB : MF)
489 return Changed;
490}
491
493 return new AArch64RedundantCopyElimination();
494}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:119
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void clear()
Clears the set.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
FunctionPass * createAArch64RedundantCopyEliminationPass()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853