LLVM 22.0.0git
AArch64RedundantCopyElimination.cpp
Go to the documentation of this file.
1//=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This pass removes unnecessary copies/moves in BBs based on a dominating
8// condition.
9//
10// We handle three cases:
11// 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of
12// the CBZ/CBNZ source register is zero on the taken/not-taken path. For
13// instance, the copy instruction in the code below can be removed because
14// the CBZW jumps to %bb.2 when w0 is zero.
15//
16// %bb.1:
17// cbz w0, .LBB0_2
18// .LBB0_2:
19// mov w0, wzr ; <-- redundant
20//
21// 2. If the flag setting instruction defines a register other than WZR/XZR, we
22// can remove a zero copy in some cases.
23//
24// %bb.0:
25// subs w0, w1, w2
26// str w0, [x1]
27// b.ne .LBB0_2
28// %bb.1:
29// mov w0, wzr ; <-- redundant
30// str w0, [x2]
31// .LBB0_2
32//
33// 3. Finally, if the flag setting instruction is a comparison against a
34// constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate
35// in some cases.
36//
37// %bb.0:
38// subs xzr, x0, #1
39// b.eq .LBB0_1
40// .LBB0_1:
41// orr x0, xzr, #0x1 ; <-- redundant
42//
43// This pass should be run after register allocation.
44//
45// FIXME: This could also be extended to check the whole dominance subtree below
46// the comparison if the compile time regression is acceptable.
47//
48// FIXME: Add support for handling CCMP instructions.
49// FIXME: If the known register value is zero, we should be able to rewrite uses
50// to use WZR/XZR directly in some cases.
51//===----------------------------------------------------------------------===//
52#include "AArch64.h"
53#include "AArch64InstrInfo.h"
54#include "llvm/ADT/SetVector.h"
55#include "llvm/ADT/Statistic.h"
60#include "llvm/Support/Debug.h"
61
62using namespace llvm;
63
64#define DEBUG_TYPE "aarch64-copyelim"
65
66STATISTIC(NumCopiesRemoved, "Number of copies removed.");
67
68namespace {
69class AArch64RedundantCopyElimination : public MachineFunctionPass {
72
73 // DomBBClobberedRegs is used when computing known values in the dominating
74 // BB.
75 LiveRegUnits DomBBClobberedRegs, DomBBUsedRegs;
76
77 // OptBBClobberedRegs is used when optimizing away redundant copies/moves.
78 LiveRegUnits OptBBClobberedRegs, OptBBUsedRegs;
79
80public:
81 static char ID;
82 AArch64RedundantCopyElimination() : MachineFunctionPass(ID) {}
83
84 struct RegImm {
85 MCPhysReg Reg;
86 int32_t Imm;
87 RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
88 };
89
90 bool knownRegValInBlock(MachineInstr &CondBr, MachineBasicBlock *MBB,
91 SmallVectorImpl<RegImm> &KnownRegs,
93 bool optimizeBlock(MachineBasicBlock *MBB);
94 bool runOnMachineFunction(MachineFunction &MF) override;
95 MachineFunctionProperties getRequiredProperties() const override {
96 return MachineFunctionProperties().setNoVRegs();
97 }
98 StringRef getPassName() const override {
99 return "AArch64 Redundant Copy Elimination";
100 }
101};
102char AArch64RedundantCopyElimination::ID = 0;
103}
104
105INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
106 "AArch64 redundant copy elimination pass", false, false)
107
108/// It's possible to determine the value of a register based on a dominating
109/// condition. To do so, this function checks to see if the basic block \p MBB
110/// is the target of a conditional branch \p CondBr with an equality comparison.
111/// If the branch is a CBZ/CBNZ, we know the value of its source operand is zero
112/// in \p MBB for some cases. Otherwise, we find and inspect the NZCV setting
113/// instruction (e.g., SUBS, ADDS). If this instruction defines a register
114/// other than WZR/XZR, we know the value of the destination register is zero in
115/// \p MMB for some cases. In addition, if the NZCV setting instruction is
116/// comparing against a constant we know the other source register is equal to
117/// the constant in \p MBB for some cases. If we find any constant values, push
118/// a physical register and constant value pair onto the KnownRegs vector and
119/// return true. Otherwise, return false if no known values were found.
120bool AArch64RedundantCopyElimination::knownRegValInBlock(
122 SmallVectorImpl<RegImm> &KnownRegs, MachineBasicBlock::iterator &FirstUse) {
123 unsigned Opc = CondBr.getOpcode();
124
125 // Check if the current basic block is the target block to which the
126 // CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
127 if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
128 MBB == CondBr.getOperand(1).getMBB()) ||
129 ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
130 MBB != CondBr.getOperand(1).getMBB())) {
131 FirstUse = CondBr;
132 KnownRegs.push_back(RegImm(CondBr.getOperand(0).getReg(), 0));
133 return true;
134 }
135
136 // Otherwise, must be a conditional branch.
137 if (Opc != AArch64::Bcc)
138 return false;
139
140 // Must be an equality check (i.e., == or !=).
141 AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
142 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
143 return false;
144
145 MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
146 if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
147 (CC == AArch64CC::NE && BrTarget == MBB))
148 return false;
149
150 // Stop if we get to the beginning of PredMBB.
151 MachineBasicBlock *PredMBB = *MBB->pred_begin();
152 assert(PredMBB == CondBr.getParent() &&
153 "Conditional branch not in predecessor block!");
154 if (CondBr == PredMBB->begin())
155 return false;
156
157 // Registers clobbered in PredMBB between CondBr instruction and current
158 // instruction being checked in loop.
159 DomBBClobberedRegs.clear();
160 DomBBUsedRegs.clear();
161
162 // Find compare instruction that sets NZCV used by CondBr.
163 MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
164 for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
165
166 bool IsCMN = false;
167 switch (PredI.getOpcode()) {
168 default:
169 break;
170
171 // CMN is an alias for ADDS with a dead destination register.
172 case AArch64::ADDSWri:
173 case AArch64::ADDSXri:
174 IsCMN = true;
175 [[fallthrough]];
176 // CMP is an alias for SUBS with a dead destination register.
177 case AArch64::SUBSWri:
178 case AArch64::SUBSXri: {
179 // Sometimes the first operand is a FrameIndex. Bail if tht happens.
180 if (!PredI.getOperand(1).isReg())
181 return false;
182 MCPhysReg DstReg = PredI.getOperand(0).getReg();
183 MCPhysReg SrcReg = PredI.getOperand(1).getReg();
184
185 bool Res = false;
186 // If we're comparing against a non-symbolic immediate and the source
187 // register of the compare is not modified (including a self-clobbering
188 // compare) between the compare and conditional branch we known the value
189 // of the 1st source operand.
190 if (PredI.getOperand(2).isImm() && DomBBClobberedRegs.available(SrcReg) &&
191 SrcReg != DstReg) {
192 // We've found the instruction that sets NZCV.
193 int32_t KnownImm = PredI.getOperand(2).getImm();
194 int32_t Shift = PredI.getOperand(3).getImm();
195 KnownImm <<= Shift;
196 if (IsCMN)
197 KnownImm = -KnownImm;
198 FirstUse = PredI;
199 KnownRegs.push_back(RegImm(SrcReg, KnownImm));
200 Res = true;
201 }
202
203 // If this instructions defines something other than WZR/XZR, we know it's
204 // result is zero in some cases.
205 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
206 return Res;
207
208 // The destination register must not be modified between the NZCV setting
209 // instruction and the conditional branch.
210 if (!DomBBClobberedRegs.available(DstReg))
211 return Res;
212
213 FirstUse = PredI;
214 KnownRegs.push_back(RegImm(DstReg, 0));
215 return true;
216 }
217
218 // Look for NZCV setting instructions that define something other than
219 // WZR/XZR.
220 case AArch64::ADCSWr:
221 case AArch64::ADCSXr:
222 case AArch64::ADDSWrr:
223 case AArch64::ADDSWrs:
224 case AArch64::ADDSWrx:
225 case AArch64::ADDSXrr:
226 case AArch64::ADDSXrs:
227 case AArch64::ADDSXrx:
228 case AArch64::ADDSXrx64:
229 case AArch64::ANDSWri:
230 case AArch64::ANDSWrr:
231 case AArch64::ANDSWrs:
232 case AArch64::ANDSXri:
233 case AArch64::ANDSXrr:
234 case AArch64::ANDSXrs:
235 case AArch64::BICSWrr:
236 case AArch64::BICSWrs:
237 case AArch64::BICSXrs:
238 case AArch64::BICSXrr:
239 case AArch64::SBCSWr:
240 case AArch64::SBCSXr:
241 case AArch64::SUBSWrr:
242 case AArch64::SUBSWrs:
243 case AArch64::SUBSWrx:
244 case AArch64::SUBSXrr:
245 case AArch64::SUBSXrs:
246 case AArch64::SUBSXrx:
247 case AArch64::SUBSXrx64: {
248 MCPhysReg DstReg = PredI.getOperand(0).getReg();
249 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
250 return false;
251
252 // The destination register of the NZCV setting instruction must not be
253 // modified before the conditional branch.
254 if (!DomBBClobberedRegs.available(DstReg))
255 return false;
256
257 // We've found the instruction that sets NZCV whose DstReg == 0.
258 FirstUse = PredI;
259 KnownRegs.push_back(RegImm(DstReg, 0));
260 return true;
261 }
262 }
263
264 // Bail if we see an instruction that defines NZCV that we don't handle.
265 if (PredI.definesRegister(AArch64::NZCV, /*TRI=*/nullptr))
266 return false;
267
268 // Track clobbered and used registers.
269 LiveRegUnits::accumulateUsedDefed(PredI, DomBBClobberedRegs, DomBBUsedRegs,
270 TRI);
271 }
272 return false;
273}
274
275bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
276 // Check if the current basic block has a single predecessor.
277 if (MBB->pred_size() != 1)
278 return false;
279
280 // Check if the predecessor has two successors, implying the block ends in a
281 // conditional branch.
282 MachineBasicBlock *PredMBB = *MBB->pred_begin();
283 if (PredMBB->succ_size() != 2)
284 return false;
285
287 if (CondBr == PredMBB->end())
288 return false;
289
290 // Keep track of the earliest point in the PredMBB block where kill markers
291 // need to be removed if a COPY is removed.
293 // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
294 // or a compare (i.e., SUBS). In the latter case, we must take care when
295 // updating FirstUse when scanning for COPY instructions. In particular, if
296 // there's a COPY in between the compare and branch the COPY should not
297 // update FirstUse.
298 bool SeenFirstUse = false;
299 // Registers that contain a known value at the start of MBB.
300 SmallVector<RegImm, 4> KnownRegs;
301
302 MachineBasicBlock::iterator Itr = std::next(CondBr);
303 do {
304 --Itr;
305
306 if (!knownRegValInBlock(*Itr, MBB, KnownRegs, FirstUse))
307 continue;
308
309 // Reset the clobbered and used register units.
310 OptBBClobberedRegs.clear();
311 OptBBUsedRegs.clear();
312
313 // Look backward in PredMBB for COPYs from the known reg to find other
314 // registers that are known to be a constant value.
315 for (auto PredI = Itr;; --PredI) {
316 if (FirstUse == PredI)
317 SeenFirstUse = true;
318
319 if (PredI->isCopy()) {
320 MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
321 MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
322 for (auto &KnownReg : KnownRegs) {
323 if (!OptBBClobberedRegs.available(KnownReg.Reg))
324 continue;
325 // If we have X = COPY Y, and Y is known to be zero, then now X is
326 // known to be zero.
327 if (CopySrcReg == KnownReg.Reg &&
328 OptBBClobberedRegs.available(CopyDstReg)) {
329 KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
330 if (SeenFirstUse)
331 FirstUse = PredI;
332 break;
333 }
334 // If we have X = COPY Y, and X is known to be zero, then now Y is
335 // known to be zero.
336 if (CopyDstReg == KnownReg.Reg &&
337 OptBBClobberedRegs.available(CopySrcReg)) {
338 KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
339 if (SeenFirstUse)
340 FirstUse = PredI;
341 break;
342 }
343 }
344 }
345
346 // Stop if we get to the beginning of PredMBB.
347 if (PredI == PredMBB->begin())
348 break;
349
350 LiveRegUnits::accumulateUsedDefed(*PredI, OptBBClobberedRegs,
351 OptBBUsedRegs, TRI);
352 // Stop if all of the known-zero regs have been clobbered.
353 if (all_of(KnownRegs, [&](RegImm KnownReg) {
354 return !OptBBClobberedRegs.available(KnownReg.Reg);
355 }))
356 break;
357 }
358 break;
359
360 } while (Itr != PredMBB->begin() && Itr->isTerminator());
361
362 // We've not found a registers with a known value, time to bail out.
363 if (KnownRegs.empty())
364 return false;
365
366 bool Changed = false;
367 // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
368 SmallSetVector<unsigned, 4> UsedKnownRegs;
369 MachineBasicBlock::iterator LastChange = MBB->begin();
370 // Remove redundant copy/move instructions unless KnownReg is modified.
371 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
372 MachineInstr *MI = &*I;
373 ++I;
374 bool RemovedMI = false;
375 bool IsCopy = MI->isCopy();
376 bool IsMoveImm = MI->isMoveImmediate();
377 if (IsCopy || IsMoveImm) {
378 Register DefReg = MI->getOperand(0).getReg();
379 Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register();
380 int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
381 if (!MRI->isReserved(DefReg) &&
382 ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
383 IsMoveImm)) {
384 for (RegImm &KnownReg : KnownRegs) {
385 if (KnownReg.Reg != DefReg &&
386 !TRI->isSuperRegister(DefReg, KnownReg.Reg))
387 continue;
388
389 // For a copy, the known value must be a zero.
390 if (IsCopy && KnownReg.Imm != 0)
391 continue;
392
393 if (IsMoveImm) {
394 // For a move immediate, the known immediate must match the source
395 // immediate.
396 if (KnownReg.Imm != SrcImm)
397 continue;
398
399 // Don't remove a move immediate that implicitly defines the upper
400 // bits when only the lower 32 bits are known.
401 MCPhysReg CmpReg = KnownReg.Reg;
402 if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) {
403 return !O.isDead() && O.isReg() && O.isDef() &&
404 O.getReg() != CmpReg;
405 }))
406 continue;
407
408 // Don't remove a move immediate that implicitly defines the upper
409 // bits as different.
410 if (TRI->isSuperRegister(DefReg, KnownReg.Reg) && KnownReg.Imm < 0)
411 continue;
412 }
413
414 if (IsCopy)
415 LLVM_DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
416 else
417 LLVM_DEBUG(dbgs() << "Remove redundant Move : " << *MI);
418
419 MI->eraseFromParent();
420 Changed = true;
421 LastChange = I;
422 NumCopiesRemoved++;
423 UsedKnownRegs.insert(KnownReg.Reg);
424 RemovedMI = true;
425 break;
426 }
427 }
428 }
429
430 // Skip to the next instruction if we removed the COPY/MovImm.
431 if (RemovedMI)
432 continue;
433
434 // Remove any regs the MI clobbers from the KnownConstRegs set.
435 for (unsigned RI = 0; RI < KnownRegs.size();)
436 if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) {
437 std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]);
438 KnownRegs.pop_back();
439 // Don't increment RI since we need to now check the swapped-in
440 // KnownRegs[RI].
441 } else {
442 ++RI;
443 }
444
445 // Continue until the KnownRegs set is empty.
446 if (KnownRegs.empty())
447 break;
448 }
449
450 if (!Changed)
451 return false;
452
453 // Add newly used regs to the block's live-in list if they aren't there
454 // already.
455 for (MCPhysReg KnownReg : UsedKnownRegs)
456 if (!MBB->isLiveIn(KnownReg))
457 MBB->addLiveIn(KnownReg);
458
459 // Clear kills in the range where changes were made. This is conservative,
460 // but should be okay since kill markers are being phased out.
461 LLVM_DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
462 << "\tLastChange: ";
463 if (LastChange == MBB->end()) dbgs() << "<end>\n";
464 else dbgs() << *LastChange);
465 for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
466 MMI.clearKillInfo();
467 for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
468 MMI.clearKillInfo();
469
470 return true;
471}
472
473bool AArch64RedundantCopyElimination::runOnMachineFunction(
474 MachineFunction &MF) {
475 if (skipFunction(MF.getFunction()))
476 return false;
478 MRI = &MF.getRegInfo();
479 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
480
481 // Resize the clobbered and used register unit trackers. We do this once per
482 // function.
483 DomBBClobberedRegs.init(*TRI);
484 DomBBUsedRegs.init(*TRI);
485 OptBBClobberedRegs.init(*TRI);
486 OptBBUsedRegs.init(*TRI);
487
488 bool Changed = false;
489 for (MachineBasicBlock &MBB : MF) {
492 }
493 return Changed;
494}
495
497 return new AArch64RedundantCopyElimination();
498}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file implements a set that has insertion order iteration characteristics.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI)
For a machine instruction MI, adds all register units used in UsedRegUnits and defined or clobbered i...
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void clear()
Clears the set.
LLVM_ABI iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
FunctionPass * createAArch64RedundantCopyEliminationPass()
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
bool optimizeTerminators(MachineBasicBlock *MBB, const TargetInstrInfo &TII)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872