LLVM 20.0.0git
AArch64ConditionOptimizer.cpp
Go to the documentation of this file.
1//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to make consecutive compares of values use same operands to
10// allow CSE pass to remove duplicated instructions. For this it analyzes
11// branches and adjusts comparisons with immediate values by converting:
12// * GE -> GT
13// * GT -> GE
14// * LT -> LE
15// * LE -> LT
16// and adjusting immediate values appropriately. It basically corrects two
17// immediate values towards each other to make them equal.
18//
19// Consider the following example in C:
20//
21// if ((a < 5 && ...) || (a > 5 && ...)) {
22// ~~~~~ ~~~~~
23// ^ ^
24// x y
25//
26// Here both "x" and "y" expressions compare "a" with "5". When "x" evaluates
27// to "false", "y" can just check flags set by the first comparison. As a
28// result of the canonicalization employed by
29// SelectionDAGBuilder::visitSwitchCase, DAGCombine, and other target-specific
30// code, assembly ends up in the form that is not CSE friendly:
31//
32// ...
33// cmp w8, #4
34// b.gt .LBB0_3
35// ...
36// .LBB0_3:
37// cmp w8, #6
38// b.lt .LBB0_6
39// ...
40//
41// Same assembly after the pass:
42//
43// ...
44// cmp w8, #5
45// b.ge .LBB0_3
46// ...
47// .LBB0_3:
48// cmp w8, #5 // <-- CSE pass removes this instruction
49// b.le .LBB0_6
50// ...
51//
52// Currently only SUBS and ADDS followed by b.?? are supported.
53//
54// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
55// TODO: handle other conditional instructions (e.g. CSET)
56// TODO: allow second branching to be anything if it doesn't require adjusting
57//
58//===----------------------------------------------------------------------===//
59
60#include "AArch64.h"
63#include "llvm/ADT/ArrayRef.h"
66#include "llvm/ADT/Statistic.h"
78#include "llvm/Pass.h"
79#include "llvm/Support/Debug.h"
82#include <cassert>
83#include <cstdlib>
84#include <tuple>
85
86using namespace llvm;
87
88#define DEBUG_TYPE "aarch64-condopt"
89
90STATISTIC(NumConditionsAdjusted, "Number of conditions adjusted");
91
92namespace {
93
94class AArch64ConditionOptimizer : public MachineFunctionPass {
95 const TargetInstrInfo *TII;
96 MachineDominatorTree *DomTree;
98
99public:
100 // Stores immediate, compare instruction opcode and branch condition (in this
101 // order) of adjusted comparison.
102 using CmpInfo = std::tuple<int, unsigned, AArch64CC::CondCode>;
103
104 static char ID;
105
106 AArch64ConditionOptimizer() : MachineFunctionPass(ID) {
108 }
109
110 void getAnalysisUsage(AnalysisUsage &AU) const override;
111 MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
112 CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
113 void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
114 bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
115 int ToImm);
116 bool runOnMachineFunction(MachineFunction &MF) override;
117
118 StringRef getPassName() const override {
119 return "AArch64 Condition Optimizer";
120 }
121};
122
123} // end anonymous namespace
124
125char AArch64ConditionOptimizer::ID = 0;
126
127INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
128 "AArch64 CondOpt Pass", false, false)
130INITIALIZE_PASS_END(AArch64ConditionOptimizer, "aarch64-condopt",
131 "AArch64 CondOpt Pass", false, false)
132
134 return new AArch64ConditionOptimizer();
135}
136
137void AArch64ConditionOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
141}
142
143// Finds compare instruction that corresponds to supported types of branching.
144// Returns the instruction or nullptr on failures or detecting unsupported
145// instructions.
146MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
149 if (Term == MBB->end())
150 return nullptr;
151
152 if (Term->getOpcode() != AArch64::Bcc)
153 return nullptr;
154
155 // Since we may modify cmp of this MBB, make sure NZCV does not live out.
156 for (auto *SuccBB : MBB->successors())
157 if (SuccBB->isLiveIn(AArch64::NZCV))
158 return nullptr;
159
160 // Now find the instruction controlling the terminator.
161 for (MachineBasicBlock::iterator B = MBB->begin(), It = Term; It != B;) {
162 It = prev_nodbg(It, B);
163 MachineInstr &I = *It;
164 assert(!I.isTerminator() && "Spurious terminator");
165 // Check if there is any use of NZCV between CMP and Bcc.
166 if (I.readsRegister(AArch64::NZCV, /*TRI=*/nullptr))
167 return nullptr;
168 switch (I.getOpcode()) {
169 // cmp is an alias for subs with a dead destination register.
170 case AArch64::SUBSWri:
171 case AArch64::SUBSXri:
172 // cmn is an alias for adds with a dead destination register.
173 case AArch64::ADDSWri:
174 case AArch64::ADDSXri: {
175 unsigned ShiftAmt = AArch64_AM::getShiftValue(I.getOperand(3).getImm());
176 if (!I.getOperand(2).isImm()) {
177 LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << I << '\n');
178 return nullptr;
179 } else if (I.getOperand(2).getImm() << ShiftAmt >= 0xfff) {
180 LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << I
181 << '\n');
182 return nullptr;
183 } else if (!MRI->use_nodbg_empty(I.getOperand(0).getReg())) {
184 LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << I << '\n');
185 return nullptr;
186 }
187 return &I;
188 }
189 // Prevent false positive case like:
190 // cmp w19, #0
191 // cinc w0, w19, gt
192 // ...
193 // fcmp d8, #0.0
194 // b.gt .LBB0_5
195 case AArch64::FCMPDri:
196 case AArch64::FCMPSri:
197 case AArch64::FCMPESri:
198 case AArch64::FCMPEDri:
199
200 case AArch64::SUBSWrr:
201 case AArch64::SUBSXrr:
202 case AArch64::ADDSWrr:
203 case AArch64::ADDSXrr:
204 case AArch64::FCMPSrr:
205 case AArch64::FCMPDrr:
206 case AArch64::FCMPESrr:
207 case AArch64::FCMPEDrr:
208 // Skip comparison instructions without immediate operands.
209 return nullptr;
210 }
211 }
212 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
213 << '\n');
214 return nullptr;
215}
216
217// Changes opcode adds <-> subs considering register operand width.
218static int getComplementOpc(int Opc) {
219 switch (Opc) {
220 case AArch64::ADDSWri: return AArch64::SUBSWri;
221 case AArch64::ADDSXri: return AArch64::SUBSXri;
222 case AArch64::SUBSWri: return AArch64::ADDSWri;
223 case AArch64::SUBSXri: return AArch64::ADDSXri;
224 default:
225 llvm_unreachable("Unexpected opcode");
226 }
227}
228
229// Changes form of comparison inclusive <-> exclusive.
231 switch (Cmp) {
232 case AArch64CC::GT: return AArch64CC::GE;
233 case AArch64CC::GE: return AArch64CC::GT;
234 case AArch64CC::LT: return AArch64CC::LE;
235 case AArch64CC::LE: return AArch64CC::LT;
236 default:
237 llvm_unreachable("Unexpected condition code");
238 }
239}
240
241// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
242// operator and condition code.
243AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
244 MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
245 unsigned Opc = CmpMI->getOpcode();
246
247 // CMN (compare with negative immediate) is an alias to ADDS (as
248 // "operand - negative" == "operand + positive")
249 bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
250
251 int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
252 // Negate Correction value for comparison with negative immediate (CMN).
253 if (Negative) {
254 Correction = -Correction;
255 }
256
257 const int OldImm = (int)CmpMI->getOperand(2).getImm();
258 const int NewImm = std::abs(OldImm + Correction);
259
260 // Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
261 // adjusting compare instruction opcode.
262 if (OldImm == 0 && ((Negative && Correction == 1) ||
263 (!Negative && Correction == -1))) {
264 Opc = getComplementOpc(Opc);
265 }
266
267 return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
268}
269
270// Applies changes to comparison instruction suggested by adjustCmp().
271void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
272 const CmpInfo &Info) {
273 int Imm;
274 unsigned Opc;
276 std::tie(Imm, Opc, Cmp) = Info;
277
278 MachineBasicBlock *const MBB = CmpMI->getParent();
279
280 // Change immediate in comparison instruction (ADDS or SUBS).
281 BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
282 .add(CmpMI->getOperand(0))
283 .add(CmpMI->getOperand(1))
284 .addImm(Imm)
285 .add(CmpMI->getOperand(3));
286 CmpMI->eraseFromParent();
287
288 // The fact that this comparison was picked ensures that it's related to the
289 // first terminator instruction.
291
292 // Change condition in branch instruction.
293 BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
294 .addImm(Cmp)
295 .add(BrMI.getOperand(1));
296 BrMI.eraseFromParent();
297
298 ++NumConditionsAdjusted;
299}
300
301// Parse a condition code returned by analyzeBranch, and compute the CondCode
302// corresponding to TBB.
303// Returns true if parsing was successful, otherwise false is returned.
305 // A normal br.cond simply has the condition code.
306 if (Cond[0].getImm() != -1) {
307 assert(Cond.size() == 1 && "Unknown Cond array format");
308 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
309 return true;
310 }
311 return false;
312}
313
314// Adjusts one cmp instruction to another one if result of adjustment will allow
315// CSE. Returns true if compare instruction was changed, otherwise false is
316// returned.
317bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
318 AArch64CC::CondCode Cmp, MachineInstr *To, int ToImm)
319{
320 CmpInfo Info = adjustCmp(CmpMI, Cmp);
321 if (std::get<0>(Info) == ToImm && std::get<1>(Info) == To->getOpcode()) {
322 modifyCmp(CmpMI, Info);
323 return true;
324 }
325 return false;
326}
327
328bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
329 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
330 << "********** Function: " << MF.getName() << '\n');
331 if (skipFunction(MF.getFunction()))
332 return false;
333
335 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
336 MRI = &MF.getRegInfo();
337
338 bool Changed = false;
339
340 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
341 // cmp-conversions from the same head block.
342 // Note that updateDomTree() modifies the children of the DomTree node
343 // currently being visited. The df_iterator supports that; it doesn't look at
344 // child_begin() / child_end() until after a node has been visited.
345 for (MachineDomTreeNode *I : depth_first(DomTree)) {
346 MachineBasicBlock *HBB = I->getBlock();
347
349 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
350 if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {
351 continue;
352 }
353
354 // Equivalence check is to skip loops.
355 if (!TBB || TBB == HBB) {
356 continue;
357 }
358
360 MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
361 if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
362 continue;
363 }
364
365 MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
366 if (!HeadCmpMI) {
367 continue;
368 }
369
370 MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
371 if (!TrueCmpMI) {
372 continue;
373 }
374
375 AArch64CC::CondCode HeadCmp;
376 if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
377 continue;
378 }
379
380 AArch64CC::CondCode TrueCmp;
381 if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
382 continue;
383 }
384
385 const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
386 const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
387
388 LLVM_DEBUG(dbgs() << "Head branch:\n");
389 LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
390 << '\n');
391 LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
392
393 LLVM_DEBUG(dbgs() << "True branch:\n");
394 LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
395 << '\n');
396 LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
397
398 if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
399 (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
400 std::abs(TrueImm - HeadImm) == 2) {
401 // This branch transforms machine instructions that correspond to
402 //
403 // 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
404 // 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
405 //
406 // into
407 //
408 // 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
409 // 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
410
411 CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
412 CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
413 if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
414 std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
415 modifyCmp(HeadCmpMI, HeadCmpInfo);
416 modifyCmp(TrueCmpMI, TrueCmpInfo);
417 Changed = true;
418 }
419 } else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
420 (HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
421 std::abs(TrueImm - HeadImm) == 1) {
422 // This branch transforms machine instructions that correspond to
423 //
424 // 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
425 // 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
426 //
427 // into
428 //
429 // 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
430 // 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
431
432 // GT -> GE transformation increases immediate value, so picking the
433 // smaller one; LT -> LE decreases immediate value so invert the choice.
434 bool adjustHeadCond = (HeadImm < TrueImm);
435 if (HeadCmp == AArch64CC::LT) {
436 adjustHeadCond = !adjustHeadCond;
437 }
438
439 if (adjustHeadCond) {
440 Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
441 } else {
442 Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
443 }
444 }
445 // Other transformation cases almost never occur due to generation of < or >
446 // comparisons instead of <= and >=.
447 }
448
449 return Changed;
450}
unsigned const MachineRegisterInfo * MRI
aarch64 condopt
static int getComplementOpc(int Opc)
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp)
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator_range< succ_iterator > successors()
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
int64_t getImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
bool empty() const
Definition: SmallVector.h:95
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createAArch64ConditionOptimizerPass()
iterator_range< df_iterator< T > > depth_first(const T &G)
void initializeAArch64ConditionOptimizerPass(PassRegistry &)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.