LLVM 17.0.0git
AArch64PostSelectOptimize.cpp
Go to the documentation of this file.
//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does post-instruction-selection optimizations in the GlobalISel
// pipeline, before the rest of codegen runs.
//
//===----------------------------------------------------------------------===//
13
#include "AArch64.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
26
// Identifier handed to INITIALIZE_PASS_BEGIN/END in this file as the pass
// argument; presumably also the tag LLVM_DEBUG output is filtered by.
27#define DEBUG_TYPE "aarch64-post-select-optimize"
28
// Everything in this file refers to LLVM entities unqualified.
29using namespace llvm;
30
31namespace {
32class AArch64PostSelectOptimize : public MachineFunctionPass {
33public:
34 static char ID;
35
36 AArch64PostSelectOptimize();
37
38 StringRef getPassName() const override {
39 return "AArch64 Post Select Optimizer";
40 }
41
42 bool runOnMachineFunction(MachineFunction &MF) override;
43
44 void getAnalysisUsage(AnalysisUsage &AU) const override;
45
46private:
47 bool optimizeNZCVDefs(MachineBasicBlock &MBB);
48 bool doPeepholeOpts(MachineBasicBlock &MBB);
49 /// Look for cross regclass copies that can be trivially eliminated.
50 bool foldSimpleCrossClassCopies(MachineInstr &MI);
51};
52} // end anonymous namespace
53
54void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
56 AU.setPreservesCFG();
59}
60
61AArch64PostSelectOptimize::AArch64PostSelectOptimize()
64}
65
66unsigned getNonFlagSettingVariant(unsigned Opc) {
67 switch (Opc) {
68 default:
69 return 0;
70 case AArch64::SUBSXrr:
71 return AArch64::SUBXrr;
72 case AArch64::SUBSWrr:
73 return AArch64::SUBWrr;
74 case AArch64::SUBSXrs:
75 return AArch64::SUBXrs;
76 case AArch64::SUBSXri:
77 return AArch64::SUBXri;
78 case AArch64::SUBSWri:
79 return AArch64::SUBWri;
80 }
81}
82
83bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
84 bool Changed = false;
85 for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) {
86 Changed |= foldSimpleCrossClassCopies(MI);
87 }
88 return Changed;
89}
90
91bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
92 auto *MF = MI.getMF();
93 auto &MRI = MF->getRegInfo();
94
95 if (!MI.isCopy())
96 return false;
97
98 if (MI.getOperand(1).getSubReg())
99 return false; // Don't deal with subreg copies
100
101 Register Src = MI.getOperand(1).getReg();
102 Register Dst = MI.getOperand(0).getReg();
103
104 if (Src.isPhysical() || Dst.isPhysical())
105 return false;
106
107 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
108 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
109
110 if (SrcRC == DstRC)
111 return false;
112
113
114 if (SrcRC->hasSubClass(DstRC)) {
115 // This is the case where the source class is a superclass of the dest, so
116 // if the copy is the only user of the source, we can just constrain the
117 // source reg to the dest class.
118
119 if (!MRI.hasOneNonDBGUse(Src))
120 return false; // Only constrain single uses of the source.
121
122 // Constrain to dst reg class as long as it's not a weird class that only
123 // has a few registers.
124 if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
125 return false;
126 } else if (DstRC->hasSubClass(SrcRC)) {
127 // This is the inverse case, where the destination class is a superclass of
128 // the source. Here, if the copy is the only user, we can just constrain
129 // the user of the copy to use the smaller class of the source.
130 } else {
131 return false;
132 }
133
134 MRI.replaceRegWith(Dst, Src);
135 MI.eraseFromParent();
136 return true;
137}
138
139bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
140 // Consider the following code:
141 // FCMPSrr %0, %1, implicit-def $nzcv
142 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
143 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
144 // FCMPSrr %0, %1, implicit-def $nzcv
145 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
146 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
147 // when we have a single IR fcmp being used by two selects. During selection,
148 // to ensure that there can be no clobbering of nzcv between the fcmp and the
149 // csel, we have to generate an fcmp immediately before each csel is
150 // selected.
151 // However, often we can essentially CSE these together later in MachineCSE.
152 // This doesn't work though if there are unrelated flag-setting instructions
153 // in between the two FCMPs. In this case, the SUBS defines NZCV
154 // but it doesn't have any users, being overwritten by the second FCMP.
155 //
156 // Our solution here is to try to convert flag setting operations between
157 // a interval of identical FCMPs, so that CSE will be able to eliminate one.
158 bool Changed = false;
159 auto &MF = *MBB.getParent();
160 auto &Subtarget = MF.getSubtarget();
161 const auto &TII = Subtarget.getInstrInfo();
162 auto TRI = Subtarget.getRegisterInfo();
163 auto RBI = Subtarget.getRegBankInfo();
164 auto &MRI = MF.getRegInfo();
165
166 // The first step is to find the first and last FCMPs. If we have found
167 // at least two, then set the limit of the bottom-up walk to the first FCMP
168 // found since we're only interested in dealing with instructions between
169 // them.
170 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
171 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
172 if (MI.getOpcode() == AArch64::FCMPSrr ||
173 MI.getOpcode() == AArch64::FCMPDrr) {
174 if (!FirstCmp)
175 FirstCmp = &MI;
176 else
177 LastCmp = &MI;
178 }
179 }
180
181 // In addition to converting flag-setting ops in fcmp ranges into non-flag
182 // setting ops, across the whole basic block we also detect when nzcv
183 // implicit-defs are dead, and mark them as dead. Peephole optimizations need
184 // this information later.
185
187 LRU.addLiveOuts(MBB);
188 bool NZCVDead = LRU.available(AArch64::NZCV);
189 bool InsideCmpRange = false;
190 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
191 LRU.stepBackward(II);
192
193 if (LastCmp) { // There's a range present in this block.
194 // If we're inside an fcmp range, look for begin instruction.
195 if (InsideCmpRange && &II == FirstCmp)
196 InsideCmpRange = false;
197 else if (&II == LastCmp)
198 InsideCmpRange = true;
199 }
200
201 // Did this instruction define NZCV?
202 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
203 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
204 // If we have a def and NZCV is dead, then we may convert this op.
205 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
206 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
207 if (DeadNZCVIdx != -1) {
208 // If we're inside an fcmp range, then convert flag setting ops.
209 if (InsideCmpRange && NewOpc) {
210 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
211 "op in fcmp range: "
212 << II);
213 II.setDesc(TII->get(NewOpc));
214 II.removeOperand(DeadNZCVIdx);
215 // Changing the opcode can result in differing regclass requirements,
216 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
217 // Constrain the regclasses, possibly introducing a copy.
218 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
219 II.getOperand(0), 0);
220 Changed |= true;
221 } else {
222 // Otherwise, we just set the nzcv imp-def operand to be dead, so the
223 // peephole optimizations can optimize them further.
224 II.getOperand(DeadNZCVIdx).setIsDead();
225 }
226 }
227 }
228
229 NZCVDead = NZCVDeadAtCurrInstr;
230 }
231 return Changed;
232}
233
234bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
235 if (MF.getProperties().hasProperty(
236 MachineFunctionProperties::Property::FailedISel))
237 return false;
239 MachineFunctionProperties::Property::Selected) &&
240 "Expected a selected MF");
241
242 bool Changed = false;
243 for (auto &BB : MF) {
244 Changed |= optimizeNZCVDefs(BB);
245 Changed |= doPeepholeOpts(BB);
246 }
247 return Changed;
248}
249
// Storage for the static pass identifier declared in the class.
250char AArch64PostSelectOptimize::ID = 0;
// Register the pass: DEBUG_TYPE is the pass argument and the string literal is
// its description. The two trailing flags are the macros' `cfg` and `analysis`
// parameters, both false here.
251INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
252 "Optimize AArch64 selected instructions",
253 false, false)
254INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
255 "Optimize AArch64 selected instructions", false,
256 false)
257
258namespace llvm {
260 return new AArch64PostSelectOptimize();
261}
262} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
unsigned getNonFlagSettingVariant(unsigned Opc)
Optimize AArch64 selected instructions
#define DEBUG_TYPE
MachineBasicBlock & MBB
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
reverse_iterator rend()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
reverse_iterator rbegin()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
Definition: MachineInstr.h:68
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target-Independent Code Generator Pass Configuration Options.
bool hasSubClass(const TargetRegisterClass *RC) const
Return true if the specified TargetRegisterClass is a proper sub-class of this TargetRegisterClass.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAArch64PostSelectOptimize()
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:53
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
void initializeAArch64PostSelectOptimizePass(PassRegistry &)