LLVM 23.0.0git
AArch64PostSelectOptimize.cpp
Go to the documentation of this file.
//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does post-instruction-selection optimizations in the GlobalISel
// pipeline, before the rest of codegen runs.
//
//===----------------------------------------------------------------------===//
13
#include "AArch64.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/Support/Debug.h"
25
26#define DEBUG_TYPE "aarch64-post-select-optimize"
27
28using namespace llvm;
29
30namespace {
31
32class AArch64PostSelectOptimizeLegacy : public MachineFunctionPass {
33public:
34 static char ID;
35
36 AArch64PostSelectOptimizeLegacy() : MachineFunctionPass(ID) {}
37
38 StringRef getPassName() const override {
39 return "AArch64 Post Select Optimizer";
40 }
41
42 bool runOnMachineFunction(MachineFunction &MF) override;
43
44 void getAnalysisUsage(AnalysisUsage &AU) const override;
45};
46} // end anonymous namespace
47
48void AArch64PostSelectOptimizeLegacy::getAnalysisUsage(
49 AnalysisUsage &AU) const {
50 AU.setPreservesCFG();
53}
54
55unsigned getNonFlagSettingVariant(unsigned Opc) {
56 switch (Opc) {
57 default:
58 return 0;
59 case AArch64::SUBSXrr:
60 return AArch64::SUBXrr;
61 case AArch64::SUBSWrr:
62 return AArch64::SUBWrr;
63 case AArch64::SUBSXrs:
64 return AArch64::SUBXrs;
65 case AArch64::SUBSWrs:
66 return AArch64::SUBWrs;
67 case AArch64::SUBSXri:
68 return AArch64::SUBXri;
69 case AArch64::SUBSWri:
70 return AArch64::SUBWri;
71 case AArch64::ADDSXrr:
72 return AArch64::ADDXrr;
73 case AArch64::ADDSWrr:
74 return AArch64::ADDWrr;
75 case AArch64::ADDSXrs:
76 return AArch64::ADDXrs;
77 case AArch64::ADDSWrs:
78 return AArch64::ADDWrs;
79 case AArch64::ADDSXri:
80 return AArch64::ADDXri;
81 case AArch64::ADDSWri:
82 return AArch64::ADDWri;
83 case AArch64::SBCSXr:
84 return AArch64::SBCXr;
85 case AArch64::SBCSWr:
86 return AArch64::SBCWr;
87 case AArch64::ADCSXr:
88 return AArch64::ADCXr;
89 case AArch64::ADCSWr:
90 return AArch64::ADCWr;
91 }
92}
93
94/// Look for cross regclass copies that can be trivially eliminated.
96 auto *MF = MI.getMF();
97 auto &MRI = MF->getRegInfo();
98
99 if (!MI.isCopy())
100 return false;
101
102 if (MI.getOperand(1).getSubReg())
103 return false; // Don't deal with subreg copies
104
105 Register Src = MI.getOperand(1).getReg();
106 Register Dst = MI.getOperand(0).getReg();
107
108 if (Src.isPhysical() || Dst.isPhysical())
109 return false;
110
111 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
112 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
113
114 if (SrcRC == DstRC)
115 return false;
116
117
118 if (SrcRC->hasSubClass(DstRC)) {
119 // This is the case where the source class is a superclass of the dest, so
120 // if the copy is the only user of the source, we can just constrain the
121 // source reg to the dest class.
122
123 if (!MRI.hasOneNonDBGUse(Src))
124 return false; // Only constrain single uses of the source.
125
126 // Constrain to dst reg class as long as it's not a weird class that only
127 // has a few registers.
128 if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
129 return false;
130 } else if (DstRC->hasSubClass(SrcRC)) {
131 // This is the inverse case, where the destination class is a superclass of
132 // the source. Here, if the copy is the only user, we can just constrain
133 // the user of the copy to use the smaller class of the source.
134 } else {
135 return false;
136 }
137
138 MRI.replaceRegWith(Dst, Src);
139 MI.eraseFromParent();
140 return true;
141}
142
144 if (!MI.isCopy())
145 return false;
146
147 auto *MF = MI.getMF();
148 auto &MRI = MF->getRegInfo();
149 auto *TII = MF->getSubtarget().getInstrInfo();
150
151 // Optimize COPY(y:GPR, DUP(x:FPR, i)) -> UMOV(y:GPR, x:FPR, i).
152 // Here Dst is y and Src is the result of DUP.
153 Register Dst = MI.getOperand(0).getReg();
154 Register Src = MI.getOperand(1).getReg();
155
156 if (!Dst.isVirtual() || !Src.isVirtual())
157 return false;
158
159 auto TryMatchDUP = [&](const TargetRegisterClass *GPRRegClass,
160 const TargetRegisterClass *FPRRegClass, unsigned DUP,
161 unsigned UMOV) {
162 if (MRI.getRegClassOrNull(Dst) != GPRRegClass ||
163 MRI.getRegClassOrNull(Src) != FPRRegClass)
164 return false;
165
166 // There is a special case when one of the uses is COPY(z:FPR, y:GPR).
167 // In this case, we get COPY(z:FPR, COPY(y:GPR, DUP(x:FPR, i))), which can
168 // be folded by peephole-opt into just DUP(z:FPR, i), so this transform is
169 // not worthwhile in that case.
170 for (auto &Use : MRI.use_nodbg_instructions(Dst)) {
171 if (!Use.isCopy())
172 continue;
173
174 Register UseOp0 = Use.getOperand(0).getReg();
175 Register UseOp1 = Use.getOperand(1).getReg();
176 if (UseOp0.isPhysical() || UseOp1.isPhysical())
177 return false;
178
179 if (MRI.getRegClassOrNull(UseOp0) == FPRRegClass &&
180 MRI.getRegClassOrNull(UseOp1) == GPRRegClass)
181 return false;
182 }
183
184 MachineInstr *SrcMI = MRI.getUniqueVRegDef(Src);
185 if (!SrcMI || SrcMI->getOpcode() != DUP || !MRI.hasOneNonDBGUse(Src))
186 return false;
187
188 Register DupSrc = SrcMI->getOperand(1).getReg();
189 int64_t DupImm = SrcMI->getOperand(2).getImm();
190
191 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(UMOV), Dst)
192 .addReg(DupSrc)
193 .addImm(DupImm);
194 SrcMI->eraseFromParent();
195 MI.eraseFromParent();
196 return true;
197 };
198
199 return TryMatchDUP(&AArch64::GPR32RegClass, &AArch64::FPR32RegClass,
200 AArch64::DUPi32, AArch64::UMOVvi32) ||
201 TryMatchDUP(&AArch64::GPR64RegClass, &AArch64::FPR64RegClass,
202 AArch64::DUPi64, AArch64::UMOVvi64);
203}
204
206 bool Changed = false;
207 for (auto &MI : make_early_inc_range(MBB)) {
208 bool CurrentIterChanged = foldSimpleCrossClassCopies(MI);
209 if (!CurrentIterChanged)
210 CurrentIterChanged |= foldCopyDup(MI);
211 Changed |= CurrentIterChanged;
212 }
213 return Changed;
214}
215
217 // If we find a dead NZCV implicit-def, we
218 // - try to convert the operation to a non-flag-setting equivalent
219 // - or mark the def as dead to aid later peephole optimizations.
220
221 // Use cases:
222 // 1)
223 // Consider the following code:
224 // FCMPSrr %0, %1, implicit-def $nzcv
225 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
226 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
227 // FCMPSrr %0, %1, implicit-def $nzcv
228 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
229 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
230 // when we have a single IR fcmp being used by two selects. During selection,
231 // to ensure that there can be no clobbering of nzcv between the fcmp and the
232 // csel, we have to generate an fcmp immediately before each csel is
233 // selected.
234 // However, often we can essentially CSE these together later in MachineCSE.
235 // This doesn't work though if there are unrelated flag-setting instructions
236 // in between the two FCMPs. In this case, the SUBS defines NZCV
237 // but it doesn't have any users, being overwritten by the second FCMP.
238 //
239 // 2)
240 // The instruction selector always emits the flag-setting variant of ADC/SBC
241 // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
242 // instructions is never used, we can switch to the non-flag-setting variant.
243
244 bool Changed = false;
245 auto &MF = *MBB.getParent();
246 auto &Subtarget = MF.getSubtarget();
247 const auto &TII = Subtarget.getInstrInfo();
248 auto TRI = Subtarget.getRegisterInfo();
249 auto RBI = Subtarget.getRegBankInfo();
250 auto &MRI = MF.getRegInfo();
251
252 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
253 LRU.addLiveOuts(MBB);
254
255 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
256 bool NZCVDead = LRU.available(AArch64::NZCV);
257 if (NZCVDead && II.definesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
258 // The instruction defines NZCV, but NZCV is dead.
259 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
260 int DeadNZCVIdx =
261 II.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
262 if (DeadNZCVIdx != -1) {
263 if (NewOpc) {
264 // If there is an equivalent non-flag-setting op, we convert.
265 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
266 "op: "
267 << II);
268 II.setDesc(TII->get(NewOpc));
269 II.removeOperand(DeadNZCVIdx);
270 // Changing the opcode can result in differing regclass requirements,
271 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
272 // Constrain the regclasses, possibly introducing a copy.
273 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
274 II.getOperand(0), 0);
275 Changed |= true;
276 } else {
277 // Otherwise, we just set the nzcv imp-def operand to be dead, so the
278 // peephole optimizations can optimize them further.
279 II.getOperand(DeadNZCVIdx).setIsDead();
280 }
281 }
282 }
283 LRU.stepBackward(II);
284 }
285 return Changed;
286}
287
289 if (MF.getProperties().hasFailedISel())
290 return false;
291 assert(MF.getProperties().hasSelected() && "Expected a selected MF");
292
293 bool Changed = false;
294 for (auto &BB : MF) {
296 Changed |= doPeepholeOpts(BB);
297 }
298 return Changed;
299}
300
301bool AArch64PostSelectOptimizeLegacy::runOnMachineFunction(
302 MachineFunction &MF) {
304}
305
306char AArch64PostSelectOptimizeLegacy::ID = 0;
307INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimizeLegacy, DEBUG_TYPE,
308 "Optimize AArch64 selected instructions", false, false)
309INITIALIZE_PASS_END(AArch64PostSelectOptimizeLegacy, DEBUG_TYPE,
310 "Optimize AArch64 selected instructions", false, false)
311
312namespace llvm {
314 return new AArch64PostSelectOptimizeLegacy();
315}
316
327} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
bool doPeepholeOpts(MachineBasicBlock &MBB)
bool foldCopyDup(MachineInstr &MI)
bool optimizeNZCVDefs(MachineBasicBlock &MBB)
unsigned getNonFlagSettingVariant(unsigned Opc)
bool runAArch64PostSelectOptimize(MachineFunction &MF)
bool foldSimpleCrossClassCopies(MachineInstr &MI)
Look for cross regclass copies that can be trivially eliminated.
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Register const TargetRegisterInfo * TRI
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains some templates that are useful if you are working with the STL at all.
#define LLVM_DEBUG(...)
Definition Debug.h:114
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
int64_t getImm() const
Register getReg() const
getReg - Returns the register number.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool hasSubClass(const TargetRegisterClass *RC) const
Return true if the specified TargetRegisterClass is a proper sub-class of this TargetRegisterClass.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Changed
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64PostSelectOptimize()
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1126