LLVM  16.0.0git
AArch64PostSelectOptimize.cpp
Go to the documentation of this file.
1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does post-instruction-selection optimizations in the GlobalISel
10 // pipeline, before the rest of codegen runs.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
26 
27 #define DEBUG_TYPE "aarch64-post-select-optimize"
28 
29 using namespace llvm;
30 
31 namespace {
32 class AArch64PostSelectOptimize : public MachineFunctionPass {
33 public:
34  static char ID;
35 
36  AArch64PostSelectOptimize();
37 
38  StringRef getPassName() const override {
39  return "AArch64 Post Select Optimizer";
40  }
41 
42  bool runOnMachineFunction(MachineFunction &MF) override;
43 
44  void getAnalysisUsage(AnalysisUsage &AU) const override;
45 
46 private:
47  bool optimizeNZCVDefs(MachineBasicBlock &MBB);
48  bool doPeepholeOpts(MachineBasicBlock &MBB);
49  /// Look for cross regclass copies that can be trivially eliminated.
50  bool foldSimpleCrossClassCopies(MachineInstr &MI);
51 };
52 } // end anonymous namespace
53 
54 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
56  AU.setPreservesCFG();
59 }
60 
61 AArch64PostSelectOptimize::AArch64PostSelectOptimize()
63  initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
64 }
65 
66 unsigned getNonFlagSettingVariant(unsigned Opc) {
67  switch (Opc) {
68  default:
69  return 0;
70  case AArch64::SUBSXrr:
71  return AArch64::SUBXrr;
72  case AArch64::SUBSWrr:
73  return AArch64::SUBWrr;
74  case AArch64::SUBSXrs:
75  return AArch64::SUBXrs;
76  case AArch64::SUBSXri:
77  return AArch64::SUBXri;
78  case AArch64::SUBSWri:
79  return AArch64::SUBWri;
80  }
81 }
82 
83 bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
84  bool Changed = false;
85  for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) {
86  Changed |= foldSimpleCrossClassCopies(MI);
87  }
88  return Changed;
89 }
90 
91 bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
92  auto *MF = MI.getMF();
93  auto &MRI = MF->getRegInfo();
94 
95  if (!MI.isCopy())
96  return false;
97 
98  if (MI.getOperand(1).getSubReg())
99  return false; // Don't deal with subreg copies
100 
101  Register Src = MI.getOperand(1).getReg();
102  Register Dst = MI.getOperand(0).getReg();
103 
104  if (Src.isPhysical() || Dst.isPhysical())
105  return false;
106 
107  const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
108  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
109 
110  if (SrcRC == DstRC)
111  return false;
112 
113 
114  if (SrcRC->hasSubClass(DstRC)) {
115  // This is the case where the source class is a superclass of the dest, so
116  // if the copy is the only user of the source, we can just constrain the
117  // source reg to the dest class.
118 
119  if (!MRI.hasOneNonDBGUse(Src))
120  return false; // Only constrain single uses of the source.
121 
122  // Constrain to dst reg class as long as it's not a weird class that only
123  // has a few registers.
124  if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
125  return false;
126  } else if (DstRC->hasSubClass(SrcRC)) {
127  // This is the inverse case, where the destination class is a superclass of
128  // the source. Here, if the copy is the only user, we can just constrain
129  // the user of the copy to use the smaller class of the source.
130  } else {
131  return false;
132  }
133 
134  MRI.replaceRegWith(Dst, Src);
135  MI.eraseFromParent();
136  return true;
137 }
138 
139 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
140  // Consider the following code:
141  // FCMPSrr %0, %1, implicit-def $nzcv
142  // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
143  // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
144  // FCMPSrr %0, %1, implicit-def $nzcv
145  // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
146  // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
147  // when we have a single IR fcmp being used by two selects. During selection,
148  // to ensure that there can be no clobbering of nzcv between the fcmp and the
149  // csel, we have to generate an fcmp immediately before each csel is
150  // selected.
151  // However, often we can essentially CSE these together later in MachineCSE.
152  // This doesn't work though if there are unrelated flag-setting instructions
153  // in between the two FCMPs. In this case, the SUBS defines NZCV
154  // but it doesn't have any users, being overwritten by the second FCMP.
155  //
156  // Our solution here is to try to convert flag setting operations between
157  // a interval of identical FCMPs, so that CSE will be able to eliminate one.
158  bool Changed = false;
159  auto &MF = *MBB.getParent();
160  auto &Subtarget = MF.getSubtarget();
161  const auto &TII = Subtarget.getInstrInfo();
162  auto TRI = Subtarget.getRegisterInfo();
163  auto RBI = Subtarget.getRegBankInfo();
164  auto &MRI = MF.getRegInfo();
165 
166  // The first step is to find the first and last FCMPs. If we have found
167  // at least two, then set the limit of the bottom-up walk to the first FCMP
168  // found since we're only interested in dealing with instructions between
169  // them.
170  MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
171  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
172  if (MI.getOpcode() == AArch64::FCMPSrr ||
173  MI.getOpcode() == AArch64::FCMPDrr) {
174  if (!FirstCmp)
175  FirstCmp = &MI;
176  else
177  LastCmp = &MI;
178  }
179  }
180 
181  // In addition to converting flag-setting ops in fcmp ranges into non-flag
182  // setting ops, across the whole basic block we also detect when nzcv
183  // implicit-defs are dead, and mark them as dead. Peephole optimizations need
184  // this information later.
185 
187  LRU.addLiveOuts(MBB);
188  bool NZCVDead = LRU.available(AArch64::NZCV);
189  bool InsideCmpRange = false;
190  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
191  LRU.stepBackward(II);
192 
193  if (LastCmp) { // There's a range present in this block.
194  // If we're inside an fcmp range, look for begin instruction.
195  if (InsideCmpRange && &II == FirstCmp)
196  InsideCmpRange = false;
197  else if (&II == LastCmp)
198  InsideCmpRange = true;
199  }
200 
201  // Did this instruction define NZCV?
202  bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
203  if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
204  // If we have a def and NZCV is dead, then we may convert this op.
205  unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
206  int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
207  if (DeadNZCVIdx != -1) {
208  // If we're inside an fcmp range, then convert flag setting ops.
209  if (InsideCmpRange && NewOpc) {
210  LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
211  "op in fcmp range: "
212  << II);
213  II.setDesc(TII->get(NewOpc));
214  II.removeOperand(DeadNZCVIdx);
215  // Changing the opcode can result in differing regclass requirements,
216  // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
217  // Constrain the regclasses, possibly introducing a copy.
218  constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
219  II.getOperand(0), 0);
220  Changed |= true;
221  } else {
222  // Otherwise, we just set the nzcv imp-def operand to be dead, so the
223  // peephole optimizations can optimize them further.
224  II.getOperand(DeadNZCVIdx).setIsDead();
225  }
226  }
227  }
228 
229  NZCVDead = NZCVDeadAtCurrInstr;
230  }
231  return Changed;
232 }
233 
234 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
235  if (MF.getProperties().hasProperty(
236  MachineFunctionProperties::Property::FailedISel))
237  return false;
239  MachineFunctionProperties::Property::Selected) &&
240  "Expected a selected MF");
241 
242  bool Changed = false;
243  for (auto &BB : MF) {
244  Changed |= optimizeNZCVDefs(BB);
245  Changed |= doPeepholeOpts(BB);
246  }
247  return Changed;
248 }
249 
251 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
252  "Optimize AArch64 selected instructions",
253  false, false)
254 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
255  "Optimize AArch64 selected instructions", false,
256  false)
257 
258 namespace llvm {
260  return new AArch64PostSelectOptimize();
261 }
262 } // end namespace llvm
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:192
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AArch64PostSelectOptimize.cpp:27
AArch64.h
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
ErrorHandling.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:127
llvm::createAArch64PostSelectOptimize
FunctionPass * createAArch64PostSelectOptimize()
Definition: AArch64PostSelectOptimize.cpp:259
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, "Optimize AArch64 selected instructions", false, false) INITIALIZE_PASS_END(AArch64PostSelectOptimize
STLExtras.h
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AArch64TargetMachine.h
instructions
Optimize AArch64 selected instructions
Definition: AArch64PostSelectOptimize.cpp:255
getNonFlagSettingVariant
unsigned getNonFlagSettingVariant(unsigned Opc)
Definition: AArch64PostSelectOptimize.cpp:66
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Utils.h
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:748
false
Definition: StackSlotColoring.cpp:141
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:315
llvm::LiveRegUnits
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::initializeAArch64PostSelectOptimizePass
void initializeAArch64PostSelectOptimizePass(PassRegistry &)
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:716
TargetPassConfig.h
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:309
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:415
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:378
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
AArch64MCTargetDesc.h
llvm::TargetRegisterClass::hasSubClass
bool hasSubClass(const TargetRegisterClass *RC) const
Return true if the specified TargetRegisterClass is a proper sub-class of this TargetRegisterClass.
Definition: TargetRegisterInfo.h:125
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:54
llvm::MachineRegisterInfo::constrainRegClass
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
Definition: MachineRegisterInfo.cpp:82
MachineOperand.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::instructionsWithoutDebug
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
Definition: MachineBasicBlock.h:1303