LLVM  14.0.0git
AArch64PostSelectOptimize.cpp
Go to the documentation of this file.
1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does post-instruction-selection optimizations in the GlobalISel
10 // pipeline, before the rest of codegen runs.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include <cassert>
23 
24 #define DEBUG_TYPE "aarch64-post-select-optimize"
25 
26 using namespace llvm;
27 
28 namespace {
29 class AArch64PostSelectOptimize : public MachineFunctionPass {
30 public:
31  static char ID;
32 
33  AArch64PostSelectOptimize();
34 
35  StringRef getPassName() const override {
36  return "AArch64 Post Select Optimizer";
37  }
38 
39  bool runOnMachineFunction(MachineFunction &MF) override;
40 
41  void getAnalysisUsage(AnalysisUsage &AU) const override;
42 
43 private:
44  bool optimizeNZCVDefs(MachineBasicBlock &MBB);
45 };
46 } // end anonymous namespace
47 
48 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
50  AU.setPreservesCFG();
53 }
54 
55 AArch64PostSelectOptimize::AArch64PostSelectOptimize()
57  initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
58 }
59 
60 unsigned getNonFlagSettingVariant(unsigned Opc) {
61  switch (Opc) {
62  default:
63  return 0;
64  case AArch64::SUBSXrr:
65  return AArch64::SUBXrr;
66  case AArch64::SUBSWrr:
67  return AArch64::SUBWrr;
68  case AArch64::SUBSXrs:
69  return AArch64::SUBXrs;
70  case AArch64::SUBSXri:
71  return AArch64::SUBXri;
72  case AArch64::SUBSWri:
73  return AArch64::SUBWri;
74  }
75 }
76 
77 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
78  // Consider the following code:
79  // FCMPSrr %0, %1, implicit-def $nzcv
80  // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
81  // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
82  // FCMPSrr %0, %1, implicit-def $nzcv
83  // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
84  // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
85  // when we have a single IR fcmp being used by two selects. During selection,
86  // to ensure that there can be no clobbering of nzcv between the fcmp and the
87  // csel, we have to generate an fcmp immediately before each csel is
88  // selected.
89  // However, often we can essentially CSE these together later in MachineCSE.
90  // This doesn't work though if there are unrelated flag-setting instructions
91  // in between the two FCMPs. In this case, the SUBS defines NZCV
92  // but it doesn't have any users, being overwritten by the second FCMP.
93  //
94  // Our solution here is to try to convert flag setting operations between
95  // a interval of identical FCMPs, so that CSE will be able to eliminate one.
96  bool Changed = false;
97  auto &MF = *MBB.getParent();
98  auto &Subtarget = MF.getSubtarget();
99  const auto &TII = Subtarget.getInstrInfo();
100  auto TRI = Subtarget.getRegisterInfo();
101  auto RBI = Subtarget.getRegBankInfo();
102  auto &MRI = MF.getRegInfo();
103 
104  // The first step is to find the first and last FCMPs. If we have found
105  // at least two, then set the limit of the bottom-up walk to the first FCMP
106  // found since we're only interested in dealing with instructions between
107  // them.
108  MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
109  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
110  if (MI.getOpcode() == AArch64::FCMPSrr ||
111  MI.getOpcode() == AArch64::FCMPDrr) {
112  if (!FirstCmp)
113  FirstCmp = &MI;
114  else
115  LastCmp = &MI;
116  }
117  }
118 
119  // In addition to converting flag-setting ops in fcmp ranges into non-flag
120  // setting ops, across the whole basic block we also detect when nzcv
121  // implicit-defs are dead, and mark them as dead. Peephole optimizations need
122  // this information later.
123 
125  LRU.addLiveOuts(MBB);
126  bool NZCVDead = LRU.available(AArch64::NZCV);
127  bool InsideCmpRange = false;
128  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
129  LRU.stepBackward(II);
130 
131  if (LastCmp) { // There's a range present in this block.
132  // If we're inside an fcmp range, look for begin instruction.
133  if (InsideCmpRange && &II == FirstCmp)
134  InsideCmpRange = false;
135  else if (&II == LastCmp)
136  InsideCmpRange = true;
137  }
138 
139  // Did this instruction define NZCV?
140  bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
141  if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
142  // If we have a def and NZCV is dead, then we may convert this op.
143  unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
144  int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
145  if (DeadNZCVIdx != -1) {
146  // If we're inside an fcmp range, then convert flag setting ops.
147  if (InsideCmpRange && NewOpc) {
148  LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
149  "op in fcmp range: "
150  << II);
151  II.setDesc(TII->get(NewOpc));
152  II.RemoveOperand(DeadNZCVIdx);
153  // Changing the opcode can result in differing regclass requirements,
154  // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
155  // Constrain the regclasses, possibly introducing a copy.
156  constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
157  II.getOperand(0), 0);
158  Changed |= true;
159  } else {
160  // Otherwise, we just set the nzcv imp-def operand to be dead, so the
161  // peephole optimizations can optimize them further.
162  II.getOperand(DeadNZCVIdx).setIsDead();
163  }
164  }
165  }
166 
167  NZCVDead = NZCVDeadAtCurrInstr;
168  }
169  return Changed;
170 }
171 
172 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
173  if (MF.getProperties().hasProperty(
174  MachineFunctionProperties::Property::FailedISel))
175  return false;
177  MachineFunctionProperties::Property::Selected) &&
178  "Expected a selected MF");
179 
180  bool Changed = false;
181  for (auto &BB : MF)
182  Changed |= optimizeNZCVDefs(BB);
183  return Changed;
184 }
185 
187 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
188  "Optimize AArch64 selected instructions",
189  false, false)
190 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
191  "Optimize AArch64 selected instructions", false,
192  false)
193 
194 namespace llvm {
196  return new AArch64PostSelectOptimize();
197 }
198 } // end namespace llvm
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:169
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AArch64PostSelectOptimize.cpp:24
AArch64.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::createAArch64PostSelectOptimize
FunctionPass * createAArch64PostSelectOptimize()
Definition: AArch64PostSelectOptimize.cpp:195
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:863
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, "Optimize AArch64 selected instructions", false, false) INITIALIZE_PASS_END(AArch64PostSelectOptimize
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
AArch64TargetMachine.h
instructions
Optimize AArch64 selected instructions
Definition: AArch64PostSelectOptimize.cpp:191
getNonFlagSettingVariant
unsigned getNonFlagSettingVariant(unsigned Opc)
Definition: AArch64PostSelectOptimize.cpp:60
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
Utils.h
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:725
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:278
llvm::LiveRegUnits
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::initializeAArch64PostSelectOptimizePass
void initializeAArch64PostSelectOptimizePass(PassRegistry &)
TargetPassConfig.h
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
AArch64MCTargetDesc.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::constrainOperandRegClass
Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition: Utils.cpp:50
MachineOperand.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::instructionsWithoutDebug
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
Definition: MachineBasicBlock.h:1244
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37