LLVM  14.0.0git
X86PadShortFunction.cpp
Go to the documentation of this file.
1 //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the pass which will pad short functions to prevent
10 // a stall if a function returns before the return address is ready. This
11 // is needed for some Intel Atom processors.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "X86.h"
17 #include "X86InstrInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/Statistic.h"
25 #include "llvm/CodeGen/Passes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Support/Debug.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "x86-pad-short-functions"
34 
35 STATISTIC(NumBBsPadded, "Number of basic blocks padded");
36 
37 namespace {
/// Cached scan result for a single basic block.
struct VisitedBBInfo {
  /// True when the block contains a return instruction.
  bool HasReturn = false;

  /// Cycle count up to the return when HasReturn is set; otherwise the cycle
  /// count up to the end of the block.
  unsigned int Cycles = 0;

  /// Default state: no return seen, zero cycles.
  VisitedBBInfo() = default;

  VisitedBBInfo(bool BlockReturns, unsigned int CycleCount)
      : HasReturn(BlockReturns), Cycles(CycleCount) {}
};
50 
51  struct PadShortFunc : public MachineFunctionPass {
52  static char ID;
53  PadShortFunc() : MachineFunctionPass(ID)
54  , Threshold(4) {}
55 
56  bool runOnMachineFunction(MachineFunction &MF) override;
57 
58  void getAnalysisUsage(AnalysisUsage &AU) const override {
63  }
64 
65  MachineFunctionProperties getRequiredProperties() const override {
68  }
69 
70  StringRef getPassName() const override {
71  return "X86 Atom pad short functions";
72  }
73 
74  private:
75  void findReturns(MachineBasicBlock *MBB,
76  unsigned int Cycles = 0);
77 
78  bool cyclesUntilReturn(MachineBasicBlock *MBB,
79  unsigned int &Cycles);
80 
83  unsigned int NOOPsToAdd);
84 
85  const unsigned int Threshold;
86 
87  // ReturnBBs - Maps basic blocks that return to the minimum number of
88  // cycles until the return, starting from the entry block.
90 
91  // VisitedBBs - Cache of previously visited BBs.
93 
94  TargetSchedModel TSM;
95  };
96 
97  char PadShortFunc::ID = 0;
98 }
99 
101  return new PadShortFunc();
102 }
103 
104 /// runOnMachineFunction - Loop over all of the basic blocks, inserting
105 /// NOOP instructions before early exits.
106 bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
107  if (skipFunction(MF.getFunction()))
108  return false;
109 
110  if (MF.getFunction().hasOptSize())
111  return false;
112 
114  return false;
115 
116  TSM.init(&MF.getSubtarget());
117 
118  auto *PSI =
119  &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
120  auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
121  &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
122  nullptr;
123 
124  // Search through basic blocks and mark the ones that have early returns
125  ReturnBBs.clear();
126  VisitedBBs.clear();
127  findReturns(&MF.front());
128 
129  bool MadeChange = false;
130 
131  // Pad the identified basic blocks with NOOPs
133  I != ReturnBBs.end(); ++I) {
134  MachineBasicBlock *MBB = I->first;
135  unsigned Cycles = I->second;
136 
137  // Function::hasOptSize is already checked above.
138  bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
139  if (OptForSize)
140  continue;
141 
142  if (Cycles < Threshold) {
143  // BB ends in a return. Skip over any DBG_VALUE instructions
144  // trailing the terminator.
145  assert(MBB->size() > 0 &&
146  "Basic block should contain at least a RET but is empty");
147  MachineBasicBlock::iterator ReturnLoc = --MBB->end();
148 
149  while (ReturnLoc->isDebugInstr())
150  --ReturnLoc;
151  assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
152  "Basic block does not end with RET");
153 
154  addPadding(MBB, ReturnLoc, Threshold - Cycles);
155  NumBBsPadded++;
156  MadeChange = true;
157  }
158  }
159 
160  return MadeChange;
161 }
162 
163 /// findReturn - Starting at MBB, follow control flow and add all
164 /// basic blocks that contain a return to ReturnBBs.
165 void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
166  // If this BB has a return, note how many cycles it takes to get there.
167  bool hasReturn = cyclesUntilReturn(MBB, Cycles);
168  if (Cycles >= Threshold)
169  return;
170 
171  if (hasReturn) {
172  ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
173  return;
174  }
175 
176  // Follow branches in BB and look for returns
177  for (MachineBasicBlock *Succ : MBB->successors())
178  if (Succ != MBB)
179  findReturns(Succ, Cycles);
180 }
181 
182 /// cyclesUntilReturn - return true if the MBB has a return instruction,
183 /// and return false otherwise.
184 /// Cycles will be incremented by the number of cycles taken to reach the
185 /// return or the end of the BB, whichever occurs first.
186 bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
187  unsigned int &Cycles) {
188  // Return cached result if BB was previously visited
190  = VisitedBBs.find(MBB);
191  if (it != VisitedBBs.end()) {
192  VisitedBBInfo BBInfo = it->second;
193  Cycles += BBInfo.Cycles;
194  return BBInfo.HasReturn;
195  }
196 
197  unsigned int CyclesToEnd = 0;
198 
199  for (MachineInstr &MI : *MBB) {
200  // Mark basic blocks with a return instruction. Calls to other
201  // functions do not count because the called function will be padded,
202  // if necessary.
203  if (MI.isReturn() && !MI.isCall()) {
204  VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
205  Cycles += CyclesToEnd;
206  return true;
207  }
208 
209  CyclesToEnd += TSM.computeInstrLatency(&MI);
210  }
211 
212  VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
213  Cycles += CyclesToEnd;
214  return false;
215 }
216 
217 /// addPadding - Add the given number of NOOP instructions to the function
218 /// just prior to the return at MBBI
221  unsigned int NOOPsToAdd) {
222  const DebugLoc &DL = MBBI->getDebugLoc();
223  unsigned IssueWidth = TSM.getIssueWidth();
224 
225  for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
226  BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
227 }
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::createX86PadShortFunctions
FunctionPass * createX86PadShortFunctions()
Return a pass that pads short functions with NOOPs.
Definition: X86PadShortFunction.cpp:100
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
X86Subtarget.h
Statistic.h
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineSizeOpts.h
llvm::MachineFunctionProperties
Properties which a MachineFunction may have at a given point in time.
Definition: MachineFunction.h:111
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:828
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
X86.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::MachineFunctionProperties::set
MachineFunctionProperties & set(Property P)
Definition: MachineFunction.h:173
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::MachineFunctionProperties::Property::NoVRegs
@ NoVRegs
Passes.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
LazyMachineBlockFrequencyInfo.h
===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*–===//
TargetSchedule.h
addPadding
static void addPadding(BinaryStreamWriter &Writer)
Definition: ContinuationRecordBuilder.cpp:21
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
ProfileSummaryInfo.h
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:239
llvm::LazyMachineBlockFrequencyInfoPass
This is an alternative analysis pass to MachineBlockFrequencyInfo.
Definition: LazyMachineBlockFrequencyInfo.h:37
llvm::MachineFunction
Definition: MachineFunction.h:234
llvm::X86Subtarget::padShortFunctions
bool padShortFunctions() const
Definition: X86Subtarget.h:745
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
MBBI
MachineBasicBlock MachineBasicBlock::iterator MBBI
Definition: AArch64SLSHardening.cpp:75
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:661
hasReturn
static bool hasReturn(const MachineBasicBlock &MBB)
Returns true if MBB contains an instruction that returns.
Definition: HexagonFrameLowering.cpp:344
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:600
Function.h
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
raw_ostream.h
X86InstrInfo.h
llvm::MachineInstrBundleIterator< MachineInstr >
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38