LLVM  16.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1 //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Uses profile information to split out cold blocks.
11 //
12 // This pass splits out cold machine basic blocks from the parent function. This
13 // implementation leverages the basic block section framework. Blocks marked
14 // cold by this pass are grouped together in a separate section prefixed with
15 // ".text.unlikely.*". The linker can then group these together as a cold
16 // section. The split part of the function is a contiguous region identified by
17 // the symbol "foo.cold". Grouping all cold blocks across functions together
18 // decreases fragmentation and improves icache and itlb utilization. Note that
19 // the overall changes to the binary size are negligible; only a small number of
20 // additional jump instructions may be introduced.
21 //
22 // For the original RFC of this pass please see
23 // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/InitializePasses.h"
38 
39 using namespace llvm;
40 
41 // FIXME: This cutoff value is CPU dependent and should be moved to
42 // TargetTransformInfo once we consider enabling this on other platforms.
43 // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
44 // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
45 // The default was empirically determined to be optimal when considering cutoff
46 // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
47 // Intel CPUs.
// When this option is zero, isColdBlock() skips the percentile query and
// compares the block's profile count against ColdCountThreshold instead.
48 static cl::opt<unsigned>
49  PercentileCutoff("mfs-psi-cutoff",
50  cl::desc("Percentile profile summary cutoff used to "
51  "determine cold blocks. Unused if set to zero."),
52  cl::init(999950), cl::Hidden);
53 
// NOTE(review): the opening line of this declaration (listing line 54) is
// absent from this listing. The cross-reference index at the end of the page
// shows it as `static cl::opt<unsigned> ColdCountThreshold(`.
// isColdBlock() consults this threshold only when PercentileCutoff is zero: a
// block whose profile count is strictly below it is treated as cold.
55  "mfs-count-threshold",
56  cl::desc(
57  "Minimum number of times a block must be executed to be retained."),
58  cl::init(1), cl::Hidden);
59 
// NOTE(review): the opening line of this declaration (listing line 60) is
// absent from this listing. The cross-reference index at the end of the page
// shows it as `static cl::opt<bool> SplitAllEHCode(`.
// When set, runOnMachineFunction() proceeds even for functions without profile
// data, hands all EH pads to setDescendantEHBlocksCold(), and skips the
// profile-driven branch for non-EH blocks.
61  "mfs-split-ehcode",
62  cl::desc("Splits all EH code and it's descendants by default."),
63  cl::init(false), cl::Hidden);
64 
65 namespace {
66 
// Machine function pass that assigns cold basic blocks to the cold section ID
// so that they can be emitted apart from the rest of the function. The class
// is file-local; the declarations below are defined later in this file.
67 class MachineFunctionSplitter : public MachineFunctionPass {
68 public:
// Pass identifier handed to the MachineFunctionPass base-class constructor.
69  static char ID;
70  MachineFunctionSplitter() : MachineFunctionPass(ID) {
// NOTE(review): listing line 71 is absent here. The cross-reference index
// lists initializeMachineFunctionSplitterPass(PassRegistry &) and
// PassRegistry::getPassRegistry(), so the constructor presumably registers
// the pass at this point - confirm against the upstream source.
72  }
73 
// Human-readable pass name.
74  StringRef getPassName() const override {
75  return "Machine Function Splitter Transformation";
76  }
77 
// Declares the analyses this pass depends on (defined out of line).
78  void getAnalysisUsage(AnalysisUsage &AU) const override;
79 
// Entry point invoked per machine function; returns true when the function
// was processed for splitting and false when it was skipped.
80  bool runOnMachineFunction(MachineFunction &F) override;
81 };
82 } // end anonymous namespace
83 
84 /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
85 /// only by EH pad as cold. This will help mark EH pads statically cold instead
86 /// of relying on profile data.
///
/// \param EHBlocks the EH pads of \p MF; each is seeded with status EH.
/// \param MF the function whose blocks are classified; its front block is the
/// NonEH seed.
// NOTE(review): listing line 88 (function name and first parameter) is absent
// here. The cross-reference index shows the signature as
// setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
//                           MachineFunction &MF).
87 static void
89  MachineFunction &MF) {
90  MachineBasicBlock *StartBlock = &MF.front();
91  // A block is Unknown if it's not reachable from anywhere,
92  // EH if it's only reachable from start blocks via some path through EH pads,
93  // NonEH if it's reachable from Non EH blocks as well.
// The enumerator values are ordered so that a "stronger" status compares
// greater; checkPredecessors below relies on this ordering.
94  enum Status { Unknown = 0, EH = 1, NonEH = 2 };
// NOTE(review): listing lines 95-96 are absent here. They presumably declare
// `Statuses` (used below as a map from MachineBasicBlock * to Status) and
// `WorkList` (used below as a set of MachineBasicBlock *) - confirm the exact
// container types against the upstream source.
97 
// Returns the status recorded for MBB, or Unknown when none was recorded yet.
98  auto getStatus = [&](MachineBasicBlock *MBB) {
99  if (Statuses.find(MBB) != Statuses.end())
100  return Statuses[MBB];
101  else
102  return Unknown;
103  };
104 
// Returns the greatest status among Stat and the statuses of MBB's
// predecessors. The result is never lower than Stat.
105  auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
106  for (auto *PredMBB : MBB->predecessors()) {
107  Status PredStatus = getStatus(PredMBB);
108  // If status of predecessor block has gone above current block
109  // we update current blocks status.
110  if (PredStatus > Stat)
111  Stat = PredStatus;
112  }
113  return Stat;
114  };
115 
// Queues every successor of MBB that is not itself an EH pad. EH pads are
// never queued, so the EH status they are seeded with below is never revised.
116  auto addSuccesors = [&](MachineBasicBlock *MBB) {
117  for (auto *SuccMBB : MBB->successors()) {
118  if (!SuccMBB->isEHPad())
119  WorkList.insert(SuccMBB);
120  }
121  };
122 
123  // Insert the successors of start block
124  // and landing pads successor.
125  Statuses[StartBlock] = NonEH;
126  addSuccesors(StartBlock);
127  for (auto *LP : EHBlocks) {
128  addSuccesors(LP);
129  Statuses[LP] = EH;
130  }
131 
132  // Worklist iterative algorithm.
// A block's successors are re-queued only when its status changes, and
// checkPredecessors can only raise a status, never lower it.
133  while (!WorkList.empty()) {
134  auto *MBB = *WorkList.begin();
135  WorkList.erase(MBB);
136 
137  Status OldStatus = getStatus(MBB);
138 
139  // Check on predecessors and check for
140  // Status update.
141  Status NewStatus = checkPredecessors(MBB, OldStatus);
142 
143  // Did the block status change?
144  bool changed = OldStatus != NewStatus;
145  if (changed) {
146  addSuccesors(MBB);
147  Statuses[MBB] = NewStatus;
148  }
149  }
150 
// Only blocks whose final status is EH are moved to the cold section; blocks
// recorded as NonEH, and blocks never recorded at all, are left untouched.
151  for (auto Entry : Statuses) {
152  if (Entry.second == EH)
153  Entry.first->setSectionID(MBBSectionID::ColdSectionID);
154  }
155 }
156 
// Decides whether MBB should be treated as cold.
//
// Returns true when no profile count is available for the block. Otherwise,
// if PercentileCutoff is non-zero, the answer comes from
// PSI->isColdCountNthPercentile(); if it is zero, the block is cold when its
// count is strictly below ColdCountThreshold.
//
// MBFI and PSI are dereferenced without a null check (PSI only when
// PercentileCutoff is non-zero), so callers must pass valid pointers.
157 static bool isColdBlock(const MachineBasicBlock &MBB,
158  const MachineBlockFrequencyInfo *MBFI,
159  ProfileSummaryInfo *PSI) {
// NOTE(review): listing line 160, which declares `Count`, is absent here. The
// cross-reference index lists MachineBlockFrequencyInfo::getBlockProfileCount
// returning Optional<uint64_t>, so `Count` is presumably the result of
// MBFI->getBlockProfileCount(&MBB) - confirm against the upstream source.
161  if (!Count)
162  return true;
163 
164  if (PercentileCutoff > 0) {
165  return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
166  }
167  return (*Count < ColdCountThreshold);
168 }
169 
// Pass entry point. Returns false without touching MF when the function is
// not eligible for splitting (no profile data and no mfs-split-ehcode, an
// explicit section, or an "unlikely"/"unknown" section prefix); returns true
// otherwise.
170 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
171  // We target functions with profile data. Static information in the form
172  // of exception handling code may be split to cold if user passes the
173  // mfs-split-ehcode flag.
174  bool UseProfileData = MF.getFunction().hasProfileData();
175  if (!UseProfileData && !SplitAllEHCode)
176  return false;
177 
178  // TODO: We don't split functions where a section attribute has been set
179  // since the split part may not be placed in a contiguous region. It may also
180  // be more beneficial to augment the linker to ensure contiguous layout of
181  // split functions within the same section as specified by the attribute.
182  if (MF.getFunction().hasSection() ||
183  MF.getFunction().hasFnAttribute("implicit-section-name"))
184  return false;
185 
186  // We don't want to proceed further for cold functions
187  // or functions of unknown hotness. Lukewarm functions have no prefix.
188  Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
189  if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
190  SectionPrefix.value().equals("unknown"))) {
191  return false;
192  }
193 
194  // Renumbering blocks here preserves the order of the blocks as
195  // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
196  // blocks. Preserving the order of blocks is essential to retaining decisions
197  // made by prior passes such as MachineBlockPlacement.
198  MF.RenumberBlocks();
// NOTE(review): listing line 199 is absent here. The cross-reference index
// lists MachineFunction::setBBSectionsType and BasicBlockSection::Preset, so
// this presumably selects the Preset section type for MF - confirm against
// the upstream source.
200 
// The analyses are fetched only when the function has profile data; both
// pointers stay null on the mfs-split-ehcode-only path.
201  MachineBlockFrequencyInfo *MBFI = nullptr;
202  ProfileSummaryInfo *PSI = nullptr;
203  if (UseProfileData) {
204  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
205  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
206  }
207 
// NOTE(review): listing line 208 is absent here; it presumably declares
// `LandingPads`, used below as a container of MachineBasicBlock * - confirm
// the exact type against the upstream source.
// The entry block is never considered for splitting.
209  for (auto &MBB : MF) {
210  if (MBB.isEntryBlock())
211  continue;
212 
213  if (MBB.isEHPad())
214  LandingPads.push_back(&MBB);
215  else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
// NOTE(review): listing line 216, the statement controlled by the `else if`
// above, is absent here; it presumably marks MBB cold via
// setSectionID(MBBSectionID::ColdSectionID) - confirm against the upstream
// source.
217  }
218 
219  // Split all EH code and its descendants statically by default.
220  if (SplitAllEHCode)
221  setDescendantEHBlocksCold(LandingPads, MF);
222  // We only split out eh pads if all of them are cold.
// Reaching this branch means SplitAllEHCode is false, so the early return at
// the top guarantees UseProfileData is true and MBFI/PSI are non-null here.
223  else {
224  bool HasHotLandingPads = false;
225  for (const MachineBasicBlock *LP : LandingPads) {
226  if (!isColdBlock(*LP, MBFI, PSI))
227  HasHotLandingPads = true;
228  }
229  if (!HasHotLandingPads) {
230  for (MachineBasicBlock *LP : LandingPads)
231  LP->setSectionID(MBBSectionID::ColdSectionID);
232  }
233  }
// Orders two blocks by the Type field of their section IDs.
234  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
235  return X.getSectionID().Type < Y.getSectionID().Type;
236  };
// NOTE(review): listing lines 237-238 are absent here. Comparator is unused in
// the visible code; the cross-reference index lists
// sortBasicBlocksAndUpdateBranches(MF, MBBCmp) and
// avoidZeroOffsetLandingPad(MF), which are presumably invoked at this point -
// confirm against the upstream source.
239  return true;
240 }
241 
// Declares the analyses this pass depends on.
// NOTE(review): the body (listing lines 243-245) is absent from this listing.
// runOnMachineFunction() calls getAnalysis<MachineBlockFrequencyInfo>() and
// getAnalysis<ProfileSummaryInfoWrapperPass>(), and the cross-reference index
// lists AnalysisUsage::addRequired and MachineModuleInfoWrapperPass, so the
// missing lines presumably register those analyses as required - confirm
// against the upstream source.
242 void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
246 }
247 
// Pass registration for MachineFunctionSplitter with the argument string
// "machine-function-splitter".
// NOTE(review): listing line 248 is absent from this listing; it presumably
// defines MachineFunctionSplitter::ID. The two trailing `false` arguments are
// presumably the macro's CFG-only and is-analysis flags - confirm both against
// the upstream source and the INITIALIZE_PASS definition.
249 INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
250  "Split machine functions using profile information", false,
251  false)
252 
// Factory that returns a newly allocated MachineFunctionSplitter.
// NOTE(review): the signature line (listing line 253) is absent from this
// listing. The cross-reference index shows it as
// `MachineFunctionPass *llvm::createMachineFunctionSplitterPass()`.
254  return new MachineFunctionSplitter();
255 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::sortBasicBlocksAndUpdateBranches
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
Definition: BasicBlockSections.cpp:250
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::createMachineFunctionSplitterPass
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
llvm::MachineBlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
Definition: MachineBlockFrequencyInfo.cpp:234
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::MachineBasicBlock::isEntryBlock
bool isEntryBlock() const
Returns true if this is the entry block of the function.
Definition: MachineBasicBlock.cpp:287
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::Optional< uint64_t >
INITIALIZE_PASS
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", "Split machine functions using profile information", false, false) MachineFunctionPass *llvm
Definition: MachineFunctionSplitter.cpp:249
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::avoidZeroOffsetLandingPad
void avoidZeroOffsetLandingPad(MachineFunction &MF)
Definition: BasicBlockSections.cpp:272
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MachineBasicBlock::setSectionID
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
Definition: MachineBasicBlock.h:633
CommandLine.h
llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition: MachineFunction.h:866
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::MachineBlockFrequencyInfo
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Definition: MachineBlockFrequencyInfo.h:33
llvm::initializeMachineFunctionSplitterPass
void initializeMachineFunctionSplitterPass(PassRegistry &)
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
llvm::Function::getSectionPrefix
Optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:2094
setDescendantEHBlocksCold
static void setDescendantEHBlocksCold(SmallVectorImpl< MachineBasicBlock * > &EHBlocks, MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
Definition: MachineFunctionSplitter.cpp:88
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
SplitAllEHCode
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
llvm::StringRef::equals
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Passes.h
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:629
llvm::cl::opt
Definition: CommandLine.h:1412
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:103
PercentileCutoff
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
ProfileSummaryInfo.h
llvm::MachineModuleInfoWrapperPass
Definition: MachineModuleInfo.h:214
llvm::BasicBlockSection::Preset
@ Preset
BasicBlockSectionUtils.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::empty
bool empty() const
Definition: DenseSet.h:80
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:447
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
MachineFunctionPass.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::begin
iterator begin()
Definition: DenseSet.h:173
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
isColdBlock
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
Definition: MachineFunctionSplitter.cpp:157
MachineModuleInfo.h
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:386
Status
Definition: SIModeRegister.cpp:29
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:392
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:576
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::MBBSectionID::ColdSectionID
const static MBBSectionID ColdSectionID
Definition: MachineBasicBlock.h:63
Function.h
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
SmallVector.h
llvm::Optional::value
constexpr const T & value() const &
Definition: Optional.h:281
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:413
MachineFunction.h
llvm::MachineFunction::setBBSectionsType
void setBBSectionsType(BasicBlockSection V)
Definition: MachineFunction.h:646
InitializePasses.h
MachineBlockFrequencyInfo.h
llvm::MachineFunction::RenumberBlocks
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
Definition: MachineFunction.cpp:319
llvm::ProfileSummaryInfo::isColdCountNthPercentile
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
Definition: ProfileSummaryInfo.cpp:314