LLVM  15.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1 //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Uses profile information to split out cold blocks.
11 //
12 // This pass splits out cold machine basic blocks from the parent function. This
13 // implementation leverages the basic block section framework. Blocks marked
14 // cold by this pass are grouped together in a separate section prefixed with
15 // ".text.unlikely.*". The linker can then group these together as a cold
16 // section. The split part of the function is a contiguous region identified by
17 // the symbol "foo.cold". Grouping all cold blocks across functions together
18 // decreases fragmentation and improves icache and itlb utilization. Note that
19 // the overall changes to the binary size are negligible; only a small number of
20 // additional jump instructions may be introduced.
21 //
22 // For the original RFC of this pass please see
23 // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24 //===----------------------------------------------------------------------===//
25 
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
38 
39 using namespace llvm;
40 
41 // FIXME: This cutoff value is CPU dependent and should be moved to
42 // TargetTransformInfo once we consider enabling this on other platforms.
43 // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
44 // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
45 // The default was empirically determined to be optimal when considering cutoff
46 // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
47 // Intel CPUs.
48 static cl::opt<unsigned>
49  PercentileCutoff("mfs-psi-cutoff",
50  cl::desc("Percentile profile summary cutoff used to "
51  "determine cold blocks. Unused if set to zero."),
52  cl::init(999950), cl::Hidden);
53 
55  "mfs-count-threshold",
56  cl::desc(
57  "Minimum number of times a block must be executed to be retained."),
58  cl::init(1), cl::Hidden);
59 
60 namespace {
61 
62 class MachineFunctionSplitter : public MachineFunctionPass {
63 public:
64  static char ID;
65  MachineFunctionSplitter() : MachineFunctionPass(ID) {
67  }
68 
69  StringRef getPassName() const override {
70  return "Machine Function Splitter Transformation";
71  }
72 
73  void getAnalysisUsage(AnalysisUsage &AU) const override;
74 
75  bool runOnMachineFunction(MachineFunction &F) override;
76 };
77 } // end anonymous namespace
78 
79 static bool isColdBlock(const MachineBasicBlock &MBB,
80  const MachineBlockFrequencyInfo *MBFI,
81  ProfileSummaryInfo *PSI) {
83  if (!Count)
84  return true;
85 
86  if (PercentileCutoff > 0) {
87  return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
88  }
89  return (*Count < ColdCountThreshold);
90 }
91 
92 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
93  // TODO: We only target functions with profile data. Static information may
94  // also be considered but we don't see performance improvements yet.
95  if (!MF.getFunction().hasProfileData())
96  return false;
97 
98  // TODO: We don't split functions where a section attribute has been set
99  // since the split part may not be placed in a contiguous region. It may also
100  // be more beneficial to augment the linker to ensure contiguous layout of
101  // split functions within the same section as specified by the attribute.
102  if (MF.getFunction().hasSection() ||
103  MF.getFunction().hasFnAttribute("implicit-section-name"))
104  return false;
105 
106  // We don't want to proceed further for cold functions
107  // or functions of unknown hotness. Lukewarm functions have no prefix.
108  Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
109  if (SectionPrefix.hasValue() &&
110  (SectionPrefix.getValue().equals("unlikely") ||
111  SectionPrefix.getValue().equals("unknown"))) {
112  return false;
113  }
114 
115  // Renumbering blocks here preserves the order of the blocks as
116  // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
117  // blocks. Preserving the order of blocks is essential to retaining decisions
118  // made by prior passes such as MachineBlockPlacement.
119  MF.RenumberBlocks();
121  auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
122  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
123 
125  for (auto &MBB : MF) {
126  if (MBB.isEntryBlock())
127  continue;
128 
129  if (MBB.isEHPad())
130  LandingPads.push_back(&MBB);
131  else if (isColdBlock(MBB, MBFI, PSI))
133  }
134 
135  // We only split out eh pads if all of them are cold.
136  bool HasHotLandingPads = false;
137  for (const MachineBasicBlock *LP : LandingPads) {
138  if (!isColdBlock(*LP, MBFI, PSI))
139  HasHotLandingPads = true;
140  }
141  if (!HasHotLandingPads) {
142  for (MachineBasicBlock *LP : LandingPads)
143  LP->setSectionID(MBBSectionID::ColdSectionID);
144  }
145 
146  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
147  return X.getSectionID().Type < Y.getSectionID().Type;
148  };
150 
151  return true;
152 }
153 
154 void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
158 }
159 
161 INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
162  "Split machine functions using profile information", false,
163  false)
164 
166  return new MachineFunctionSplitter();
167 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::sortBasicBlocksAndUpdateBranches
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
Definition: BasicBlockSections.cpp:251
llvm::createMachineFunctionSplitterPass
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
llvm::MachineBlockFrequencyInfo::getBlockProfileCount
Optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
Definition: MachineBlockFrequencyInfo.cpp:234
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::MachineBasicBlock::isEntryBlock
bool isEntryBlock() const
Returns true if this is the entry block of the function.
Definition: MachineBasicBlock.cpp:286
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::Optional< uint64_t >
INITIALIZE_PASS
INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", "Split machine functions using profile information", false, false) MachineFunctionPass *llvm
Definition: MachineFunctionSplitter.cpp:161
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:312
llvm::MachineBasicBlock::setSectionID
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
Definition: MachineBasicBlock.h:606
CommandLine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::MachineBlockFrequencyInfo
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Definition: MachineBlockFrequencyInfo.h:33
llvm::initializeMachineFunctionSplitterPass
void initializeMachineFunctionSplitterPass(PassRegistry &)
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
ColdCountThreshold
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
llvm::Function::getSectionPrefix
Optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:2032
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:187
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Passes.h
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:625
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:103
PercentileCutoff
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
ProfileSummaryInfo.h
llvm::MachineModuleInfoWrapperPass
Definition: MachineModuleInfo.h:215
llvm::BasicBlockSection::Preset
@ Preset
BasicBlockSectionUtils.h
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:193
MachineFunctionPass.h
llvm::Optional::getValue
constexpr const T & getValue() const &
Definition: Optional.h:306
isColdBlock
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
Definition: MachineFunctionSplitter.cpp:79
MachineModuleInfo.h
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:549
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::MBBSectionID::ColdSectionID
const static MBBSectionID ColdSectionID
Definition: MachineBasicBlock.h:63
Function.h
llvm::Function::hasProfileData
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:290
SmallVector.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:405
MachineFunction.h
llvm::MachineFunction::setBBSectionsType
void setBBSectionsType(BasicBlockSection V)
Definition: MachineFunction.h:645
InitializePasses.h
MachineBlockFrequencyInfo.h
llvm::MachineFunction::RenumberBlocks
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
Definition: MachineFunction.cpp:319
llvm::ProfileSummaryInfo::isColdCountNthPercentile
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
Definition: ProfileSummaryInfo.cpp:312
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38