LLVM 17.0.0git
MachineFunctionSplitter.cpp
Go to the documentation of this file.
1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
35#include "llvm/CodeGen/Passes.h"
36#include "llvm/IR/Function.h"
39#include <optional>
40
41using namespace llvm;
42
43// FIXME: This cutoff value is CPU dependent and should be moved to
44// TargetTransformInfo once we consider enabling this on other platforms.
45// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
46// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
47// The default was empirically determined to be optimal when considering cutoff
48// values between 99%-ile and 100%-ile with respect to iTLB and icache metrics
49// on Intel CPUs.
51 PercentileCutoff("mfs-psi-cutoff",
52 cl::desc("Percentile profile summary cutoff used to "
53 "determine cold blocks. Unused if set to zero."),
54 cl::init(999950), cl::Hidden);
55
57 "mfs-count-threshold",
59 "Minimum number of times a block must be executed to be retained."),
61
63 "mfs-split-ehcode",
64 cl::desc("Splits all EH code and it's descendants by default."),
65 cl::init(false), cl::Hidden);
66
67namespace {
68
69class MachineFunctionSplitter : public MachineFunctionPass {
70public:
71 static char ID;
72 MachineFunctionSplitter() : MachineFunctionPass(ID) {
74 }
75
76 StringRef getPassName() const override {
77 return "Machine Function Splitter Transformation";
78 }
79
80 void getAnalysisUsage(AnalysisUsage &AU) const override;
81
83};
84} // end anonymous namespace
85
86/// setDescendantEHBlocksCold - This marks all EH pads, and all blocks
87/// reachable only via EH pads, as cold. This will help mark EH pads statically
88/// cold instead of relying on profile data.
91 computeEHOnlyBlocks(MF, EHBlocks);
92 for (auto Block : EHBlocks) {
93 Block->setSectionID(MBBSectionID::ColdSectionID);
94 }
95}
96
98 const MachineBlockFrequencyInfo *MBFI,
99 ProfileSummaryInfo *PSI) {
100 std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
101 if (!Count)
102 return true;
103
104 if (PercentileCutoff > 0) {
105 return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
106 }
107 return (*Count < ColdCountThreshold);
108}
109
110bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
111 // We target functions with profile data. Static information in the form
112 // of exception handling code may be split to cold if user passes the
113 // mfs-split-ehcode flag.
114 bool UseProfileData = MF.getFunction().hasProfileData();
115 if (!UseProfileData && !SplitAllEHCode)
116 return false;
117
118 // TODO: We don't split functions where a section attribute has been set
119 // since the split part may not be placed in a contiguous region. It may also
120 // be more beneficial to augment the linker to ensure contiguous layout of
121 // split functions within the same section as specified by the attribute.
122 if (MF.getFunction().hasSection() ||
123 MF.getFunction().hasFnAttribute("implicit-section-name"))
124 return false;
125
126 // We don't want to proceed further for cold functions
127 // or functions of unknown hotness. Lukewarm functions have no prefix.
128 std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
129 if (SectionPrefix &&
130 (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
131 return false;
132 }
133
134 // Renumbering blocks here preserves the order of the blocks as
135 // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
136 // blocks. Preserving the order of blocks is essential to retaining decisions
137 // made by prior passes such as MachineBlockPlacement.
138 MF.RenumberBlocks();
139 MF.setBBSectionsType(BasicBlockSection::Preset);
140
141 MachineBlockFrequencyInfo *MBFI = nullptr;
142 ProfileSummaryInfo *PSI = nullptr;
143 if (UseProfileData) {
144 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
145 PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
146 }
147
149 for (auto &MBB : MF) {
150 if (MBB.isEntryBlock())
151 continue;
152
153 if (MBB.isEHPad())
154 LandingPads.push_back(&MBB);
155 else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
157 }
158
159 // Split all EH code and its descendants statically by default.
160 if (SplitAllEHCode)
162 // We only split out eh pads if all of them are cold.
163 else {
164 bool HasHotLandingPads = false;
165 for (const MachineBasicBlock *LP : LandingPads) {
166 if (!isColdBlock(*LP, MBFI, PSI))
167 HasHotLandingPads = true;
168 }
169 if (!HasHotLandingPads) {
170 for (MachineBasicBlock *LP : LandingPads)
171 LP->setSectionID(MBBSectionID::ColdSectionID);
172 }
173 }
174 auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
175 return X.getSectionID().Type < Y.getSectionID().Type;
176 };
179 return true;
180}
181
182void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
186}
187
188char MachineFunctionSplitter::ID = 0;
189INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
190 "Split machine functions using profile information", false,
191 false)
192
194 return new MachineFunctionSplitter();
195}
MachineBasicBlock & MBB
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define F(x, y, z)
Definition: MD5.cpp:55
static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI)
static cl::opt< unsigned > ColdCountThreshold("mfs-count-threshold", cl::desc("Minimum number of times a block must be executed to be retained."), cl::init(1), cl::Hidden)
static void setDescendantEHBlocksCold(MachineFunction &MF)
setDescendantEHBlocksCold - This splits all EH pads and blocks reachable only by EH pad as cold.
static cl::opt< unsigned > PercentileCutoff("mfs-psi-cutoff", cl::desc("Percentile profile summary cutoff used to " "determine cold blocks. Unused if set to zero."), cl::init(999950), cl::Hidden)
static cl::opt< bool > SplitAllEHCode("mfs-split-ehcode", cl::desc("Splits all EH code and it's descendants by default."), cl::init(false), cl::Hidden)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
std::optional< StringRef > getSectionPrefix() const
Get the section prefix for this function.
Definition: Function.cpp:2125
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:289
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:644
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:109
bool isEHPad() const
Returns true if the block is a landing pad.
bool isEntryBlock() const
Returns true if this is the entry block of the function.
void setSectionID(MBBSectionID V)
Sets the section ID for this basic block.
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
std::optional< uint64_t > getBlockProfileCount(const MachineBasicBlock *MBB) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void setBBSectionsType(BasicBlockSection V)
Function & getFunction()
Return the LLVM function that this machine code represents.
void RenumberBlocks(MachineBasicBlock *MBBFrom=nullptr)
RenumberBlocks - This discards all of the MachineBasicBlock numbers and recomputes them.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered cold with regard to a given cold percentile cutoff value.
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMachineFunctionSplitterPass(PassRegistry &)
static void computeEHOnlyBlocks(FunctionT &F, DenseSet< BlockT * > &EHBlocks)
Compute a list of blocks that are only reachable via EH paths.
Definition: EHUtils.h:18
MachineFunctionPass * createMachineFunctionSplitterPass()
createMachineFunctionSplitterPass - This pass splits machine functions using profile information.
void avoidZeroOffsetLandingPad(MachineFunction &MF)
void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp)
static const MBBSectionID ColdSectionID