LLVM 17.0.0git
BlockExtractor.cpp
Go to the documentation of this file.
1//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass extracts the specified basic blocks from the module into their
10// own functions.
11//
12//===----------------------------------------------------------------------===//
13
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include <utility>
28
29using namespace llvm;
30
31#define DEBUG_TYPE "block-extractor"
32
33STATISTIC(NumExtracted, "Number of basic blocks extracted");
34
36 "extract-blocks-file", cl::value_desc("filename"),
37 cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
38
39static cl::opt<bool>
40 BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
41 cl::desc("Erase the existing functions"),
43namespace {
44class BlockExtractor {
45public:
46 BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
47 bool runOnModule(Module &M);
48 void
49 init(const std::vector<std::vector<BasicBlock *>> &GroupsOfBlocksToExtract) {
50 GroupsOfBlocks = GroupsOfBlocksToExtract;
51 if (!BlockExtractorFile.empty())
52 loadFile();
53 }
54
55private:
56 std::vector<std::vector<BasicBlock *>> GroupsOfBlocks;
57 bool EraseFunctions;
58 /// Map a function name to groups of blocks.
60 BlocksByName;
61
62 void loadFile();
63 void splitLandingPadPreds(Function &F);
64};
65
66} // end anonymous namespace
67
68/// Gets all of the blocks specified in the input file.
69void BlockExtractor::loadFile() {
71 if (ErrOrBuf.getError())
72 report_fatal_error("BlockExtractor couldn't load the file.");
73 // Read the file.
74 auto &Buf = *ErrOrBuf;
76 Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
77 /*KeepEmpty=*/false);
78 for (const auto &Line : Lines) {
80 Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
81 /*KeepEmpty=*/false);
82 if (LineSplit.empty())
83 continue;
84 if (LineSplit.size()!=2)
85 report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
86 /*GenCrashDiag=*/false);
88 LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
89 /*KeepEmpty=*/false);
90 if (BBNames.empty())
91 report_fatal_error("Missing bbs name");
92 BlocksByName.push_back(
93 {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
94 }
95}
96
97/// Extracts the landing pads to make sure all of them have only one
98/// predecessor.
99void BlockExtractor::splitLandingPadPreds(Function &F) {
100 for (BasicBlock &BB : F) {
101 for (Instruction &I : BB) {
102 if (!isa<InvokeInst>(&I))
103 continue;
104 InvokeInst *II = cast<InvokeInst>(&I);
105 BasicBlock *Parent = II->getParent();
106 BasicBlock *LPad = II->getUnwindDest();
107
108 // Look through the landing pad's predecessors. If one of them ends in an
109 // 'invoke', then we want to split the landing pad.
110 bool Split = false;
111 for (auto *PredBB : predecessors(LPad)) {
112 if (PredBB->isLandingPad() && PredBB != Parent &&
113 isa<InvokeInst>(Parent->getTerminator())) {
114 Split = true;
115 break;
116 }
117 }
118
119 if (!Split)
120 continue;
121
123 SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
124 }
125 }
126}
127
128bool BlockExtractor::runOnModule(Module &M) {
129 bool Changed = false;
130
131 // Get all the functions.
133 for (Function &F : M) {
134 splitLandingPadPreds(F);
135 Functions.push_back(&F);
136 }
137
138 // Get all the blocks specified in the input file.
139 unsigned NextGroupIdx = GroupsOfBlocks.size();
140 GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
141 for (const auto &BInfo : BlocksByName) {
142 Function *F = M.getFunction(BInfo.first);
143 if (!F)
144 report_fatal_error("Invalid function name specified in the input file",
145 /*GenCrashDiag=*/false);
146 for (const auto &BBInfo : BInfo.second) {
147 auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
148 return BB.getName().equals(BBInfo);
149 });
150 if (Res == F->end())
151 report_fatal_error("Invalid block name specified in the input file",
152 /*GenCrashDiag=*/false);
153 GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
154 }
155 ++NextGroupIdx;
156 }
157
158 // Extract each group of basic blocks.
159 for (auto &BBs : GroupsOfBlocks) {
160 SmallVector<BasicBlock *, 32> BlocksToExtractVec;
161 for (BasicBlock *BB : BBs) {
162 // Check if the module contains BB.
163 if (BB->getParent()->getParent() != &M)
164 report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
165 LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
166 << BB->getParent()->getName() << ":" << BB->getName()
167 << "\n");
168 BlocksToExtractVec.push_back(BB);
169 if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
170 BlocksToExtractVec.push_back(II->getUnwindDest());
171 ++NumExtracted;
172 Changed = true;
173 }
174 CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
175 Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
176 if (F)
177 LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
178 << "' in: " << F->getName() << '\n');
179 else
180 LLVM_DEBUG(dbgs() << "Failed to extract for group '"
181 << (*BBs.begin())->getName() << "'\n");
182 }
183
184 // Erase the functions.
185 if (EraseFunctions || BlockExtractorEraseFuncs) {
186 for (Function *F : Functions) {
187 LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
188 << "\n");
189 F->deleteBody();
190 }
191 // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
192 for (Function &F : M)
194 Changed = true;
195 }
196
197 return Changed;
198}
199
201 std::vector<std::vector<BasicBlock *>> &&GroupsOfBlocks,
202 bool EraseFunctions)
203 : GroupsOfBlocks(GroupsOfBlocks), EraseFunctions(EraseFunctions) {}
204
207 BlockExtractor BE(EraseFunctions);
208 BE.init(GroupsOfBlocks);
209 return BE.runOnModule(M) ? PreservedAnalyses::none()
211}
static const Function * getParent(const Value *V)
static cl::opt< std::string > BlockExtractorFile("extract-blocks-file", cl::value_desc("filename"), cl::desc("A file containing list of basic blocks to extract"), cl::Hidden)
static cl::opt< bool > BlockExtractorEraseFuncs("extract-blocks-erase-funcs", cl::desc("Erase the existing functions"), cl::Hidden)
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static std::unique_ptr< Module > loadFile(const std::string &FileName, LLVMContext &Context)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Definition: BasicBlock.h:127
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:48
const BasicBlock * getParent() const
Definition: Instruction.h:90
Invoke instruction.
BasicBlock * getUnwindDest() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1846
auto predecessors(const MachineBasicBlock *BB)
BlockExtractorPass(std::vector< std::vector< BasicBlock * > > &&GroupsOfBlocks, bool EraseFunctions)
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)