LLVM 23.0.0git
IROutliner.h
Go to the documentation of this file.
1//===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// The interface file for the IROutliner which is used by the IROutliner Pass.
11//
12// The outliner uses the IRSimilarityIdentifier to identify the similar regions
13// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
14// whether it will provide code size reduction. Each region is extracted using
15// the code extractor. These extracted functions are consolidated into a single
16// function and called from the extracted call site.
17//
18// For example:
19// \code
20// %1 = add i32 %a, %b
21// %2 = add i32 %b, %a
22// %3 = add i32 %b, %a
23// %4 = add i32 %a, %b
24// \endcode
25// would become function
26// \code
27// define internal void outlined_ir_function(i32 %0, i32 %1) {
28// %1 = add i32 %0, %1
29// %2 = add i32 %1, %0
30// ret void
31// }
32// \endcode
33// with calls:
34// \code
35// call void outlined_ir_function(i32 %a, i32 %b)
36// call void outlined_ir_function(i32 %b, i32 %a)
37// \endcode
38//
39//===----------------------------------------------------------------------===//
40
41#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43
45#include "llvm/IR/PassManager.h"
48
49struct OutlinableGroup;
50
51namespace llvm {
52using namespace CallingConv;
53using namespace IRSimilarity;
54
55class Module;
58
59/// The OutlinableRegion holds all the information for a specific region, or
60/// sequence of instructions. This includes what values need to be hoisted to
61/// arguments from the extracted function, inputs and outputs to the region, and
62/// mapping from the extracted function arguments to overall function arguments.
64 /// Describes the region of code.
66
67 /// If this region is outlined, the front and back IRInstructionData could
68 /// potentially become invalidated if the only new instruction is a call.
69 /// This ensures that we replace the instruction in the IRInstructionData.
72
73 /// The number of extracted inputs from the CodeExtractor.
74 unsigned NumExtractedInputs = 0;
75
76 /// The corresponding BasicBlock with the appropriate stores for this
77 /// OutlinableRegion in the overall function.
78 unsigned OutputBlockNum = -1;
79
80 /// Mapping the extracted argument number to the argument number in the
81 /// overall function. Since there will be inputs, such as elevated constants
82 /// that are not the same in each region in a SimilarityGroup, or values that
83 /// cannot be sunk into the extracted section in every region, we must keep
84 /// track of which extracted argument maps to which overall argument.
87
88 /// Values in the outlined functions will often be replaced by arguments. When
89 /// finding corresponding values from one region to another, the found value
90 /// will be the value the argument previously replaced. This structure maps
91 /// any replaced values for the region to the aggregate argument
92 /// in the overall function.
94
95 /// Marks whether we need to change the order of the arguments when mapping
96 /// the old extracted function call to the new aggregate outlined function
97 /// call.
98 bool ChangedArgOrder = false;
99
100 /// Marks whether this region ends in a branch, there is special handling
101 /// required for the following basic blocks in this case.
102 bool EndsInBranch = false;
103
104 /// The PHIBlocks with their corresponding return block based on the return
105 /// value as the key.
107
108 /// Mapping of the argument number in the deduplicated function
109 /// to a given constant, which is used when creating the arguments to the call
110 /// to the newly created deduplicated function. This is handled separately
111 /// since the CodeExtractor does not recognize constants.
113
114 /// The global value numbers that are used as outputs for this section. Once
115 /// extracted, each output will be stored to an output register. This
116 /// documents the global value numbers that are used in this pattern.
118
119 /// Used to create an outlined function.
120 CodeExtractor *CE = nullptr;
121
122 /// The call site of the extracted region.
123 CallInst *Call = nullptr;
124
125 /// The function for the extracted region.
127
128 /// Flag for whether we have split out the IRSimilarityCandidate. That is,
129 /// made the region contained in the IRSimilarityCandidate its own BasicBlock.
130 bool CandidateSplit = false;
131
132 /// Flag for whether we should not consider this region for extraction.
133 bool IgnoreRegion = false;
134
135 /// The BasicBlock that is before the start of the region BasicBlock,
136 /// only defined when the region has been split.
137 BasicBlock *PrevBB = nullptr;
138
139 /// The BasicBlock that contains the starting instruction of the region.
140 BasicBlock *StartBB = nullptr;
141
142 /// The BasicBlock that contains the ending instruction of the region.
143 BasicBlock *EndBB = nullptr;
144
145 /// The BasicBlock that is after the end of the region BasicBlock,
146 /// only defined when the region has been split.
148
149 /// The Outlinable Group that contains this region and structurally similar
150 /// regions to this region.
152
154 : Candidate(&C), Parent(&Group) {
155 StartBB = C.getStartBB();
156 EndBB = C.getEndBB();
157 }
158
159 /// For the contained region, split the parent BasicBlock at the starting and
160 /// ending instructions of the contained IRSimilarityCandidate.
162
163 /// For the contained region, reattach the BasicBlock at the starting and
164 /// ending instructions of the contained IRSimilarityCandidate, or if the
165 /// function has been extracted, the start and end of the BasicBlock
166 /// containing the called function.
168
169 /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
170 ///
171 /// \param Other [in] - The OutlinableRegion to find the corresponding Value
172 /// in.
173 /// \param V [in] - The Value to look for in the other region.
174 /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
176 Value *V);
177
178 /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
179 ///
180 /// \param Other [in] - The OutlinableRegion to find the corresponding
181 /// BasicBlock in.
182 /// \param BB [in] - The BasicBlock to look for in the other region.
183 /// \return The corresponding BasicBlock to \p BB if it exists, otherwise nullptr.
185 BasicBlock *BB);
186
187 /// Get the size of the code removed from the region.
188 ///
189 /// \param [in] TTI - The TargetTransformInfo for the parent function.
190 /// \returns the code size of the region
192};
193
194/// This class is a pass that identifies similarity in a Module, extracts
195/// instances of the similarity, and then consolidates the similar regions
196/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
197/// to identify the similar regions of code, and then extracts the similar
198/// sections into a single function. See the above for an example as to
199/// how code is extracted and consolidated into a single function.
201public:
205 : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
206 LLVM_ABI bool run(Module &M);
207
208private:
209 /// Find repeated similar code sequences in \p M and outline them into new
210 /// Functions.
211 ///
212 /// \param [in] M - The module to outline from.
213 /// \returns The number of Functions created.
214 unsigned doOutline(Module &M);
215
216 /// Check whether an OutlinableRegion is incompatible with code already
217 /// outlined. OutlinableRegions are incompatible when there are overlapping
218 /// instructions, or code that has not been recorded has been added to the
219 /// instructions.
220 ///
221 /// \param [in] Region - The OutlinableRegion to check for conflicts with
222 /// already outlined code.
223 /// \returns whether the region can safely be outlined.
224 bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
225
226 /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
227 /// instructions contained in a previously outlined region and put the
228 /// remaining regions in \p CurrentGroup.
229 ///
230 /// \param [in] CandidateVec - List of similarity candidates for regions with
231 /// the same similarity structure.
232 /// \param [in,out] CurrentGroup - Contains the potential sections to
233 /// be outlined.
234 void
235 pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
236 OutlinableGroup &CurrentGroup);
237
238 /// Create the function based on the overall types found in the current
239 /// regions being outlined.
240 ///
241 /// \param M - The module to outline from.
242 /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
243 /// \param [in] FunctionNameSuffix - How many functions have we previously
244 /// created.
245 /// \returns the newly created function.
246 Function *createFunction(Module &M, OutlinableGroup &CG,
247 unsigned FunctionNameSuffix);
248
249 /// Identify the needed extracted inputs in a section, and add to the overall
250 /// function if needed.
251 ///
252 /// \param [in] M - The module to outline from.
253 /// \param [in,out] Region - The region to be extracted.
254 /// \param [in] NotSame - The global value numbers of the Values in the region
255 /// that do not have the same Constant in each structurally similar region.
256 void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
257 DenseSet<unsigned> &NotSame);
258
259 /// Find the number of instructions that will be removed by extracting the
260 /// OutlinableRegions in \p CurrentGroup.
261 ///
262 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
263 /// analyzed.
264 /// \returns the number of outlined instructions across all regions.
265 InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
266
267 /// Find the number of instructions that will be added by reloading arguments.
268 ///
269 /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
270 /// analyzed.
271 /// \returns the number of added reload instructions across all regions.
272 InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
273
274 /// Find the cost and the benefit of \p CurrentGroup and save it back to
275 /// \p CurrentGroup.
276 ///
277 /// \param [in] M - The module being analyzed
278 /// \param [in,out] CurrentGroup - The overall outlined section
279 void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
280
281 /// Update the output mapping based on the load instruction, and the outputs
282 /// of the extracted function.
283 ///
284 /// \param Region - The region extracted
285 /// \param Outputs - The outputs from the extracted function.
286 /// \param LI - The load instruction used to update the mapping.
287 void updateOutputMapping(OutlinableRegion &Region,
288 ArrayRef<Value *> Outputs, LoadInst *LI);
289
290 /// Extract \p Region into its own function.
291 ///
292 /// \param [in] Region - The region to be extracted into its own function.
293 /// \returns True if it was successfully outlined.
294 bool extractSection(OutlinableRegion &Region);
295
296 /// For the similarities found, and the extracted sections, create a single
297 /// outlined function with appropriate output blocks as necessary.
298 ///
299 /// \param [in] M - The module to outline from
300 /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
301 /// \param [in,out] FuncsToRemove - List of functions to remove from the
302 /// module after outlining is completed.
303 /// \param [in,out] OutlinedFunctionNum - the number of new outlined
304 /// functions.
305 void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
306 std::vector<Function *> &FuncsToRemove,
307 unsigned &OutlinedFunctionNum);
308
309 /// Fill the new function that will serve as the replacement function for all
310 /// of the extracted regions of a certain structure from the first region in
311 /// the list of regions. Replace this first region's extracted function with
312 /// the new overall function.
313 ///
314 /// \param [in] M - The module we are outlining from.
315 /// \param [in] CurrentGroup - The group of regions to be outlined.
316 /// \param [in,out] OutputStoreBBs - The output blocks for each different
317 /// set of stores needed for the different functions.
318 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
319 /// once outlining is complete.
320 void fillOverallFunction(
321 Module &M, OutlinableGroup &CurrentGroup,
322 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
323 std::vector<Function *> &FuncsToRemove);
324
325 /// If true, enables us to outline from functions that have LinkOnceODR
326 /// linkage.
327 bool OutlineFromLinkODRs = false;
328
329 /// If false, we do not worry if the cost is greater than the benefit. This
330 /// is for debugging and testing, so that we can test small cases to ensure
331 /// that the outlining is being done correctly.
332 bool CostModel = true;
333
334 /// The set of outlined Instructions, identified by their location in the
335 /// sequential ordering of instructions in a Module.
336 DenseSet<unsigned> Outlined;
337
338 /// TargetTransformInfo lambda for target specific information.
340
341 /// A mapping from newly created reloaded output values to the original value.
342 /// If a value is replaced by an output from an outlined region, this maps
343 /// that Value, back to its original Value.
344 DenseMap<Value *, Value *> OutputMappings;
345
346 /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
348
349 /// The optimization remark emitter for the pass.
351
352 /// The memory allocator used to allocate the CodeExtractors.
354
355 /// The memory allocator used to allocate the OutlinableRegions.
357
358 /// The memory allocator used to allocate new IRInstructionData.
360
361 /// Custom InstVisitor to classify different instructions for whether it can
362 /// be analyzed for similarity. This is needed as there may be instruction we
363 /// can identify as having similarity, but are more complicated to outline.
364 struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
365 InstructionAllowed() = default;
366
367 bool visitUncondBrInst(UncondBrInst &BI) { return EnableBranches; }
368 bool visitCondBrInst(CondBrInst &BI) { return EnableBranches; }
369 bool visitPHINode(PHINode &PN) { return EnableBranches; }
370 // TODO: Handle allocas.
371 bool visitAllocaInst(AllocaInst &AI) { return false; }
372 // VAArg instructions are not allowed since this could cause difficulty when
373 // differentiating between different sets of variable instructions in
374 // the deduplicated outlined regions.
375 bool visitVAArgInst(VAArgInst &VI) { return false; }
376 // We exclude all exception handling cases since they are so context
377 // dependent.
378 bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
379 bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
380 // DebugInfo should be included in the regions, but should not be
381 // analyzed for similarity as it has no bearing on the outcome of the
382 // program.
383 bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
384 // TODO: Handle specific intrinsics individually from those that can be
385 // handled.
386 bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
387 // We only handle CallInsts that are not indirect, since we cannot guarantee
388 // that they have a name in these cases.
389 bool visitCallInst(CallInst &CI) {
390 Function *F = CI.getCalledFunction();
391 bool IsIndirectCall = CI.isIndirectCall();
392 if (IsIndirectCall && !EnableIndirectCalls)
393 return false;
394 if (!F && !IsIndirectCall)
395 return false;
396 // Returning twice can cause issues with the state of the function call
397 // that were not expected when the function was used, so we do not include
398 // the call in outlined functions.
399 if (CI.canReturnTwice())
400 return false;
401 // TODO: Update the outliner to capture whether the outlined function
402 // needs these extra attributes.
403
404 // `nomerge` states that calls to this function should never be merged
405 // during optimisation. Outlining would have the effect of merging
406 // callsites from separate functions into a single callsite in the
407 // outlined function.
408 if (CI.hasFnAttr(Attribute::NoMerge))
409 return false;
410
411 // Functions marked with the swifttailcc and tailcc calling conventions
412 // require special handling when outlining musttail functions. The
413 // calling convention must be passed down to the outlined function as
414 // well. Further, there is special handling for musttail calls as well,
415 // requiring a return call directly after. For now, the outliner does not
416 // support this.
417 bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
418 CI.getCallingConv() == CallingConv::Tail;
419 if (IsTailCC && !EnableMustTailCalls)
420 return false;
421 if (CI.isMustTailCall() && !EnableMustTailCalls)
422 return false;
423 // The outliner can only handle musttail items if it is also accompanied
424 // by the tailcc or swifttailcc calling convention.
425 if (CI.isMustTailCall() && !IsTailCC)
426 return false;
427 return true;
428 }
429 // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
430 // the outlined region, and then returned as an output, this will have to be
431 // handled differently.
432 bool visitFreezeInst(FreezeInst &CI) { return false; }
433 // TODO: We do not current handle similarity that changes the control flow.
434 bool visitInvokeInst(InvokeInst &II) { return false; }
435 // TODO: We do not current handle similarity that changes the control flow.
436 bool visitCallBrInst(CallBrInst &CBI) { return false; }
437 // TODO: Handle interblock similarity.
438 bool visitTerminator(Instruction &I) { return false; }
439 bool visitInstruction(Instruction &I) { return true; }
440
441 // The flag variable that marks whether we should allow branch instructions
442 // to be outlined.
443 bool EnableBranches = false;
444
445 // The flag variable that marks whether we should allow indirect calls
446 // to be outlined.
447 bool EnableIndirectCalls = true;
448
449 // The flag variable that marks whether we should allow intrinsics
450 // instructions to be outlined.
451 bool EnableIntrinsics = false;
452
453 // The flag variable that marks whether we should allow musttail calls.
454 bool EnableMustTailCalls = false;
455 };
456
457 /// An InstVisitor used to exclude certain instructions from being outlined.
458 InstructionAllowed InstructionClassifier;
459};
460
461/// Pass to outline similar regions.
///
/// NOTE(review): the member declarations of this class are not visible in
/// this listing (listing line 464 is absent). Presumably the pass entry point
/// `run(Module &, ModuleAnalysisManager &)` is declared there -- confirm
/// against the original header.
462class IROutlinerPass : public OptionalPassInfoMixin<IROutlinerPass> {
463public:
465};
466
467} // end namespace llvm
468
469#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
#define LLVM_ABI
Definition Compiler.h:213
static bool IsIndirectCall(const MachineInstr *MI)
This header defines various interfaces for pass management in LLVM.
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
This class represents a function call, abstracting a target machine's calling convention.
Utility class for extracting code into a new function.
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Pass to outline similar regions.
Definition IROutliner.h:462
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
IROutliner(function_ref< TargetTransformInfo &(Function &)> GTTI, function_ref< IRSimilarityIdentifier &(Module &)> GIRSI, function_ref< OptimizationRemarkEmitter &(Function &)> GORE)
Definition IROutliner.h:202
LLVM_ABI bool run(Module &M)
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Base class for instruction visitors.
Definition InstVisitor.h:78
An instruction for reading from memory.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Unconditional Branch instruction.
LLVM Value Representation.
Definition Value.h:75
An efficient, type-erasing, non-owning reference to a callable.
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
This provides the utilities for hashing an Instruction to an unsigned integer.
A CRTP mix-in for passes that can be skipped.
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition IROutliner.h:123
CodeExtractor * CE
Used to create an outlined function.
Definition IROutliner.h:120
LLVM_ABI InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
DenseMap< unsigned, Constant * > AggArgToConstant
Mapping of the argument number in the deduplicated function to a given constant, which is used when c...
Definition IROutliner.h:112
DenseMap< unsigned, unsigned > AggArgToExtracted
Definition IROutliner.h:86
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition IROutliner.h:78
IRInstructionData * NewFront
If this region is outlined, the front and back IRInstructionData could potentially become invalidated...
Definition IROutliner.h:70
SmallVector< unsigned, 4 > GVNStores
The global value numbers that are used as outputs for this section.
Definition IROutliner.h:117
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition IROutliner.h:130
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition IROutliner.h:133
bool ChangedArgOrder
Marks whether we need to change the order of the arguments when mapping the old extracted function ca...
Definition IROutliner.h:98
LLVM_ABI void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
LLVM_ABI Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
DenseMap< unsigned, unsigned > ExtractedArgToAgg
Mapping the extracted argument number to the argument number in the overall function.
Definition IROutliner.h:85
LLVM_ABI BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition IROutliner.h:65
unsigned NumExtractedInputs
The number of extracted inputs from the CodeExtractor.
Definition IROutliner.h:74
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition IROutliner.h:106
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition IROutliner.h:93
OutlinableGroup * Parent
The Outlinable Group that contains this region and structurally similar regions to this region.
Definition IROutliner.h:151
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition IROutliner.h:153
Function * ExtractedFunction
The function for the extracted region.
Definition IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition IROutliner.h:140
IRInstructionData * NewBack
Definition IROutliner.h:71
LLVM_ABI void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...