LLVM  13.0.0git
IROutliner.h
Go to the documentation of this file.
1 //===- IROutliner.h - Extract similar IR regions into functions ------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The interface file for the IROutliner which is used by the IROutliner Pass.
11 //
12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
13 // of code. It evaluates each set of IRSimilarityCandidates with an estimate of
14 // whether it will provide code size reduction. Each region is extracted using
15 // the code extractor. These extracted functions are consolidated into a single
16 // function and called from the extracted call site.
17 //
18 // For example:
19 // \code
20 // %1 = add i32 %a, %b
21 // %2 = add i32 %b, %a
22 // %3 = add i32 %b, %a
23 // %4 = add i32 %a, %b
24 // \endcode
25 // would become function
26 // \code
27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
28 // %1 = add i32 %0, %1
29 // %2 = add i32 %1, %0
30 // ret void
31 // }
32 // \endcode
33 // with calls:
34 // \code
35 // call void outlined_ir_function(i32 %a, i32 %b)
36 // call void outlined_ir_function(i32 %b, i32 %a)
37 // \endcode
38 //
39 //===----------------------------------------------------------------------===//
40 
41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43 
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/ValueMap.h"
49 #include <set>
50 
51 struct OutlinableGroup;
52 
53 namespace llvm {
54 using namespace IRSimilarity;
55 
56 class Module;
57 class TargetTransformInfo;
58 class OptimizationRemarkEmitter;
59 
60 /// The OutlinableRegion holds all the information for a specific region, or
61 /// sequence of instructions. This includes what values need to be hoisted to
62 /// arguments from the extracted function, inputs and outputs to the region, and
63 /// mapping from the extracted function arguments to overall function arguments.
65  /// Describes the region of code.
66  IRSimilarityCandidate *Candidate = nullptr;
67 
68  /// If this region is outlined, the front and back IRInstructionData could
69  /// potentially become invalidated if the only new instruction is a call.
70  /// This ensures that we replace in the instruction in the IRInstructionData.
71  IRInstructionData *NewFront = nullptr;
72  IRInstructionData *NewBack = nullptr;
73 
74  /// The number of extracted inputs from the CodeExtractor.
75  unsigned NumExtractedInputs = 0;
76 
77  /// The corresponding BasicBlock with the appropriate stores for this
78  /// OutlinableRegion in the overall function.
79  unsigned OutputBlockNum = -1;
80 
81  /// Mapping the extracted argument number to the argument number in the
82  /// overall function. Since there will be inputs, such as elevated constants
83  /// that are not the same in each region in a SimilarityGroup, or values that
84  /// cannot be sunk into the extracted section in every region, we must keep
85  /// track of which extracted argument maps to which overall argument.
88 
89  /// Mapping of the argument number in the deduplicated function
90  /// to a given constant, which is used when creating the arguments to the call
91  /// to the newly created deduplicated function. This is handled separately
92  /// since the CodeExtractor does not recognize constants.
94 
95  /// The global value numbers that are used as outputs for this section. Once
96  /// extracted, each output will be stored to an output register. This
97  /// documents the global value numbers that are used in this pattern.
99 
100  /// Used to create an outlined function.
101  CodeExtractor *CE = nullptr;
102 
103  /// The call site of the extracted region.
104  CallInst *Call = nullptr;
105 
106  /// The function for the extracted region.
107  Function *ExtractedFunction = nullptr;
108 
109  /// Flag for whether we have split out the IRSimilarityCanidate. That is,
110  /// make the region contained the IRSimilarityCandidate its own BasicBlock.
111  bool CandidateSplit = false;
112 
113  /// Flag for whether we should not consider this region for extraction.
114  bool IgnoreRegion = false;
115 
116  /// The BasicBlock that is before the start of the region BasicBlock,
117  /// only defined when the region has been split.
118  BasicBlock *PrevBB = nullptr;
119 
120  /// The BasicBlock that contains the starting instruction of the region.
121  BasicBlock *StartBB = nullptr;
122 
123  /// The BasicBlock that contains the ending instruction of the region.
124  BasicBlock *EndBB = nullptr;
125 
126  /// The BasicBlock that is after the start of the region BasicBlock,
127  /// only defined when the region has been split.
128  BasicBlock *FollowBB = nullptr;
129 
130  /// The Outlinable Group that contains this region and structurally similar
131  /// regions to this region.
132  OutlinableGroup *Parent = nullptr;
133 
135  : Candidate(&C), Parent(&Group) {
136  StartBB = C.getStartBB();
137  EndBB = C.getEndBB();
138  }
139 
140  /// For the contained region, split the parent BasicBlock at the starting and
141  /// ending instructions of the contained IRSimilarityCandidate.
142  void splitCandidate();
143 
144  /// For the contained region, reattach the BasicBlock at the starting and
145  /// ending instructions of the contained IRSimilarityCandidate, or if the
146  /// function has been extracted, the start and end of the BasicBlock
147  /// containing the called function.
148  void reattachCandidate();
149 
150  /// Get the size of the code removed from the region.
151  ///
152  /// \param [in] TTI - The TargetTransformInfo for the parent function.
153  /// \returns the code size of the region
155 };
156 
157 /// This class is a pass that identifies similarity in a Module, extracts
158 /// instances of the similarity, and then consolidating the similar regions
159 /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
160 /// to identify the similar regions of code, and then extracts the similar
161 /// sections into a single function. See the above for an example as to
162 /// how code is extracted and consolidated into a single function.
163 class IROutliner {
164 public:
168  : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
169  bool run(Module &M);
170 
171 private:
172  /// Find repeated similar code sequences in \p M and outline them into new
173  /// Functions.
174  ///
175  /// \param [in] M - The module to outline from.
176  /// \returns The number of Functions created.
177  unsigned doOutline(Module &M);
178 
179  /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
180  /// instructions contained in a previously outlined region and put the
181  /// remaining regions in \p CurrentGroup.
182  ///
183  /// \param [in] CandidateVec - List of similarity candidates for regions with
184  /// the same similarity structure.
185  /// \param [in,out] CurrentGroup - Contains the potential sections to
186  /// be outlined.
187  void
188  pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
189  OutlinableGroup &CurrentGroup);
190 
191  /// Create the function based on the overall types found in the current
192  /// regions being outlined.
193  ///
194  /// \param M - The module to outline from.
195  /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
196  /// \param [in] FunctionNameSuffix - How many functions have we previously
197  /// created.
198  /// \returns the newly created function.
199  Function *createFunction(Module &M, OutlinableGroup &CG,
200  unsigned FunctionNameSuffix);
201 
202  /// Identify the needed extracted inputs in a section, and add to the overall
203  /// function if needed.
204  ///
205  /// \param [in] M - The module to outline from.
206  /// \param [in,out] Region - The region to be extracted.
207  /// \param [in] NotSame - The global value numbers of the Values in the region
208  /// that do not have the same Constant in each strucutrally similar region.
209  void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
210  DenseSet<unsigned> &NotSame);
211 
212  /// Find the number of instructions that will be removed by extracting the
213  /// OutlinableRegions in \p CurrentGroup.
214  ///
215  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
216  /// analyzed.
217  /// \returns the number of outlined instructions across all regions.
218  InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
219 
220  /// Find the number of instructions that will be added by reloading arguments.
221  ///
222  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
223  /// analyzed.
224  /// \returns the number of added reload instructions across all regions.
225  InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
226 
227  /// Find the cost and the benefit of \p CurrentGroup and save it back to
228  /// \p CurrentGroup.
229  ///
230  /// \param [in] M - The module being analyzed
231  /// \param [in,out] CurrentGroup - The overall outlined section
232  void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
233 
234  /// Update the output mapping based on the load instruction, and the outputs
235  /// of the extracted function.
236  ///
237  /// \param Region - The region extracted
238  /// \param Outputs - The outputs from the extracted function.
239  /// \param LI - The load instruction used to update the mapping.
240  void updateOutputMapping(OutlinableRegion &Region,
241  ArrayRef<Value *> Outputs, LoadInst *LI);
242 
243  /// Extract \p Region into its own function.
244  ///
245  /// \param [in] Region - The region to be extracted into its own function.
246  /// \returns True if it was successfully outlined.
247  bool extractSection(OutlinableRegion &Region);
248 
249  /// For the similarities found, and the extracted sections, create a single
250  /// outlined function with appropriate output blocks as necessary.
251  ///
252  /// \param [in] M - The module to outline from
253  /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
254  /// \param [in,out] FuncsToRemove - List of functions to remove from the
255  /// module after outlining is completed.
256  /// \param [in,out] OutlinedFunctionNum - the number of new outlined
257  /// functions.
258  void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
259  std::vector<Function *> &FuncsToRemove,
260  unsigned &OutlinedFunctionNum);
261 
262  /// If true, enables us to outline from functions that have LinkOnceFromODR
263  /// linkages.
264  bool OutlineFromLinkODRs = false;
265 
266  /// If false, we do not worry if the cost is greater than the benefit. This
267  /// is for debugging and testing, so that we can test small cases to ensure
268  /// that the outlining is being done correctly.
269  bool CostModel = true;
270 
271  /// The set of outlined Instructions, identified by their location in the
272  /// sequential ordering of instructions in a Module.
273  DenseSet<unsigned> Outlined;
274 
275  /// TargetTransformInfo lambda for target specific information.
277 
278  /// A mapping from newly created reloaded output values to the original value.
279  /// If an value is replace by an output from an outlined region, this maps
280  /// that Value, back to its original Value.
281  DenseMap<Value *, Value *> OutputMappings;
282 
283  /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
285 
286  /// The optimization remark emitter for the pass.
288 
289  /// The memory allocator used to allocate the CodeExtractors.
290  SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
291 
292  /// The memory allocator used to allocate the OutlinableRegions.
294 
295  /// The memory allocator used to allocate new IRInstructionData.
297 
298  /// Custom InstVisitor to classify different instructions for whether it can
299  /// be analyzed for similarity. This is needed as there may be instruction we
300  /// can identify as having similarity, but are more complicated to outline.
301  struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
302  InstructionAllowed() {}
303 
304  // TODO: Determine a scheme to resolve when the label is similar enough.
305  bool visitBranchInst(BranchInst &BI) { return false; }
306  // TODO: Determine a scheme to resolve when the labels are similar enough.
307  bool visitPHINode(PHINode &PN) { return false; }
308  // TODO: Handle allocas.
309  bool visitAllocaInst(AllocaInst &AI) { return false; }
310  // VAArg instructions are not allowed since this could cause difficulty when
311  // differentiating between different sets of variable instructions in
312  // the deduplicated outlined regions.
313  bool visitVAArgInst(VAArgInst &VI) { return false; }
314  // We exclude all exception handling cases since they are so context
315  // dependent.
316  bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
317  bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
318  // DebugInfo should be included in the regions, but should not be
319  // analyzed for similarity as it has no bearing on the outcome of the
320  // program.
321  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
322  // TODO: Handle specific intrinsics individually from those that can be
323  // handled.
324  bool IntrinsicInst(IntrinsicInst &II) { return false; }
325  // We only handle CallInsts that are not indirect, since we cannot guarantee
326  // that they have a name in these cases.
327  bool visitCallInst(CallInst &CI) {
328  Function *F = CI.getCalledFunction();
329  if (!F || CI.isIndirectCall() || !F->hasName())
330  return false;
331  return true;
332  }
333  // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
334  // the outlined region, and then returned as an output, this will have to be
335  // handled differently.
336  bool visitFreezeInst(FreezeInst &CI) { return false; }
337  // TODO: We do not current handle similarity that changes the control flow.
338  bool visitInvokeInst(InvokeInst &II) { return false; }
339  // TODO: We do not current handle similarity that changes the control flow.
340  bool visitCallBrInst(CallBrInst &CBI) { return false; }
341  // TODO: Handle interblock similarity.
342  bool visitTerminator(Instruction &I) { return false; }
343  bool visitInstruction(Instruction &I) { return true; }
344  };
345 
346  /// A InstVisitor used to exclude certain instructions from being outlined.
347  InstructionAllowed InstructionClassifier;
348 };
349 
350 /// Pass to outline similar regions.
351 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
352 public:
354 };
355 
356 } // end namespace llvm
357 
358 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:374
llvm::Function
Definition: Function.h:61
llvm::SmallVector< unsigned, 4 >
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::SpecificBumpPtrAllocator
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:376
CodeExtractor.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::IROutliner::IROutliner
IROutliner(function_ref< TargetTransformInfo &(Function &)> GTTI, function_ref< IRSimilarityIdentifier &(Module &)> GIRSI, function_ref< OptimizationRemarkEmitter &(Function &)> GORE)
Definition: IROutliner.h:165
llvm::OutlinableRegion::OutlinableRegion
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition: IROutliner.h:134
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::DenseSet< unsigned >
VI
@ VI
Definition: SIInstrInfo.cpp:7494
llvm::DenseMap< unsigned, unsigned >
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::OutlinableRegion::GVNStores
SmallVector< unsigned, 4 > GVNStores
The global value numbers that are used as outputs for this section.
Definition: IROutliner.h:98
llvm::IRSimilarity::IRInstructionData
This provides the utilities for hashing an Instruction to an unsigned integer.
Definition: IRSimilarityIdentifier.h:113
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
InstructionCost.h
OutlinableGroup
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:55
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::OutlinableRegion::ExtractedArgToAgg
DenseMap< unsigned, unsigned > ExtractedArgToAgg
Mapping the extracted argument number to the argument number in the overall function.
Definition: IROutliner.h:86
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
Module
Machine Check Debug Module
Definition: MachineCheckDebugify.cpp:122
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
ValueMap.h
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:351
llvm::OutlinableRegion::AggArgToExtracted
DenseMap< unsigned, unsigned > AggArgToExtracted
Definition: IROutliner.h:87
llvm::OutlinableRegion
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:64
IRSimilarityIdentifier.h
PassManager.h
llvm::IRSimilarity::IRSimilarityCandidate
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
Definition: IRSimilarityIdentifier.h:470
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::IROutliner
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:163
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
llvm::IRSimilarity::IRSimilarityIdentifier
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Definition: IRSimilarityIdentifier.h:652
llvm::Region
Definition: RegionInfo.h:889
llvm::OutlinableRegion::AggArgToConstant
DenseMap< unsigned, Constant * > AggArgToConstant
Mapping of the argument number in the deduplicated function to a given constant, which is used when c...
Definition: IROutliner.h:93
llvm::codeview::PublicSymFlags::Function
@ Function