LLVM  14.0.0git
IROutliner.h
Go to the documentation of this file.
1 //===- IROutliner.h - Extract similar IR regions into functions ------------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // The interface file for the IROutliner which is used by the IROutliner Pass.
11 //
12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
13 // of code. It evaluates each set of IRSimilarityCandidates with an estimate of
14 // whether it will provide code size reduction. Each region is extracted using
15 // the code extractor. These extracted functions are consolidated into a single
16 // function and called from the extracted call site.
17 //
18 // For example:
19 // \code
20 // %1 = add i32 %a, %b
21 // %2 = add i32 %b, %a
22 // %3 = add i32 %b, %a
23 // %4 = add i32 %a, %b
24 // \endcode
25 // would become function
26 // \code
27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
28 // %1 = add i32 %0, %1
29 // %2 = add i32 %1, %0
30 // ret void
31 // }
32 // \endcode
33 // with calls:
34 // \code
35 // call void outlined_ir_function(i32 %a, i32 %b)
36 // call void outlined_ir_function(i32 %b, i32 %a)
37 // \endcode
38 //
39 //===----------------------------------------------------------------------===//
40 
41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
43 
45 #include "llvm/IR/PassManager.h"
46 #include "llvm/IR/ValueMap.h"
49 #include <set>
50 
51 struct OutlinableGroup;
52 
53 namespace llvm {
54 using namespace IRSimilarity;
55 
56 class Module;
57 class TargetTransformInfo;
58 class OptimizationRemarkEmitter;
59 
60 /// The OutlinableRegion holds all the information for a specific region, or
61 /// sequence of instructions. This includes what values need to be hoisted to
62 /// arguments from the extracted function, inputs and outputs to the region, and
63 /// mapping from the extracted function arguments to overall function arguments.
65  /// Describes the region of code.
66  IRSimilarityCandidate *Candidate = nullptr;
67 
68  /// If this region is outlined, the front and back IRInstructionData could
69  /// potentially become invalidated if the only new instruction is a call.
70  /// This ensures that we replace in the instruction in the IRInstructionData.
71  IRInstructionData *NewFront = nullptr;
72  IRInstructionData *NewBack = nullptr;
73 
74  /// The number of extracted inputs from the CodeExtractor.
75  unsigned NumExtractedInputs = 0;
76 
77  /// The corresponding BasicBlock with the appropriate stores for this
78  /// OutlinableRegion in the overall function.
79  unsigned OutputBlockNum = -1;
80 
81  /// Mapping the extracted argument number to the argument number in the
82  /// overall function. Since there will be inputs, such as elevated constants
83  /// that are not the same in each region in a SimilarityGroup, or values that
84  /// cannot be sunk into the extracted section in every region, we must keep
85  /// track of which extracted argument maps to which overall argument.
88 
89  /// Marks whether we need to change the order of the arguments when mapping
90  /// the old extracted function call to the new aggregate outlined function
91  /// call.
92  bool ChangedArgOrder = false;
93 
94  /// Marks whether this region ends in a branch, there is special handling
95  /// required for the following basic blocks in this case.
96  bool EndsInBranch = false;
97 
98  /// Mapping of the argument number in the deduplicated function
99  /// to a given constant, which is used when creating the arguments to the call
100  /// to the newly created deduplicated function. This is handled separately
101  /// since the CodeExtractor does not recognize constants.
103 
104  /// The global value numbers that are used as outputs for this section. Once
105  /// extracted, each output will be stored to an output register. This
106  /// documents the global value numbers that are used in this pattern.
108 
109  /// Used to create an outlined function.
110  CodeExtractor *CE = nullptr;
111 
112  /// The call site of the extracted region.
113  CallInst *Call = nullptr;
114 
115  /// The function for the extracted region.
116  Function *ExtractedFunction = nullptr;
117 
118  /// Flag for whether we have split out the IRSimilarityCanidate. That is,
119  /// make the region contained the IRSimilarityCandidate its own BasicBlock.
120  bool CandidateSplit = false;
121 
122  /// Flag for whether we should not consider this region for extraction.
123  bool IgnoreRegion = false;
124 
125  /// The BasicBlock that is before the start of the region BasicBlock,
126  /// only defined when the region has been split.
127  BasicBlock *PrevBB = nullptr;
128 
129  /// The BasicBlock that contains the starting instruction of the region.
130  BasicBlock *StartBB = nullptr;
131 
132  /// The BasicBlock that contains the ending instruction of the region.
133  BasicBlock *EndBB = nullptr;
134 
135  /// The BasicBlock that is after the start of the region BasicBlock,
136  /// only defined when the region has been split.
137  BasicBlock *FollowBB = nullptr;
138 
139  /// The Outlinable Group that contains this region and structurally similar
140  /// regions to this region.
141  OutlinableGroup *Parent = nullptr;
142 
144  : Candidate(&C), Parent(&Group) {
145  StartBB = C.getStartBB();
146  EndBB = C.getEndBB();
147  }
148 
149  /// For the contained region, split the parent BasicBlock at the starting and
150  /// ending instructions of the contained IRSimilarityCandidate.
151  void splitCandidate();
152 
153  /// For the contained region, reattach the BasicBlock at the starting and
154  /// ending instructions of the contained IRSimilarityCandidate, or if the
155  /// function has been extracted, the start and end of the BasicBlock
156  /// containing the called function.
157  void reattachCandidate();
158 
159  /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
160  ///
161  /// \param Other [in] - The OutlinableRegion to find the corresponding Value
162  /// in.
163  /// \param V [in] - The Value to look for in the other region.
164  /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
165  Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
166 
167  /// Get the size of the code removed from the region.
168  ///
169  /// \param [in] TTI - The TargetTransformInfo for the parent function.
170  /// \returns the code size of the region
172 };
173 
174 /// This class is a pass that identifies similarity in a Module, extracts
175 /// instances of the similarity, and then consolidating the similar regions
176 /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
177 /// to identify the similar regions of code, and then extracts the similar
178 /// sections into a single function. See the above for an example as to
179 /// how code is extracted and consolidated into a single function.
180 class IROutliner {
181 public:
185  : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
186  bool run(Module &M);
187 
188 private:
189  /// Find repeated similar code sequences in \p M and outline them into new
190  /// Functions.
191  ///
192  /// \param [in] M - The module to outline from.
193  /// \returns The number of Functions created.
194  unsigned doOutline(Module &M);
195 
196  /// Check whether an OutlinableRegion is incompatible with code already
197  /// outlined. OutlinableRegions are incomptaible when there are overlapping
198  /// instructions, or code that has not been recorded has been added to the
199  /// instructions.
200  ///
201  /// \param [in] Region - The OutlinableRegion to check for conflicts with
202  /// already outlined code.
203  /// \returns whether the region can safely be outlined.
204  bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
205 
206  /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
207  /// instructions contained in a previously outlined region and put the
208  /// remaining regions in \p CurrentGroup.
209  ///
210  /// \param [in] CandidateVec - List of similarity candidates for regions with
211  /// the same similarity structure.
212  /// \param [in,out] CurrentGroup - Contains the potential sections to
213  /// be outlined.
214  void
215  pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
216  OutlinableGroup &CurrentGroup);
217 
218  /// Create the function based on the overall types found in the current
219  /// regions being outlined.
220  ///
221  /// \param M - The module to outline from.
222  /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
223  /// \param [in] FunctionNameSuffix - How many functions have we previously
224  /// created.
225  /// \returns the newly created function.
226  Function *createFunction(Module &M, OutlinableGroup &CG,
227  unsigned FunctionNameSuffix);
228 
229  /// Identify the needed extracted inputs in a section, and add to the overall
230  /// function if needed.
231  ///
232  /// \param [in] M - The module to outline from.
233  /// \param [in,out] Region - The region to be extracted.
234  /// \param [in] NotSame - The global value numbers of the Values in the region
235  /// that do not have the same Constant in each strucutrally similar region.
236  void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
237  DenseSet<unsigned> &NotSame);
238 
239  /// Find the number of instructions that will be removed by extracting the
240  /// OutlinableRegions in \p CurrentGroup.
241  ///
242  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
243  /// analyzed.
244  /// \returns the number of outlined instructions across all regions.
245  InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
246 
247  /// Find the number of instructions that will be added by reloading arguments.
248  ///
249  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
250  /// analyzed.
251  /// \returns the number of added reload instructions across all regions.
252  InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
253 
254  /// Find the cost and the benefit of \p CurrentGroup and save it back to
255  /// \p CurrentGroup.
256  ///
257  /// \param [in] M - The module being analyzed
258  /// \param [in,out] CurrentGroup - The overall outlined section
259  void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
260 
261  /// Update the output mapping based on the load instruction, and the outputs
262  /// of the extracted function.
263  ///
264  /// \param Region - The region extracted
265  /// \param Outputs - The outputs from the extracted function.
266  /// \param LI - The load instruction used to update the mapping.
267  void updateOutputMapping(OutlinableRegion &Region,
268  ArrayRef<Value *> Outputs, LoadInst *LI);
269 
270  /// Extract \p Region into its own function.
271  ///
272  /// \param [in] Region - The region to be extracted into its own function.
273  /// \returns True if it was successfully outlined.
274  bool extractSection(OutlinableRegion &Region);
275 
276  /// For the similarities found, and the extracted sections, create a single
277  /// outlined function with appropriate output blocks as necessary.
278  ///
279  /// \param [in] M - The module to outline from
280  /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
281  /// \param [in,out] FuncsToRemove - List of functions to remove from the
282  /// module after outlining is completed.
283  /// \param [in,out] OutlinedFunctionNum - the number of new outlined
284  /// functions.
285  void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
286  std::vector<Function *> &FuncsToRemove,
287  unsigned &OutlinedFunctionNum);
288 
289  /// If true, enables us to outline from functions that have LinkOnceFromODR
290  /// linkages.
291  bool OutlineFromLinkODRs = false;
292 
293  /// If false, we do not worry if the cost is greater than the benefit. This
294  /// is for debugging and testing, so that we can test small cases to ensure
295  /// that the outlining is being done correctly.
296  bool CostModel = true;
297 
298  /// The set of outlined Instructions, identified by their location in the
299  /// sequential ordering of instructions in a Module.
300  DenseSet<unsigned> Outlined;
301 
302  /// TargetTransformInfo lambda for target specific information.
304 
305  /// A mapping from newly created reloaded output values to the original value.
306  /// If an value is replace by an output from an outlined region, this maps
307  /// that Value, back to its original Value.
308  DenseMap<Value *, Value *> OutputMappings;
309 
310  /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
312 
313  /// The optimization remark emitter for the pass.
315 
316  /// The memory allocator used to allocate the CodeExtractors.
317  SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
318 
319  /// The memory allocator used to allocate the OutlinableRegions.
321 
322  /// The memory allocator used to allocate new IRInstructionData.
324 
325  /// Custom InstVisitor to classify different instructions for whether it can
326  /// be analyzed for similarity. This is needed as there may be instruction we
327  /// can identify as having similarity, but are more complicated to outline.
328  struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
329  InstructionAllowed() {}
330 
331  bool visitBranchInst(BranchInst &BI) {
332  return EnableBranches;
333  }
334  // TODO: Determine a scheme to resolve when the labels are similar enough.
335  bool visitPHINode(PHINode &PN) { return false; }
336  // TODO: Handle allocas.
337  bool visitAllocaInst(AllocaInst &AI) { return false; }
338  // VAArg instructions are not allowed since this could cause difficulty when
339  // differentiating between different sets of variable instructions in
340  // the deduplicated outlined regions.
341  bool visitVAArgInst(VAArgInst &VI) { return false; }
342  // We exclude all exception handling cases since they are so context
343  // dependent.
344  bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
345  bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
346  // DebugInfo should be included in the regions, but should not be
347  // analyzed for similarity as it has no bearing on the outcome of the
348  // program.
349  bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
350  // TODO: Handle specific intrinsics individually from those that can be
351  // handled.
352  bool IntrinsicInst(IntrinsicInst &II) { return false; }
353  // We only handle CallInsts that are not indirect, since we cannot guarantee
354  // that they have a name in these cases.
355  bool visitCallInst(CallInst &CI) {
356  Function *F = CI.getCalledFunction();
357  if (!F || CI.isIndirectCall() || !F->hasName())
358  return false;
359  return true;
360  }
361  // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
362  // the outlined region, and then returned as an output, this will have to be
363  // handled differently.
364  bool visitFreezeInst(FreezeInst &CI) { return false; }
365  // TODO: We do not current handle similarity that changes the control flow.
366  bool visitInvokeInst(InvokeInst &II) { return false; }
367  // TODO: We do not current handle similarity that changes the control flow.
368  bool visitCallBrInst(CallBrInst &CBI) { return false; }
369  // TODO: Handle interblock similarity.
370  bool visitTerminator(Instruction &I) { return false; }
371  bool visitInstruction(Instruction &I) { return true; }
372 
373  // The flag variable that marks whether we should allow branch instructions
374  // to be outlined.
375  bool EnableBranches = false;
376  };
377 
378  /// A InstVisitor used to exclude certain instructions from being outlined.
379  InstructionAllowed InstructionClassifier;
380 };
381 
382 /// Pass to outline similar regions.
383 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
384 public:
386 };
387 
388 } // end namespace llvm
389 
390 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition: PassManager.h:374
llvm::Function
Definition: Function.h:62
llvm::SmallVector< unsigned, 4 >
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::SpecificBumpPtrAllocator
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition: Allocator.h:376
CodeExtractor.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::IROutliner::IROutliner
IROutliner(function_ref< TargetTransformInfo &(Function &)> GTTI, function_ref< IRSimilarityIdentifier &(Module &)> GIRSI, function_ref< OptimizationRemarkEmitter &(Function &)> GORE)
Definition: IROutliner.h:182
llvm::OutlinableRegion::OutlinableRegion
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition: IROutliner.h:143
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::DenseSet< unsigned >
VI
@ VI
Definition: SIInstrInfo.cpp:7703
llvm::DenseMap< unsigned, unsigned >
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::OutlinableRegion::GVNStores
SmallVector< unsigned, 4 > GVNStores
The global value numbers that are used as outputs for this section.
Definition: IROutliner.h:107
llvm::IRSimilarity::IRInstructionData
This provides the utilities for hashing an Instruction to an unsigned integer.
Definition: IRSimilarityIdentifier.h:113
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
InstructionCost.h
OutlinableGroup
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:63
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::OutlinableRegion::ExtractedArgToAgg
DenseMap< unsigned, unsigned > ExtractedArgToAgg
Mapping the extracted argument number to the argument number in the overall function.
Definition: IROutliner.h:86
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::InstVisitor
Base class for instruction visitors.
Definition: InstVisitor.h:79
Module
Machine Check Debug Module
Definition: MachineCheckDebugify.cpp:122
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
ValueMap.h
llvm::IROutlinerPass
Pass to outline similar regions.
Definition: IROutliner.h:383
llvm::OutlinableRegion::AggArgToExtracted
DenseMap< unsigned, unsigned > AggArgToExtracted
Definition: IROutliner.h:87
llvm::OutlinableRegion
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:64
IRSimilarityIdentifier.h
PassManager.h
llvm::IRSimilarity::IRSimilarityCandidate
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
Definition: IRSimilarityIdentifier.h:549
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::IROutliner
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:180
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1469
llvm::IRSimilarity::IRSimilarityIdentifier
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Definition: IRSimilarityIdentifier.h:883
llvm::Region
Definition: RegionInfo.h:889
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::OutlinableRegion::AggArgToConstant
DenseMap< unsigned, Constant * > AggArgToConstant
Mapping of the argument number in the deduplicated function to a given constant, which is used when c...
Definition: IROutliner.h:102
llvm::codeview::PublicSymFlags::Function
@ Function