LLVM  14.0.0git
IROutliner.cpp
Go to the documentation of this file.
1 //===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 // Implementation for the IROutliner which is used by the IROutliner Pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
18 #include "llvm/IR/Attributes.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/Dominators.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/InitializePasses.h"
25 #include "llvm/Pass.h"
27 #include "llvm/Transforms/IPO.h"
28 #include <map>
29 #include <set>
30 #include <vector>
31 
32 #define DEBUG_TYPE "iroutliner"
33 
34 using namespace llvm;
35 using namespace IRSimilarity;
36 
37 // A command flag to be used for debugging to exclude branches from similarity
38 // matching and outlining.
40 
41 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
42 // functions. This is false by default because the linker can dedupe linkonceodr
43 // functions. Since the outliner is confined to a single module (modulo LTO),
44 // this is off by default. It should, however, be the default behavior in
45 // LTO.
47  "enable-linkonceodr-ir-outlining", cl::Hidden,
48  cl::desc("Enable the IR outliner on linkonceodr functions"),
49  cl::init(false));
50 
51 // This is a debug option to test small pieces of code to ensure that outlining
52 // works correctly.
54  "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
55  cl::desc("Debug option to outline greedily, without restriction that "
56  "calculated benefit outweighs cost"));
57 
58 /// The OutlinableGroup holds all the overarching information for outlining
59 /// a set of regions that are structurally similar to one another, such as the
60 /// types of the overall function, the output blocks, the sets of stores needed
61 /// and a list of the different regions. This information is used in the
62 /// deduplication of extracted regions with the same structure.
64  /// The sections that could be outlined
65  std::vector<OutlinableRegion *> Regions;
66 
67  /// The argument types for the function created as the overall function to
68  /// replace the extracted function for each region.
69  std::vector<Type *> ArgumentTypes;
70  /// The FunctionType for the overall function.
71  FunctionType *OutlinedFunctionType = nullptr;
72  /// The Function for the collective overall function.
73  Function *OutlinedFunction = nullptr;
74 
75  /// Flag for whether we should not consider this group of OutlinableRegions
76  /// for extraction.
77  bool IgnoreGroup = false;
78 
79  /// The return blocks for the overall function.
81 
82  /// The PHIBlocks with their corresponding return block based on the return
83  /// value as the key.
85 
86  /// A set containing the different GVN store sets needed. Each array contains
87  /// a sorted list of the different values that need to be stored into output
88  /// registers.
90 
91  /// Flag for whether the \ref ArgumentTypes have been defined after the
92  /// extraction of the first region.
93  bool InputTypesSet = false;
94 
95  /// The number of input values in \ref ArgumentTypes. Anything after this
96  /// index in ArgumentTypes is an output argument.
97  unsigned NumAggregateInputs = 0;
98 
99  /// The mapping of the canonical numbering of the values in outlined sections
100  /// to specific arguments.
102 
103  /// The number of branches in the region target a basic block that is outside
104  /// of the region.
105  unsigned BranchesToOutside = 0;
106 
107  /// The number of instructions that will be outlined by extracting \ref
108  /// Regions.
109  InstructionCost Benefit = 0;
110  /// The number of added instructions needed for the outlining of the \ref
111  /// Regions.
112  InstructionCost Cost = 0;
113 
114  /// The argument that needs to be marked with the swifterr attribute. If not
115  /// needed, there is no value.
117 
118  /// For the \ref Regions, we look at every Value. If it is a constant,
119  /// we check whether it is the same in Region.
120  ///
121  /// \param [in,out] NotSame contains the global value numbers where the
122  /// constant is not always the same, and must be passed in as an argument.
123  void findSameConstants(DenseSet<unsigned> &NotSame);
124 
125  /// For the regions, look at each set of GVN stores needed and account for
126  /// each combination. Add an argument to the argument types if there is
127  /// more than one combination.
128  ///
129  /// \param [in] M - The module we are outlining from.
130  void collectGVNStoreSets(Module &M);
131 };
132 
133 /// Move the contents of \p SourceBB to before the last instruction of \p
134 /// TargetBB.
135 /// \param SourceBB - the BasicBlock to pull Instructions from.
136 /// \param TargetBB - the BasicBlock to put Instruction into.
137 static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
138  BasicBlock::iterator BBCurr, BBEnd, BBNext;
139  for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
140  BBCurr = BBNext) {
141  BBNext = std::next(BBCurr);
142  BBCurr->moveBefore(TargetBB, TargetBB.end());
143  }
144 }
145 
146 /// A function to sort the keys of \p Map, which must be a mapping of constant
147 /// values to basic blocks and return it in \p SortedKeys
148 ///
149 /// \param SortedKeys - The vector the keys will be return in and sorted.
150 /// \param Map - The DenseMap containing keys to sort.
151 static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
153  for (auto &VtoBB : Map)
154  SortedKeys.push_back(VtoBB.first);
155 
156  stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
157  const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
158  const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
159  assert(RHSC && "Not a constant integer in return value?");
160  assert(LHSC && "Not a constant integer in return value?");
161 
162  return LHSC->getLimitedValue() < RHSC->getLimitedValue();
163  });
164 }
165 
167  Value *V) {
168  Optional<unsigned> GVN = Candidate->getGVN(V);
169  assert(GVN.hasValue() && "No GVN for incoming value");
170  Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
171  Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
172  Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
173  return FoundValueOpt.getValueOr(nullptr);
174 }
175 
177  assert(!CandidateSplit && "Candidate already split!");
178 
179  Instruction *BackInst = Candidate->backInstruction();
180 
181  Instruction *EndInst = nullptr;
182  // Check whether the last instruction is a terminator, if it is, we do
183  // not split on the following instruction. We leave the block as it is. We
184  // also check that this is not the last instruction in the Module, otherwise
185  // the check for whether the current following instruction matches the
186  // previously recorded instruction will be incorrect.
187  if (!BackInst->isTerminator() ||
188  BackInst->getParent() != &BackInst->getFunction()->back()) {
189  EndInst = Candidate->end()->Inst;
190  assert(EndInst && "Expected an end instruction?");
191  }
192 
193  // We check if the current instruction following the last instruction in the
194  // region is the same as the recorded instruction following the last
195  // instruction. If they do not match, there could be problems in rewriting
196  // the program after outlining, so we ignore it.
197  if (!BackInst->isTerminator() &&
198  EndInst != BackInst->getNextNonDebugInstruction())
199  return;
200 
201  Instruction *StartInst = (*Candidate->begin()).Inst;
202  assert(StartInst && "Expected a start instruction?");
203  StartBB = StartInst->getParent();
204  PrevBB = StartBB;
205 
206  // The basic block gets split like so:
207  // block: block:
208  // inst1 inst1
209  // inst2 inst2
210  // region1 br block_to_outline
211  // region2 block_to_outline:
212  // region3 -> region1
213  // region4 region2
214  // inst3 region3
215  // inst4 region4
216  // br block_after_outline
217  // block_after_outline:
218  // inst3
219  // inst4
220 
221  std::string OriginalName = PrevBB->getName().str();
222 
223  StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
224  PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
225 
226  CandidateSplit = true;
227  if (!BackInst->isTerminator()) {
228  EndBB = EndInst->getParent();
229  FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
230  EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
231  FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
232  return;
233  }
234 
235  EndBB = BackInst->getParent();
236  EndsInBranch = true;
237  FollowBB = nullptr;
238 }
239 
241  assert(CandidateSplit && "Candidate is not split!");
242 
243  // The basic block gets reattached like so:
244  // block: block:
245  // inst1 inst1
246  // inst2 inst2
247  // br block_to_outline region1
248  // block_to_outline: -> region2
249  // region1 region3
250  // region2 region4
251  // region3 inst3
252  // region4 inst4
253  // br block_after_outline
254  // block_after_outline:
255  // inst3
256  // inst4
257  assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
258 
259  // StartBB should only have one predecessor since we put an unconditional
260  // branch at the end of PrevBB when we split the BasicBlock.
261  PrevBB = StartBB->getSinglePredecessor();
262  assert(PrevBB != nullptr &&
263  "No Predecessor for the region start basic block!");
264 
265  assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
266  PrevBB->getTerminator()->eraseFromParent();
267 
268  moveBBContents(*StartBB, *PrevBB);
269 
270  BasicBlock *PlacementBB = PrevBB;
271  if (StartBB != EndBB)
272  PlacementBB = EndBB;
273  if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
274  assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
275  assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
276  PlacementBB->getTerminator()->eraseFromParent();
277  moveBBContents(*FollowBB, *PlacementBB);
278  PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
279  FollowBB->eraseFromParent();
280  }
281 
282  PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
283  StartBB->eraseFromParent();
284 
285  // Make sure to save changes back to the StartBB.
286  StartBB = PrevBB;
287  EndBB = nullptr;
288  PrevBB = nullptr;
289  FollowBB = nullptr;
290 
291  CandidateSplit = false;
292 }
293 
294 /// Find whether \p V matches the Constants previously found for the \p GVN.
295 ///
296 /// \param V - The value to check for consistency.
297 /// \param GVN - The global value number assigned to \p V.
298 /// \param GVNToConstant - The mapping of global value number to Constants.
299 /// \returns true if the Value matches the Constant mapped to by V and false if
300 /// it \p V is a Constant but does not match.
301 /// \returns None if \p V is not a Constant.
302 static Optional<bool>
303 constantMatches(Value *V, unsigned GVN,
304  DenseMap<unsigned, Constant *> &GVNToConstant) {
305  // See if we have a constants
306  Constant *CST = dyn_cast<Constant>(V);
307  if (!CST)
308  return None;
309 
310  // Holds a mapping from a global value number to a Constant.
312  bool Inserted;
313 
314 
315  // If we have a constant, try to make a new entry in the GVNToConstant.
316  std::tie(GVNToConstantIt, Inserted) =
317  GVNToConstant.insert(std::make_pair(GVN, CST));
318  // If it was found and is not equal, it is not the same. We do not
319  // handle this case yet, and exit early.
320  if (Inserted || (GVNToConstantIt->second == CST))
321  return true;
322 
323  return false;
324 }
325 
327  InstructionCost Benefit = 0;
328 
329  // Estimate the benefit of outlining a specific sections of the program. We
330  // delegate mostly this task to the TargetTransformInfo so that if the target
331  // has specific changes, we can have a more accurate estimate.
332 
333  // However, getInstructionCost delegates the code size calculation for
334  // arithmetic instructions to getArithmeticInstrCost in
335  // include/Analysis/TargetTransformImpl.h, where it always estimates that the
336  // code size for a division and remainder instruction to be equal to 4, and
337  // everything else to 1. This is not an accurate representation of the
338  // division instruction for targets that have a native division instruction.
339  // To be overly conservative, we only add 1 to the number of instructions for
340  // each division instruction.
341  for (IRInstructionData &ID : *Candidate) {
342  Instruction *I = ID.Inst;
343  switch (I->getOpcode()) {
344  case Instruction::FDiv:
345  case Instruction::FRem:
346  case Instruction::SDiv:
347  case Instruction::SRem:
348  case Instruction::UDiv:
349  case Instruction::URem:
350  Benefit += 1;
351  break;
352  default:
354  break;
355  }
356  }
357 
358  return Benefit;
359 }
360 
361 /// Find whether \p Region matches the global value numbering to Constant
362 /// mapping found so far.
363 ///
364 /// \param Region - The OutlinableRegion we are checking for constants
365 /// \param GVNToConstant - The mapping of global value number to Constants.
366 /// \param NotSame - The set of global value numbers that do not have the same
367 /// constant in each region.
368 /// \returns true if all Constants are the same in every use of a Constant in \p
369 /// Region and false if not
370 static bool
372  DenseMap<unsigned, Constant *> &GVNToConstant,
373  DenseSet<unsigned> &NotSame) {
374  bool ConstantsTheSame = true;
375 
376  IRSimilarityCandidate &C = *Region.Candidate;
377  for (IRInstructionData &ID : C) {
378 
379  // Iterate over the operands in an instruction. If the global value number,
380  // assigned by the IRSimilarityCandidate, has been seen before, we check if
381  // the the number has been found to be not the same value in each instance.
382  for (Value *V : ID.OperVals) {
383  Optional<unsigned> GVNOpt = C.getGVN(V);
384  assert(GVNOpt.hasValue() && "Expected a GVN for operand?");
385  unsigned GVN = GVNOpt.getValue();
386 
387  // Check if this global value has been found to not be the same already.
388  if (NotSame.contains(GVN)) {
389  if (isa<Constant>(V))
390  ConstantsTheSame = false;
391  continue;
392  }
393 
394  // If it has been the same so far, we check the value for if the
395  // associated Constant value match the previous instances of the same
396  // global value number. If the global value does not map to a Constant,
397  // it is considered to not be the same value.
398  Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
399  if (ConstantMatches.hasValue()) {
400  if (ConstantMatches.getValue())
401  continue;
402  else
403  ConstantsTheSame = false;
404  }
405 
406  // While this value is a register, it might not have been previously,
407  // make sure we don't already have a constant mapped to this global value
408  // number.
409  if (GVNToConstant.find(GVN) != GVNToConstant.end())
410  ConstantsTheSame = false;
411 
412  NotSame.insert(GVN);
413  }
414  }
415 
416  return ConstantsTheSame;
417 }
418 
420  DenseMap<unsigned, Constant *> GVNToConstant;
421 
422  for (OutlinableRegion *Region : Regions)
423  collectRegionsConstants(*Region, GVNToConstant, NotSame);
424 }
425 
427  for (OutlinableRegion *OS : Regions)
428  OutputGVNCombinations.insert(OS->GVNStores);
429 
430  // We are adding an extracted argument to decide between which output path
431  // to use in the basic block. It is used in a switch statement and only
432  // needs to be an integer.
433  if (OutputGVNCombinations.size() > 1)
434  ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
435 }
436 
437 /// Get the subprogram if it exists for one of the outlined regions.
438 ///
439 /// \param [in] Group - The set of regions to find a subprogram for.
440 /// \returns the subprogram if it exists, or nullptr.
442  for (OutlinableRegion *OS : Group.Regions)
443  if (Function *F = OS->Call->getFunction())
444  if (DISubprogram *SP = F->getSubprogram())
445  return SP;
446 
447  return nullptr;
448 }
449 
450 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
451  unsigned FunctionNameSuffix) {
452  assert(!Group.OutlinedFunction && "Function is already defined!");
453 
454  Type *RetTy = Type::getVoidTy(M.getContext());
455  // All extracted functions _should_ have the same return type at this point
456  // since the similarity identifier ensures that all branches outside of the
457  // region occur in the same place.
458 
459  // NOTE: Should we ever move to the model that uses a switch at every point
460  // needed, meaning that we could branch within the region or out, it is
461  // possible that we will need to switch to using the most general case all of
462  // the time.
463  for (OutlinableRegion *R : Group.Regions) {
464  Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
465  if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
466  (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
467  RetTy = ExtractedFuncType;
468  }
469 
471  RetTy, Group.ArgumentTypes, false);
472 
473  // These functions will only be called from within the same module, so
474  // we can set an internal linkage.
477  "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
478 
479  // Transfer the swifterr attribute to the correct function parameter.
480  if (Group.SwiftErrorArgument.hasValue())
482  Attribute::SwiftError);
483 
484  Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
485  Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
486 
487  // If there's a DISubprogram associated with this outlined function, then
488  // emit debug info for the outlined function.
489  if (DISubprogram *SP = getSubprogramOrNull(Group)) {
490  Function *F = Group.OutlinedFunction;
491  // We have a DISubprogram. Get its DICompileUnit.
492  DICompileUnit *CU = SP->getUnit();
493  DIBuilder DB(M, true, CU);
494  DIFile *Unit = SP->getFile();
495  Mangler Mg;
496  // Get the mangled name of the function for the linkage name.
497  std::string Dummy;
498  llvm::raw_string_ostream MangledNameStream(Dummy);
499  Mg.getNameWithPrefix(MangledNameStream, F, false);
500 
501  DISubprogram *OutlinedSP = DB.createFunction(
502  Unit /* Context */, F->getName(), MangledNameStream.str(),
503  Unit /* File */,
504  0 /* Line 0 is reserved for compiler-generated code. */,
505  DB.createSubroutineType(DB.getOrCreateTypeArray(None)), /* void type */
506  0, /* Line 0 is reserved for compiler-generated code. */
507  DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
508  /* Outlined code is optimized code by definition. */
509  DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
510 
511  // Don't add any new variables to the subprogram.
512  DB.finalizeSubprogram(OutlinedSP);
513 
514  // Attach subprogram to the function.
515  F->setSubprogram(OutlinedSP);
516  // We're done with the DIBuilder.
517  DB.finalize();
518  }
519 
520  return Group.OutlinedFunction;
521 }
522 
523 /// Move each BasicBlock in \p Old to \p New.
524 ///
525 /// \param [in] Old - The function to move the basic blocks from.
526 /// \param [in] New - The function to move the basic blocks to.
527 /// \param [out] NewEnds - The return blocks of the new overall function.
528 static void moveFunctionData(Function &Old, Function &New,
530  Function::iterator CurrBB, NextBB, FinalBB;
531  for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
532  CurrBB = NextBB) {
533  NextBB = std::next(CurrBB);
534  CurrBB->removeFromParent();
535  CurrBB->insertInto(&New);
536  Instruction *I = CurrBB->getTerminator();
537 
538  // For each block we find a return instruction is, it is a potential exit
539  // path for the function. We keep track of each block based on the return
540  // value here.
541  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
542  NewEnds.insert(std::make_pair(RI->getReturnValue(), &(*CurrBB)));
543 
544  std::vector<Instruction *> DebugInsts;
545 
546  for (Instruction &Val : *CurrBB) {
547  // We must handle the scoping of called functions differently than
548  // other outlined instructions.
549  if (!isa<CallInst>(&Val)) {
550  // Remove the debug information for outlined functions.
551  Val.setDebugLoc(DebugLoc());
552  continue;
553  }
554 
555  // From this point we are only handling call instructions.
556  CallInst *CI = cast<CallInst>(&Val);
557 
558  // We add any debug statements here, to be removed after. Since the
559  // instructions originate from many different locations in the program,
560  // it will cause incorrect reporting from a debugger if we keep the
561  // same debug instructions.
562  if (isa<DbgInfoIntrinsic>(CI)) {
563  DebugInsts.push_back(&Val);
564  continue;
565  }
566 
567  // Edit the scope of called functions inside of outlined functions.
568  if (DISubprogram *SP = New.getSubprogram()) {
569  DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
570  Val.setDebugLoc(DI);
571  }
572  }
573 
574  for (Instruction *I : DebugInsts)
575  I->eraseFromParent();
576  }
577 
578  assert(NewEnds.size() > 0 && "No return instruction for new function?");
579 }
580 
581 /// Find the the constants that will need to be lifted into arguments
582 /// as they are not the same in each instance of the region.
583 ///
584 /// \param [in] C - The IRSimilarityCandidate containing the region we are
585 /// analyzing.
586 /// \param [in] NotSame - The set of global value numbers that do not have a
587 /// single Constant across all OutlinableRegions similar to \p C.
588 /// \param [out] Inputs - The list containing the global value numbers of the
589 /// arguments needed for the region of code.
591  std::vector<unsigned> &Inputs) {
592  DenseSet<unsigned> Seen;
593  // Iterate over the instructions, and find what constants will need to be
594  // extracted into arguments.
595  for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
596  IDIt != EndIDIt; IDIt++) {
597  for (Value *V : (*IDIt).OperVals) {
598  // Since these are stored before any outlining, they will be in the
599  // global value numbering.
600  unsigned GVN = C.getGVN(V).getValue();
601  if (isa<Constant>(V))
602  if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
603  Inputs.push_back(GVN);
604  Seen.insert(GVN);
605  }
606  }
607  }
608 }
609 
610 /// Find the GVN for the inputs that have been found by the CodeExtractor.
611 ///
612 /// \param [in] C - The IRSimilarityCandidate containing the region we are
613 /// analyzing.
614 /// \param [in] CurrentInputs - The set of inputs found by the
615 /// CodeExtractor.
616 /// \param [in] OutputMappings - The mapping of values that have been replaced
617 /// by a new output value.
618 /// \param [out] EndInputNumbers - The global value numbers for the extracted
619 /// arguments.
621  SetVector<Value *> &CurrentInputs,
622  const DenseMap<Value *, Value *> &OutputMappings,
623  std::vector<unsigned> &EndInputNumbers) {
624  // Get the Global Value Number for each input. We check if the Value has been
625  // replaced by a different value at output, and use the original value before
626  // replacement.
627  for (Value *Input : CurrentInputs) {
628  assert(Input && "Have a nullptr as an input");
629  if (OutputMappings.find(Input) != OutputMappings.end())
630  Input = OutputMappings.find(Input)->second;
631  assert(C.getGVN(Input).hasValue() &&
632  "Could not find a numbering for the given input");
633  EndInputNumbers.push_back(C.getGVN(Input).getValue());
634  }
635 }
636 
637 /// Find the original value for the \p ArgInput values if any one of them was
638 /// replaced during a previous extraction.
639 ///
640 /// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
641 /// \param [in] OutputMappings - The mapping of values that have been replaced
642 /// by a new output value.
643 /// \param [out] RemappedArgInputs - The remapped values according to
644 /// \p OutputMappings that will be extracted.
645 static void
647  const DenseMap<Value *, Value *> &OutputMappings,
648  SetVector<Value *> &RemappedArgInputs) {
649  // Get the global value number for each input that will be extracted as an
650  // argument by the code extractor, remapping if needed for reloaded values.
651  for (Value *Input : ArgInputs) {
652  if (OutputMappings.find(Input) != OutputMappings.end())
653  Input = OutputMappings.find(Input)->second;
654  RemappedArgInputs.insert(Input);
655  }
656 }
657 
658 /// Find the input GVNs and the output values for a region of Instructions.
659 /// Using the code extractor, we collect the inputs to the extracted function.
660 ///
661 /// The \p Region can be identified as needing to be ignored in this function.
662 /// It should be checked whether it should be ignored after a call to this
663 /// function.
664 ///
665 /// \param [in,out] Region - The region of code to be analyzed.
666 /// \param [out] InputGVNs - The global value numbers for the extracted
667 /// arguments.
668 /// \param [in] NotSame - The global value numbers in the region that do not
669 /// have the same constant value in the regions structurally similar to
670 /// \p Region.
671 /// \param [in] OutputMappings - The mapping of values that have been replaced
672 /// by a new output value after extraction.
673 /// \param [out] ArgInputs - The values of the inputs to the extracted function.
674 /// \param [out] Outputs - The set of values extracted by the CodeExtractor
675 /// as outputs.
677  OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
678  DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
679  SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
680  IRSimilarityCandidate &C = *Region.Candidate;
681 
682  // OverallInputs are the inputs to the region found by the CodeExtractor,
683  // SinkCands and HoistCands are used by the CodeExtractor to find sunken
684  // allocas of values whose lifetimes are contained completely within the
685  // outlined region. PremappedInputs are the arguments found by the
686  // CodeExtractor, removing conditions such as sunken allocas, but that
687  // may need to be remapped due to the extracted output values replacing
688  // the original values. We use DummyOutputs for this first run of finding
689  // inputs and outputs since the outputs could change during findAllocas,
690  // the correct set of extracted outputs will be in the final Outputs ValueSet.
691  SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
692  DummyOutputs;
693 
694  // Use the code extractor to get the inputs and outputs, without sunken
695  // allocas or removing llvm.assumes.
696  CodeExtractor *CE = Region.CE;
697  CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
698  assert(Region.StartBB && "Region must have a start BasicBlock!");
699  Function *OrigF = Region.StartBB->getParent();
700  CodeExtractorAnalysisCache CEAC(*OrigF);
701  BasicBlock *Dummy = nullptr;
702 
703  // The region may be ineligible due to VarArgs in the parent function. In this
704  // case we ignore the region.
705  if (!CE->isEligible()) {
706  Region.IgnoreRegion = true;
707  return;
708  }
709 
710  // Find if any values are going to be sunk into the function when extracted
711  CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
712  CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
713 
714  // TODO: Support regions with sunken allocas: values whose lifetimes are
715  // contained completely within the outlined region. These are not guaranteed
716  // to be the same in every region, so we must elevate them all to arguments
717  // when they appear. If these values are not equal, it means there is some
718  // Input in OverallInputs that was removed for ArgInputs.
719  if (OverallInputs.size() != PremappedInputs.size()) {
720  Region.IgnoreRegion = true;
721  return;
722  }
723 
724  findConstants(C, NotSame, InputGVNs);
725 
726  mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
727 
728  remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
729  ArgInputs);
730 
731  // Sort the GVNs, since we now have constants included in the \ref InputGVNs
732  // we need to make sure they are in a deterministic order.
733  stable_sort(InputGVNs);
734 }
735 
736 /// Look over the inputs and map each input argument to an argument in the
737 /// overall function for the OutlinableRegions. This creates a way to replace
738 /// the arguments of the extracted function with the arguments of the new
739 /// overall function.
740 ///
741 /// \param [in,out] Region - The region of code to be analyzed.
742 /// \param [in] InputGVNs - The global value numbering of the input values
743 /// collected.
744 /// \param [in] ArgInputs - The values of the arguments to the extracted
745 /// function.
746 static void
748  std::vector<unsigned> &InputGVNs,
749  SetVector<Value *> &ArgInputs) {
750 
751  IRSimilarityCandidate &C = *Region.Candidate;
752  OutlinableGroup &Group = *Region.Parent;
753 
754  // This counts the argument number in the overall function.
755  unsigned TypeIndex = 0;
756 
757  // This counts the argument number in the extracted function.
758  unsigned OriginalIndex = 0;
759 
760  // Find the mapping of the extracted arguments to the arguments for the
761  // overall function. Since there may be extra arguments in the overall
762  // function to account for the extracted constants, we have two different
763  // counters as we find extracted arguments, and as we come across overall
764  // arguments.
765 
766  // Additionally, in our first pass, for the first extracted function,
767  // we find argument locations for the canonical value numbering. This
768  // numbering overrides any discovered location for the extracted code.
769  for (unsigned InputVal : InputGVNs) {
770  Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
771  assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
772  unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
773 
774  Optional<Value *> InputOpt = C.fromGVN(InputVal);
775  assert(InputOpt.hasValue() && "Global value number not found?");
776  Value *Input = InputOpt.getValue();
777 
779  Group.CanonicalNumberToAggArg.find(CanonicalNumber);
780 
781  if (!Group.InputTypesSet) {
782  Group.ArgumentTypes.push_back(Input->getType());
783  // If the input value has a swifterr attribute, make sure to mark the
784  // argument in the overall function.
785  if (Input->isSwiftError()) {
786  assert(
787  !Group.SwiftErrorArgument.hasValue() &&
788  "Argument already marked with swifterr for this OutlinableGroup!");
789  Group.SwiftErrorArgument = TypeIndex;
790  }
791  }
792 
793  // Check if we have a constant. If we do add it to the overall argument
794  // number to Constant map for the region, and continue to the next input.
795  if (Constant *CST = dyn_cast<Constant>(Input)) {
796  if (AggArgIt != Group.CanonicalNumberToAggArg.end())
797  Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
798  else {
800  std::make_pair(CanonicalNumber, TypeIndex));
801  Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
802  }
803  TypeIndex++;
804  continue;
805  }
806 
807  // It is not a constant, we create the mapping from extracted argument list
808  // to the overall argument list, using the canonical location, if it exists.
809  assert(ArgInputs.count(Input) && "Input cannot be found!");
810 
811  if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
812  if (OriginalIndex != AggArgIt->second)
813  Region.ChangedArgOrder = true;
814  Region.ExtractedArgToAgg.insert(
815  std::make_pair(OriginalIndex, AggArgIt->second));
816  Region.AggArgToExtracted.insert(
817  std::make_pair(AggArgIt->second, OriginalIndex));
818  } else {
820  std::make_pair(CanonicalNumber, TypeIndex));
821  Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
822  Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
823  }
824  OriginalIndex++;
825  TypeIndex++;
826  }
827 
828  // If the function type definitions for the OutlinableGroup holding the region
829  // have not been set, set the length of the inputs here. We should have the
830  // same inputs for all of the different regions contained in the
831  // OutlinableGroup since they are all structurally similar to one another.
832  if (!Group.InputTypesSet) {
833  Group.NumAggregateInputs = TypeIndex;
834  Group.InputTypesSet = true;
835  }
836 
837  Region.NumExtractedInputs = OriginalIndex;
838 }
839 
840 /// Create a mapping of the output arguments for the \p Region to the output
841 /// arguments of the overall outlined function.
842 ///
843 /// \param [in,out] Region - The region of code to be analyzed.
844 /// \param [in] Outputs - The values found by the code extractor.
845 static void
847  SetVector<Value *> &Outputs) {
848  OutlinableGroup &Group = *Region.Parent;
849  IRSimilarityCandidate &C = *Region.Candidate;
850 
853  C.getBasicBlocks(BBSet, BE);
854 
855  // Find the exits to the region.
857  for (BasicBlock *Block : BE)
858  for (BasicBlock *Succ : successors(Block))
859  if (!BBSet.contains(Succ))
860  Exits.insert(Succ);
861 
862  // After determining which blocks exit to PHINodes, we add these PHINodes to
863  // the set of outputs to be processed. We also check the incoming values of
864  // the PHINodes for whether they should no longer be considered outputs.
865  for (BasicBlock *ExitBB : Exits) {
866  for (PHINode &PN : ExitBB->phis()) {
867  // Find all incoming values from the outlining region.
868  SmallVector<unsigned, 2> IncomingVals;
869  for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
870  if (BBSet.contains(PN.getIncomingBlock(Idx)))
871  IncomingVals.push_back(Idx);
872 
873  // Do not process PHI if there is one (or fewer) predecessor from region.
874  if (IncomingVals.size() <= 1)
875  continue;
876 
877  Region.IgnoreRegion = true;
878  return;
879  }
880  }
881 
882  // This counts the argument number in the extracted function.
883  unsigned OriginalIndex = Region.NumExtractedInputs;
884 
885  // This counts the argument number in the overall function.
886  unsigned TypeIndex = Group.NumAggregateInputs;
887  bool TypeFound;
888  DenseSet<unsigned> AggArgsUsed;
889 
890  // Iterate over the output types and identify if there is an aggregate pointer
891  // type whose base type matches the current output type. If there is, we mark
892  // that we will use this output register for this value. If not we add another
893  // type to the overall argument type list. We also store the GVNs used for
894  // stores to identify which values will need to be moved into an special
895  // block that holds the stores to the output registers.
896  for (Value *Output : Outputs) {
897  TypeFound = false;
898  // We can do this since it is a result value, and will have a number
899  // that is necessarily the same. BUT if in the future, the instructions
900  // do not have to be in same order, but are functionally the same, we will
901  // have to use a different scheme, as one-to-one correspondence is not
902  // guaranteed.
903  unsigned GlobalValue = C.getGVN(Output).getValue();
904  unsigned ArgumentSize = Group.ArgumentTypes.size();
905 
906  for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
907  if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
908  continue;
909 
910  if (AggArgsUsed.contains(Jdx))
911  continue;
912 
913  TypeFound = true;
914  AggArgsUsed.insert(Jdx);
915  Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
916  Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
917  Region.GVNStores.push_back(GlobalValue);
918  break;
919  }
920 
921  // We were unable to find an unused type in the output type set that matches
922  // the output, so we add a pointer type to the argument types of the overall
923  // function to handle this output and create a mapping to it.
924  if (!TypeFound) {
925  Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
926  AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
927  Region.ExtractedArgToAgg.insert(
928  std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
929  Region.AggArgToExtracted.insert(
930  std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
931  Region.GVNStores.push_back(GlobalValue);
932  }
933 
934  stable_sort(Region.GVNStores);
935  OriginalIndex++;
936  TypeIndex++;
937  }
938 }
939 
940 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
941  DenseSet<unsigned> &NotSame) {
942  std::vector<unsigned> Inputs;
943  SetVector<Value *> ArgInputs, Outputs;
944 
945  getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
946  Outputs);
947 
948  if (Region.IgnoreRegion)
949  return;
950 
951  // Map the inputs found by the CodeExtractor to the arguments found for
952  // the overall function.
954 
955  // Map the outputs found by the CodeExtractor to the arguments found for
956  // the overall function.
958 }
959 
960 /// Replace the extracted function in the Region with a call to the overall
961 /// function constructed from the deduplicated similar regions, replacing and
962 /// remapping the values passed to the extracted function as arguments to the
963 /// new arguments of the overall function.
964 ///
965 /// \param [in] M - The module to outline from.
966 /// \param [in] Region - The regions of extracted code to be replaced with a new
967 /// function.
968 /// \returns a call instruction with the replaced function.
970  std::vector<Value *> NewCallArgs;
972 
973  OutlinableGroup &Group = *Region.Parent;
974  CallInst *Call = Region.Call;
975  assert(Call && "Call to replace is nullptr?");
976  Function *AggFunc = Group.OutlinedFunction;
977  assert(AggFunc && "Function to replace with is nullptr?");
978 
979  // If the arguments are the same size, there are not values that need to be
980  // made into an argument, the argument ordering has not been change, or
981  // different output registers to handle. We can simply replace the called
982  // function in this case.
983  if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
984  LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
985  << *AggFunc << " with same number of arguments\n");
986  Call->setCalledFunction(AggFunc);
987  return Call;
988  }
989 
990  // We have a different number of arguments than the new function, so
991  // we need to use our previously mappings off extracted argument to overall
992  // function argument, and constants to overall function argument to create the
993  // new argument list.
994  for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
995 
996  if (AggArgIdx == AggFunc->arg_size() - 1 &&
997  Group.OutputGVNCombinations.size() > 1) {
998  // If we are on the last argument, and we need to differentiate between
999  // output blocks, add an integer to the argument list to determine
1000  // what block to take
1001  LLVM_DEBUG(dbgs() << "Set switch block argument to "
1002  << Region.OutputBlockNum << "\n");
1003  NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1004  Region.OutputBlockNum));
1005  continue;
1006  }
1007 
1008  ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1009  if (ArgPair != Region.AggArgToExtracted.end()) {
1010  Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1011  // If we found the mapping from the extracted function to the overall
1012  // function, we simply add it to the argument list. We use the same
1013  // value, it just needs to honor the new order of arguments.
1014  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1015  << *ArgumentValue << "\n");
1016  NewCallArgs.push_back(ArgumentValue);
1017  continue;
1018  }
1019 
1020  // If it is a constant, we simply add it to the argument list as a value.
1021  if (Region.AggArgToConstant.find(AggArgIdx) !=
1022  Region.AggArgToConstant.end()) {
1023  Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1024  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1025  << *CST << "\n");
1026  NewCallArgs.push_back(CST);
1027  continue;
1028  }
1029 
1030  // Add a nullptr value if the argument is not found in the extracted
1031  // function. If we cannot find a value, it means it is not in use
1032  // for the region, so we should not pass anything to it.
1033  LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1034  NewCallArgs.push_back(ConstantPointerNull::get(
1035  static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1036  }
1037 
1038  LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1039  << *AggFunc << " with new set of arguments\n");
1040  // Create the new call instruction and erase the old one.
1041  Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1042  Call);
1043 
1044  // It is possible that the call to the outlined function is either the first
1045  // instruction is in the new block, the last instruction, or both. If either
1046  // of these is the case, we need to make sure that we replace the instruction
1047  // in the IRInstructionData struct with the new call.
1048  CallInst *OldCall = Region.Call;
1049  if (Region.NewFront->Inst == OldCall)
1050  Region.NewFront->Inst = Call;
1051  if (Region.NewBack->Inst == OldCall)
1052  Region.NewBack->Inst = Call;
1053 
1054  // Transfer any debug information.
1055  Call->setDebugLoc(Region.Call->getDebugLoc());
1056  // Since our output may determine which branch we go to, we make sure to
1057  // propogate this new call value through the module.
1058  OldCall->replaceAllUsesWith(Call);
1059 
1060  // Remove the old instruction.
1061  OldCall->eraseFromParent();
1062  Region.Call = Call;
1063 
1064  // Make sure that the argument in the new function has the SwiftError
1065  // argument.
1066  if (Group.SwiftErrorArgument.hasValue())
1067  Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
1068  Attribute::SwiftError);
1069 
1070  return Call;
1071 }
1072 
1073 // Within an extracted function, replace the argument uses of the extracted
1074 // region with the arguments of the function for an OutlinableGroup.
1075 //
1076 /// \param [in] Region - The region of extracted code to be changed.
1077 /// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1078 /// region.
1079 /// \param [in] FirstFunction - A flag to indicate whether we are using this
1080 /// function to define the overall outlined function for all the regions, or
1081 /// if we are operating on one of the following regions.
1082 static void
1085  bool FirstFunction = false) {
1086  OutlinableGroup &Group = *Region.Parent;
1087  assert(Region.ExtractedFunction && "Region has no extracted function?");
1088 
1089  Function *DominatingFunction = Region.ExtractedFunction;
1090  if (FirstFunction)
1091  DominatingFunction = Group.OutlinedFunction;
1092  DominatorTree DT(*DominatingFunction);
1093 
1094  for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1095  ArgIdx++) {
1096  assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1097  Region.ExtractedArgToAgg.end() &&
1098  "No mapping from extracted to outlined?");
1099  unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1100  Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1101  Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1102  // The argument is an input, so we can simply replace it with the overall
1103  // argument value
1104  if (ArgIdx < Region.NumExtractedInputs) {
1105  LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1106  << *Region.ExtractedFunction << " with " << *AggArg
1107  << " in function " << *Group.OutlinedFunction << "\n");
1108  Arg->replaceAllUsesWith(AggArg);
1109  continue;
1110  }
1111 
1112  // If we are replacing an output, we place the store value in its own
1113  // block inside the overall function before replacing the use of the output
1114  // in the function.
1115  assert(Arg->hasOneUse() && "Output argument can only have one use");
1116  User *InstAsUser = Arg->user_back();
1117  assert(InstAsUser && "User is nullptr!");
1118 
1119  Instruction *I = cast<Instruction>(InstAsUser);
1120  BasicBlock *BB = I->getParent();
1121  SmallVector<BasicBlock *, 4> Descendants;
1122  DT.getDescendants(BB, Descendants);
1123  bool EdgeAdded = false;
1124  if (Descendants.size() == 0) {
1125  EdgeAdded = true;
1126  DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1127  DT.getDescendants(BB, Descendants);
1128  }
1129 
1130  // Iterate over the following blocks, looking for return instructions,
1131  // if we find one, find the corresponding output block for the return value
1132  // and move our store instruction there.
1133  for (BasicBlock *DescendBB : Descendants) {
1134  ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1135  if (!RI)
1136  continue;
1137  Value *RetVal = RI->getReturnValue();
1138  auto VBBIt = OutputBBs.find(RetVal);
1139  assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1140 
1141  // If this is storing a PHINode, we must make sure it is included in the
1142  // overall function.
1143  StoreInst *SI = cast<StoreInst>(I);
1144 
1145  Value *ValueOperand = SI->getValueOperand();
1146 
1147  StoreInst *NewI = cast<StoreInst>(I->clone());
1148  NewI->setDebugLoc(DebugLoc());
1149  BasicBlock *OutputBB = VBBIt->second;
1150  OutputBB->getInstList().push_back(NewI);
1151  LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1152  << *OutputBB << "\n");
1153 
1154  if (FirstFunction)
1155  continue;
1156  Value *CorrVal =
1157  Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1158  assert(CorrVal && "Value is nullptr?");
1159  NewI->setOperand(0, CorrVal);
1160  }
1161 
1162  // If we added an edge for basic blocks without a predecessor, we remove it
1163  // here.
1164  if (EdgeAdded)
1165  DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1166  I->eraseFromParent();
1167 
1168  LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1169  << *Region.ExtractedFunction << " with " << *AggArg
1170  << " in function " << *Group.OutlinedFunction << "\n");
1171  Arg->replaceAllUsesWith(AggArg);
1172  }
1173 }
1174 
1175 /// Within an extracted function, replace the constants that need to be lifted
1176 /// into arguments with the actual argument.
1177 ///
1178 /// \param Region [in] - The region of extracted code to be changed.
1180  OutlinableGroup &Group = *Region.Parent;
1181  // Iterate over the constants that need to be elevated into arguments
1182  for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1183  unsigned AggArgIdx = Const.first;
1184  Function *OutlinedFunction = Group.OutlinedFunction;
1185  assert(OutlinedFunction && "Overall Function is not defined?");
1186  Constant *CST = Const.second;
1187  Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1188  // Identify the argument it will be elevated to, and replace instances of
1189  // that constant in the function.
1190 
1191  // TODO: If in the future constants do not have one global value number,
1192  // i.e. a constant 1 could be mapped to several values, this check will
1193  // have to be more strict. It cannot be using only replaceUsesWithIf.
1194 
1195  LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1196  << " in function " << *OutlinedFunction << " with "
1197  << *Arg << "\n");
1198  CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1199  if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1200  return I->getFunction() == OutlinedFunction;
1201  return false;
1202  });
1203  }
1204 }
1205 
1206 /// It is possible that there is a basic block that already performs the same
1207 /// stores. This returns a duplicate block, if it exists
1208 ///
1209 /// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1210 /// \param OutputStoreBBs [in] The existing output blocks.
1211 /// \returns an optional value with the number output block if there is a match.
1214  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1215 
1216  bool Mismatch = false;
1217  unsigned MatchingNum = 0;
1218  // We compare the new set output blocks to the other sets of output blocks.
1219  // If they are the same number, and have identical instructions, they are
1220  // considered to be the same.
1221  for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1222  Mismatch = false;
1223  for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1225  OutputBBs.find(VToB.first);
1226  if (OutputBBIt == OutputBBs.end()) {
1227  Mismatch = true;
1228  break;
1229  }
1230 
1231  BasicBlock *CompBB = VToB.second;
1232  BasicBlock *OutputBB = OutputBBIt->second;
1233  if (CompBB->size() - 1 != OutputBB->size()) {
1234  Mismatch = true;
1235  break;
1236  }
1237 
1238  BasicBlock::iterator NIt = OutputBB->begin();
1239  for (Instruction &I : *CompBB) {
1240  if (isa<BranchInst>(&I))
1241  continue;
1242 
1243  if (!I.isIdenticalTo(&(*NIt))) {
1244  Mismatch = true;
1245  break;
1246  }
1247 
1248  NIt++;
1249  }
1250  }
1251 
1252  if (!Mismatch)
1253  return MatchingNum;
1254 
1255  MatchingNum++;
1256  }
1257 
1258  return None;
1259 }
1260 
1261 /// Remove empty output blocks from the outlined region.
1262 ///
1263 /// \param BlocksToPrune - Mapping of return values output blocks for the \p
1264 /// Region.
1265 /// \param Region - The OutlinableRegion we are analyzing.
1266 static bool
1269  bool AllRemoved = true;
1270  Value *RetValueForBB;
1271  BasicBlock *NewBB;
1273  // Iterate over the output blocks created in the outlined section.
1274  for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1275  RetValueForBB = VtoBB.first;
1276  NewBB = VtoBB.second;
1277 
1278  // If there are no instructions, we remove it from the module, and also
1279  // mark the value for removal from the return value to output block mapping.
1280  if (NewBB->size() == 0) {
1281  NewBB->eraseFromParent();
1282  ToRemove.push_back(RetValueForBB);
1283  continue;
1284  }
1285 
1286  // Mark that we could not remove all the blocks since they were not all
1287  // empty.
1288  AllRemoved = false;
1289  }
1290 
1291  // Remove the return value from the mapping.
1292  for (Value *V : ToRemove)
1293  BlocksToPrune.erase(V);
1294 
1295  // Mark the region as having the no output scheme.
1296  if (AllRemoved)
1297  Region.OutputBlockNum = -1;
1298 
1299  return AllRemoved;
1300 }
1301 
1302 /// For the outlined section, move needed the StoreInsts for the output
1303 /// registers into their own block. Then, determine if there is a duplicate
1304 /// output block already created.
1305 ///
1306 /// \param [in] OG - The OutlinableGroup of regions to be outlined.
1307 /// \param [in] Region - The OutlinableRegion that is being analyzed.
1308 /// \param [in,out] OutputBBs - the blocks that stores for this region will be
1309 /// placed in.
1310 /// \param [in] EndBBs - the final blocks of the extracted function.
1311 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
1312 /// been replaced by a new output value.
1313 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1318  const DenseMap<Value *, Value *> &OutputMappings,
1319  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1320  // If none of the output blocks have any instructions, this means that we do
1321  // not have to determine if it matches any of the other output schemes, and we
1322  // don't have to do anything else.
1323  if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
1324  return;
1325 
1326  // Determine is there is a duplicate set of blocks.
1327  Optional<unsigned> MatchingBB =
1328  findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
1329 
1330  // If there is, we remove the new output blocks. If it does not,
1331  // we add it to our list of sets of output blocks.
1332  if (MatchingBB.hasValue()) {
1333  LLVM_DEBUG(dbgs() << "Set output block for region in function"
1334  << Region.ExtractedFunction << " to "
1335  << MatchingBB.getValue());
1336 
1337  Region.OutputBlockNum = MatchingBB.getValue();
1338  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
1339  VtoBB.second->eraseFromParent();
1340  return;
1341  }
1342 
1343  Region.OutputBlockNum = OutputStoreBBs.size();
1344 
1345  Value *RetValueForBB;
1346  BasicBlock *NewBB;
1347  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1348  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
1349  RetValueForBB = VtoBB.first;
1350  NewBB = VtoBB.second;
1352  EndBBs.find(RetValueForBB);
1353  LLVM_DEBUG(dbgs() << "Create output block for region in"
1354  << Region.ExtractedFunction << " to "
1355  << *NewBB);
1356  BranchInst::Create(VBBIt->second, NewBB);
1357  OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
1358  }
1359 }
1360 
1361 /// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
1362 /// before creating a basic block for each \p NewMap, and inserting into the new
1363 /// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
1364 ///
1365 /// \param OldMap [in] - The mapping to base the new mapping off of.
1366 /// \param NewMap [out] - The output mapping using the keys of \p OldMap.
1367 /// \param ParentFunc [in] - The function to put the new basic block in.
1368 /// \param BaseName [in] - The start of the BasicBlock names to be appended to
1369 /// by an index value.
1372  Function *ParentFunc, Twine BaseName) {
1373  unsigned Idx = 0;
1374  std::vector<Value *> SortedKeys;
1375 
1376  getSortedConstantKeys(SortedKeys, OldMap);
1377 
1378  for (Value *RetVal : SortedKeys) {
1379  BasicBlock *NewBB = BasicBlock::Create(
1380  ParentFunc->getContext(),
1381  Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
1382  ParentFunc);
1383  NewMap.insert(std::make_pair(RetVal, NewBB));
1384  }
1385 }
1386 
1387 /// Create the switch statement for outlined function to differentiate between
1388 /// all the output blocks.
1389 ///
1390 /// For the outlined section, determine if an outlined block already exists that
1391 /// matches the needed stores for the extracted section.
1392 /// \param [in] M - The module we are outlining from.
1393 /// \param [in] OG - The group of regions to be outlined.
1394 /// \param [in] EndBBs - The final blocks of the extracted function.
1395 /// \param [in,out] OutputStoreBBs - The existing output blocks.
1398  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1399  // We only need the switch statement if there is more than one store
1400  // combination.
1401  if (OG.OutputGVNCombinations.size() > 1) {
1402  Function *AggFunc = OG.OutlinedFunction;
1403  // Create a final block for each different return block.
1405  createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
1406 
1407  for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
1408  std::pair<Value *, BasicBlock *> &OutputBlock =
1409  *OG.EndBBs.find(RetBlockPair.first);
1410  BasicBlock *ReturnBlock = RetBlockPair.second;
1411  BasicBlock *EndBB = OutputBlock.second;
1412  Instruction *Term = EndBB->getTerminator();
1413  // Move the return value to the final block instead of the original exit
1414  // stub.
1415  Term->moveBefore(*ReturnBlock, ReturnBlock->end());
1416  // Put the switch statement in the old end basic block for the function
1417  // with a fall through to the new return block.
1418  LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
1419  << OutputStoreBBs.size() << "\n");
1420  SwitchInst *SwitchI =
1421  SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
1422  ReturnBlock, OutputStoreBBs.size(), EndBB);
1423 
1424  unsigned Idx = 0;
1425  for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
1427  OutputStoreBB.find(OutputBlock.first);
1428 
1429  if (OSBBIt == OutputStoreBB.end())
1430  continue;
1431 
1432  BasicBlock *BB = OSBBIt->second;
1433  SwitchI->addCase(
1434  ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
1435  Term = BB->getTerminator();
1436  Term->setSuccessor(0, ReturnBlock);
1437  Idx++;
1438  }
1439  }
1440  return;
1441  }
1442 
1443  // If there needs to be stores, move them from the output blocks to their
1444  // corresponding ending block.
1445  if (OutputStoreBBs.size() == 1) {
1446  LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
1447  << *OG.OutlinedFunction << "\n");
1448  DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
1449  for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
1451  EndBBs.find(VBPair.first);
1452  assert(EndBBIt != EndBBs.end() && "Could not find end block");
1453  BasicBlock *EndBB = EndBBIt->second;
1454  BasicBlock *OutputBB = VBPair.second;
1455  Instruction *Term = OutputBB->getTerminator();
1456  Term->eraseFromParent();
1457  Term = EndBB->getTerminator();
1458  moveBBContents(*OutputBB, *EndBB);
1459  Term->moveBefore(*EndBB, EndBB->end());
1460  OutputBB->eraseFromParent();
1461  }
1462  }
1463 }
1464 
1465 /// Fill the new function that will serve as the replacement function for all of
1466 /// the extracted regions of a certain structure from the first region in the
1467 /// list of regions. Replace this first region's extracted function with the
1468 /// new overall function.
1469 ///
1470 /// \param [in] M - The module we are outlining from.
1471 /// \param [in] CurrentGroup - The group of regions to be outlined.
1472 /// \param [in,out] OutputStoreBBs - The output blocks for each different
1473 /// set of stores needed for the different functions.
1474 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
1475 /// once outlining is complete.
1477  Module &M, OutlinableGroup &CurrentGroup,
1478  std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
1479  std::vector<Function *> &FuncsToRemove) {
1480  OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
1481 
1482  // Move first extracted function's instructions into new function.
1483  LLVM_DEBUG(dbgs() << "Move instructions from "
1484  << *CurrentOS->ExtractedFunction << " to instruction "
1485  << *CurrentGroup.OutlinedFunction << "\n");
1486  moveFunctionData(*CurrentOS->ExtractedFunction,
1487  *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
1488 
1489  // Transfer the attributes from the function to the new function.
1490  for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
1491  CurrentGroup.OutlinedFunction->addFnAttr(A);
1492 
1493  // Create a new set of output blocks for the first extracted function.
1495  createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
1496  CurrentGroup.OutlinedFunction, "output_block_0");
1497  CurrentOS->OutputBlockNum = 0;
1498 
1499  replaceArgumentUses(*CurrentOS, NewBBs, true);
1500  replaceConstants(*CurrentOS);
1501 
1502  // We first identify if any output blocks are empty, if they are we remove
1503  // them. We then create a branch instruction to the basic block to the return
1504  // block for the function for each non empty output block.
1505  if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
1506  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
1507  for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
1509  CurrentGroup.EndBBs.find(VToBB.first);
1510  BasicBlock *EndBB = VBBIt->second;
1511  BranchInst::Create(EndBB, VToBB.second);
1512  OutputStoreBBs.back().insert(VToBB);
1513  }
1514  }
1515 
1516  // Replace the call to the extracted function with the outlined function.
1517  CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1518 
1519  // We only delete the extracted functions at the end since we may need to
1520  // reference instructions contained in them for mapping purposes.
1521  FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1522 }
1523 
1524 void IROutliner::deduplicateExtractedSections(
1525  Module &M, OutlinableGroup &CurrentGroup,
1526  std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
1527  createFunction(M, CurrentGroup, OutlinedFunctionNum);
1528 
1529  std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
1530 
1531  OutlinableRegion *CurrentOS;
1532 
1533  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
1534 
1535  std::vector<Value *> SortedKeys;
1536  for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
1537  CurrentOS = CurrentGroup.Regions[Idx];
1539  *CurrentOS->ExtractedFunction);
1540 
1541  // Create a set of BasicBlocks, one for each return block, to hold the
1542  // needed store instructions.
1545  CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
1546  "output_block_" + Twine(static_cast<unsigned>(Idx)));
1547 
1548  replaceArgumentUses(*CurrentOS, NewBBs);
1549  alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
1550  CurrentGroup.EndBBs, OutputMappings,
1551  OutputStoreBBs);
1552 
1553  CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
1554  FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
1555  }
1556 
1557  // Create a switch statement to handle the different output schemes.
1558  createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
1559 
1560  OutlinedFunctionNum++;
1561 }
1562 
1563 /// Checks that the next instruction in the InstructionDataList matches the
1564 /// next instruction in the module. If they do not, there could be the
1565 /// possibility that extra code has been inserted, and we must ignore it.
1566 ///
1567 /// \param ID - The IRInstructionData to check the next instruction of.
1568 /// \returns true if the InstructionDataList and actual instruction match.
1570  // We check if there is a discrepancy between the InstructionDataList
1571  // and the actual next instruction in the module. If there is, it means
1572  // that an extra instruction was added, likely by the CodeExtractor.
1573 
1574  // Since we do not have any similarity data about this particular
1575  // instruction, we cannot confidently outline it, and must discard this
1576  // candidate.
1577  IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
1578  Instruction *NextIDLInst = NextIDIt->Inst;
1579  Instruction *NextModuleInst = nullptr;
1580  if (!ID.Inst->isTerminator())
1581  NextModuleInst = ID.Inst->getNextNonDebugInstruction();
1582  else if (NextIDLInst != nullptr)
1583  NextModuleInst =
1584  &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
1585 
1586  if (NextIDLInst && NextIDLInst != NextModuleInst)
1587  return false;
1588 
1589  return true;
1590 }
1591 
1592 bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
1593  const OutlinableRegion &Region) {
1594  IRSimilarityCandidate *IRSC = Region.Candidate;
1595  unsigned StartIdx = IRSC->getStartIdx();
1596  unsigned EndIdx = IRSC->getEndIdx();
1597 
1598  // A check to make sure that we are not about to attempt to outline something
1599  // that has already been outlined.
1600  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1601  if (Outlined.contains(Idx))
1602  return false;
1603 
1604  // We check if the recorded instruction matches the actual next instruction,
1605  // if it does not, we fix it in the InstructionDataList.
1606  if (!Region.Candidate->backInstruction()->isTerminator()) {
1607  Instruction *NewEndInst =
1608  Region.Candidate->backInstruction()->getNextNonDebugInstruction();
1609  assert(NewEndInst && "Next instruction is a nullptr?");
1610  if (Region.Candidate->end()->Inst != NewEndInst) {
1611  IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1612  IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
1613  IRInstructionData(*NewEndInst,
1614  InstructionClassifier.visit(*NewEndInst), *IDL);
1615 
1616  // Insert the first IRInstructionData of the new region after the
1617  // last IRInstructionData of the IRSimilarityCandidate.
1618  IDL->insert(Region.Candidate->end(), *NewEndIRID);
1619  }
1620  }
1621 
1622  return none_of(*IRSC, [this](IRInstructionData &ID) {
1624  return true;
1625 
1626  return !this->InstructionClassifier.visit(ID.Inst);
1627  });
1628 }
1629 
1630 void IROutliner::pruneIncompatibleRegions(
1631  std::vector<IRSimilarityCandidate> &CandidateVec,
1632  OutlinableGroup &CurrentGroup) {
1633  bool PreviouslyOutlined;
1634 
1635  // Sort from beginning to end, so the IRSimilarityCandidates are in order.
1636  stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
1637  const IRSimilarityCandidate &RHS) {
1638  return LHS.getStartIdx() < RHS.getStartIdx();
1639  });
1640 
1641  IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
1642  // Since outlining a call and a branch instruction will be the same as only
1643  // outlinining a call instruction, we ignore it as a space saving.
1644  if (FirstCandidate.getLength() == 2) {
1645  if (isa<CallInst>(FirstCandidate.front()->Inst) &&
1646  isa<BranchInst>(FirstCandidate.back()->Inst))
1647  return;
1648  }
1649 
1650  unsigned CurrentEndIdx = 0;
1651  for (IRSimilarityCandidate &IRSC : CandidateVec) {
1652  PreviouslyOutlined = false;
1653  unsigned StartIdx = IRSC.getStartIdx();
1654  unsigned EndIdx = IRSC.getEndIdx();
1655 
1656  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
1657  if (Outlined.contains(Idx)) {
1658  PreviouslyOutlined = true;
1659  break;
1660  }
1661 
1662  if (PreviouslyOutlined)
1663  continue;
1664 
1665  // Check over the instructions, and if the basic block has its address
1666  // taken for use somewhere else, we do not outline that block.
1667  bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
1668  return ID.Inst->getParent()->hasAddressTaken();
1669  });
1670 
1671  if (BBHasAddressTaken)
1672  continue;
1673 
1674  if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
1675  !OutlineFromLinkODRs)
1676  continue;
1677 
1678  // Greedily prune out any regions that will overlap with already chosen
1679  // regions.
1680  if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
1681  continue;
1682 
1683  bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
1685  return true;
1686 
1687  return !this->InstructionClassifier.visit(ID.Inst);
1688  });
1689 
1690  if (BadInst)
1691  continue;
1692 
1693  OutlinableRegion *OS = new (RegionAllocator.Allocate())
1694  OutlinableRegion(IRSC, CurrentGroup);
1695  CurrentGroup.Regions.push_back(OS);
1696 
1697  CurrentEndIdx = EndIdx;
1698  }
1699 }
1700 
1702 IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
1703  InstructionCost RegionBenefit = 0;
1704  for (OutlinableRegion *Region : CurrentGroup.Regions) {
1705  TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1706  // We add the number of instructions in the region to the benefit as an
1707  // estimate as to how much will be removed.
1708  RegionBenefit += Region->getBenefit(TTI);
1709  LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
1710  << " saved instructions to overfall benefit.\n");
1711  }
1712 
1713  return RegionBenefit;
1714 }
1715 
1717 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
1718  InstructionCost OverallCost = 0;
1719  for (OutlinableRegion *Region : CurrentGroup.Regions) {
1720  TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
1721 
1722  // Each output incurs a load after the call, so we add that to the cost.
1723  for (unsigned OutputGVN : Region->GVNStores) {
1724  Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
1725  assert(OV.hasValue() && "Could not find value for GVN?");
1726  Value *V = OV.getValue();
1727  InstructionCost LoadCost =
1730 
1731  LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
1732  << " instructions to cost for output of type "
1733  << *V->getType() << "\n");
1734  OverallCost += LoadCost;
1735  }
1736  }
1737 
1738  return OverallCost;
1739 }
1740 
1741 /// Find the extra instructions needed to handle any output values for the
1742 /// region.
1743 ///
1744 /// \param [in] M - The Module to outline from.
1745 /// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
1746 /// \param [in] TTI - The TargetTransformInfo used to collect information for
1747 /// new instruction costs.
1748 /// \returns the additional cost to handle the outputs.
1750  OutlinableGroup &CurrentGroup,
1752  InstructionCost OutputCost = 0;
1753  unsigned NumOutputBranches = 0;
1754 
1755  IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
1756  DenseSet<BasicBlock *> CandidateBlocks;
1757  Candidate.getBasicBlocks(CandidateBlocks);
1758 
1759  // Count the number of different output branches that point to blocks outside
1760  // of the region.
1761  DenseSet<BasicBlock *> FoundBlocks;
1762  for (IRInstructionData &ID : Candidate) {
1763  if (!isa<BranchInst>(ID.Inst))
1764  continue;
1765 
1766  for (Value *V : ID.OperVals) {
1767  BasicBlock *BB = static_cast<BasicBlock *>(V);
1768  DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
1769  if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
1770  continue;
1771  FoundBlocks.insert(BB);
1772  NumOutputBranches++;
1773  }
1774  }
1775 
1776  CurrentGroup.BranchesToOutside = NumOutputBranches;
1777 
1778  for (const ArrayRef<unsigned> &OutputUse :
1779  CurrentGroup.OutputGVNCombinations) {
1780  for (unsigned GVN : OutputUse) {
1781  Optional<Value *> OV = Candidate.fromGVN(GVN);
1782  assert(OV.hasValue() && "Could not find value for GVN?");
1783  Value *V = OV.getValue();
1784  InstructionCost StoreCost =
1787 
1788  // An instruction cost is added for each store set that needs to occur for
1789  // various output combinations inside the function, plus a branch to
1790  // return to the exit block.
1791  LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
1792  << " instructions to cost for output of type "
1793  << *V->getType() << "\n");
1794  OutputCost += StoreCost * NumOutputBranches;
1795  }
1796 
1797  InstructionCost BranchCost =
1799  LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
1800  << " a branch instruction\n");
1801  OutputCost += BranchCost * NumOutputBranches;
1802  }
1803 
1804  // If there is more than one output scheme, we must have a comparison and
1805  // branch for each different item in the switch statement.
1806  if (CurrentGroup.OutputGVNCombinations.size() > 1) {
1807  InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
1808  Instruction::ICmp, Type::getInt32Ty(M.getContext()),
1811  InstructionCost BranchCost =
1813 
1814  unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
1815  InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
1816 
1817  LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
1818  << " instructions for each switch case for each different"
1819  << " output path in a function\n");
1820  OutputCost += TotalCost * NumOutputBranches;
1821  }
1822 
1823  return OutputCost;
1824 }
1825 
1826 void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
1827  InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
1828  CurrentGroup.Benefit += RegionBenefit;
1829  LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
1830 
1831  InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
1832  CurrentGroup.Cost += OutputReloadCost;
1833  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1834 
1835  InstructionCost AverageRegionBenefit =
1836  RegionBenefit / CurrentGroup.Regions.size();
1837  unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
1838  unsigned NumRegions = CurrentGroup.Regions.size();
1840  getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
1841 
1842  // We add one region to the cost once, to account for the instructions added
1843  // inside of the newly created function.
1844  LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
1845  << " instructions to cost for body of new function.\n");
1846  CurrentGroup.Cost += AverageRegionBenefit;
1847  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1848 
1849  // For each argument, we must add an instruction for loading the argument
1850  // out of the register and into a value inside of the newly outlined function.
1851  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1852  << " instructions to cost for each argument in the new"
1853  << " function.\n");
1854  CurrentGroup.Cost +=
1855  OverallArgumentNum * TargetTransformInfo::TCC_Basic;
1856  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1857 
1858  // Each argument needs to either be loaded into a register or onto the stack.
1859  // Some arguments will only be loaded into the stack once the argument
1860  // registers are filled.
1861  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
1862  << " instructions to cost for each argument in the new"
1863  << " function " << NumRegions << " times for the "
1864  << "needed argument handling at the call site.\n");
1865  CurrentGroup.Cost +=
1866  2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
1867  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1868 
1869  CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
1870  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
1871 }
1872 
1873 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
1874  ArrayRef<Value *> Outputs,
1875  LoadInst *LI) {
1876  // For and load instructions following the call
1877  Value *Operand = LI->getPointerOperand();
1878  Optional<unsigned> OutputIdx = None;
1879  // Find if the operand it is an output register.
1880  for (unsigned ArgIdx = Region.NumExtractedInputs;
1881  ArgIdx < Region.Call->arg_size(); ArgIdx++) {
1882  if (Operand == Region.Call->getArgOperand(ArgIdx)) {
1883  OutputIdx = ArgIdx - Region.NumExtractedInputs;
1884  break;
1885  }
1886  }
1887 
1888  // If we found an output register, place a mapping of the new value
1889  // to the original in the mapping.
1890  if (!OutputIdx.hasValue())
1891  return;
1892 
1893  if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
1894  OutputMappings.end()) {
1895  LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
1896  << *Outputs[OutputIdx.getValue()] << "\n");
1897  OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
1898  } else {
1899  Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
1900  LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
1901  << *Outputs[OutputIdx.getValue()] << "\n");
1902  OutputMappings.insert(std::make_pair(LI, Orig));
1903  }
1904 }
1905 
1906 bool IROutliner::extractSection(OutlinableRegion &Region) {
1907  SetVector<Value *> ArgInputs, Outputs, SinkCands;
1908  assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
1909  BasicBlock *InitialStart = Region.StartBB;
1910  Function *OrigF = Region.StartBB->getParent();
1911  CodeExtractorAnalysisCache CEAC(*OrigF);
1912  Region.ExtractedFunction =
1913  Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
1914 
1915  // If the extraction was successful, find the BasicBlock, and reassign the
1916  // OutlinableRegion blocks
1917  if (!Region.ExtractedFunction) {
1918  LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
1919  << "\n");
1920  Region.reattachCandidate();
1921  return false;
1922  }
1923 
1924  // Get the block containing the called branch, and reassign the blocks as
1925  // necessary. If the original block still exists, it is because we ended on
1926  // a branch instruction, and so we move the contents into the block before
1927  // and assign the previous block correctly.
1928  User *InstAsUser = Region.ExtractedFunction->user_back();
1929  BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
1930  Region.PrevBB = RewrittenBB->getSinglePredecessor();
1931  assert(Region.PrevBB && "PrevBB is nullptr?");
1932  if (Region.PrevBB == InitialStart) {
1933  BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
1934  Instruction *BI = NewPrev->getTerminator();
1935  BI->eraseFromParent();
1936  moveBBContents(*InitialStart, *NewPrev);
1937  Region.PrevBB = NewPrev;
1938  InitialStart->eraseFromParent();
1939  }
1940 
1941  Region.StartBB = RewrittenBB;
1942  Region.EndBB = RewrittenBB;
1943 
1944  // The sequences of outlinable regions has now changed. We must fix the
1945  // IRInstructionDataList for consistency. Although they may not be illegal
1946  // instructions, they should not be compared with anything else as they
1947  // should not be outlined in this round. So marking these as illegal is
1948  // allowed.
1949  IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
1950  Instruction *BeginRewritten = &*RewrittenBB->begin();
1951  Instruction *EndRewritten = &*RewrittenBB->begin();
1952  Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
1953  *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
1954  Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
1955  *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
1956 
1957  // Insert the first IRInstructionData of the new region in front of the
1958  // first IRInstructionData of the IRSimilarityCandidate.
1959  IDL->insert(Region.Candidate->begin(), *Region.NewFront);
1960  // Insert the first IRInstructionData of the new region after the
1961  // last IRInstructionData of the IRSimilarityCandidate.
1962  IDL->insert(Region.Candidate->end(), *Region.NewBack);
1963  // Remove the IRInstructionData from the IRSimilarityCandidate.
1964  IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
1965 
1966  assert(RewrittenBB != nullptr &&
1967  "Could not find a predecessor after extraction!");
1968 
1969  // Iterate over the new set of instructions to find the new call
1970  // instruction.
1971  for (Instruction &I : *RewrittenBB)
1972  if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1973  if (Region.ExtractedFunction == CI->getCalledFunction())
1974  Region.Call = CI;
1975  } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
1976  updateOutputMapping(Region, Outputs.getArrayRef(), LI);
1977  Region.reattachCandidate();
1978  return true;
1979 }
1980 
1981 unsigned IROutliner::doOutline(Module &M) {
1982  // Find the possible similarity sections.
1983  InstructionClassifier.EnableBranches = !DisableBranches;
1984  IRSimilarityIdentifier &Identifier = getIRSI(M);
1985  SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
1986 
1987  // Sort them by size of extracted sections
1988  unsigned OutlinedFunctionNum = 0;
1989  // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
1990  // to sort them by the potential number of instructions to be outlined
1991  if (SimilarityCandidates.size() > 1)
1992  llvm::stable_sort(SimilarityCandidates,
1993  [](const std::vector<IRSimilarityCandidate> &LHS,
1994  const std::vector<IRSimilarityCandidate> &RHS) {
1995  return LHS[0].getLength() * LHS.size() >
1996  RHS[0].getLength() * RHS.size();
1997  });
1998  // Creating OutlinableGroups for each SimilarityCandidate to be used in
1999  // each of the following for loops to avoid making an allocator.
2000  std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2001 
2002  DenseSet<unsigned> NotSame;
2003  std::vector<OutlinableGroup *> NegativeCostGroups;
2004  std::vector<OutlinableRegion *> OutlinedRegions;
2005  // Iterate over the possible sets of similarity.
2006  unsigned PotentialGroupIdx = 0;
2007  for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2008  OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2009 
2010  // Remove entries that were previously outlined
2011  pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2012 
2013  // We pruned the number of regions to 0 to 1, meaning that it's not worth
2014  // trying to outlined since there is no compatible similar instance of this
2015  // code.
2016  if (CurrentGroup.Regions.size() < 2)
2017  continue;
2018 
2019  // Determine if there are any values that are the same constant throughout
2020  // each section in the set.
2021  NotSame.clear();
2022  CurrentGroup.findSameConstants(NotSame);
2023 
2024  if (CurrentGroup.IgnoreGroup)
2025  continue;
2026 
2027  // Create a CodeExtractor for each outlinable region. Identify inputs and
2028  // outputs for each section using the code extractor and create the argument
2029  // types for the Aggregate Outlining Function.
2030  OutlinedRegions.clear();
2031  for (OutlinableRegion *OS : CurrentGroup.Regions) {
2032  // Break the outlinable region out of its parent BasicBlock into its own
2033  // BasicBlocks (see function implementation).
2034  OS->splitCandidate();
2035 
2036  // There's a chance that when the region is split, extra instructions are
2037  // added to the region. This makes the region no longer viable
2038  // to be split, so we ignore it for outlining.
2039  if (!OS->CandidateSplit)
2040  continue;
2041 
2043  DenseSet<BasicBlock *> BBSet;
2044  OS->Candidate->getBasicBlocks(BBSet, BE);
2045  OS->CE = new (ExtractorAllocator.Allocate())
2046  CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2047  false, "outlined");
2048  findAddInputsOutputs(M, *OS, NotSame);
2049  if (!OS->IgnoreRegion)
2050  OutlinedRegions.push_back(OS);
2051 
2052  // We recombine the blocks together now that we have gathered all the
2053  // needed information.
2054  OS->reattachCandidate();
2055  }
2056 
2057  CurrentGroup.Regions = std::move(OutlinedRegions);
2058 
2059  if (CurrentGroup.Regions.empty())
2060  continue;
2061 
2062  CurrentGroup.collectGVNStoreSets(M);
2063 
2064  if (CostModel)
2065  findCostBenefit(M, CurrentGroup);
2066 
2067  // If we are adhering to the cost model, skip those groups where the cost
2068  // outweighs the benefits.
2069  if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2071  getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2072  ORE.emit([&]() {
2073  IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2074  OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2075  C->frontInstruction());
2076  R << "did not outline "
2077  << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2078  << " regions due to estimated increase of "
2079  << ore::NV("InstructionIncrease",
2080  CurrentGroup.Cost - CurrentGroup.Benefit)
2081  << " instructions at locations ";
2082  interleave(
2083  CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2084  [&R](OutlinableRegion *Region) {
2085  R << ore::NV(
2086  "DebugLoc",
2087  Region->Candidate->frontInstruction()->getDebugLoc());
2088  },
2089  [&R]() { R << " "; });
2090  return R;
2091  });
2092  continue;
2093  }
2094 
2095  NegativeCostGroups.push_back(&CurrentGroup);
2096  }
2097 
2098  ExtractorAllocator.DestroyAll();
2099 
2100  if (NegativeCostGroups.size() > 1)
2101  stable_sort(NegativeCostGroups,
2102  [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2103  return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2104  });
2105 
2106  std::vector<Function *> FuncsToRemove;
2107  for (OutlinableGroup *CG : NegativeCostGroups) {
2108  OutlinableGroup &CurrentGroup = *CG;
2109 
2110  OutlinedRegions.clear();
2111  for (OutlinableRegion *Region : CurrentGroup.Regions) {
2112  // We check whether our region is compatible with what has already been
2113  // outlined, and whether we need to ignore this item.
2114  if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2115  continue;
2116  OutlinedRegions.push_back(Region);
2117  }
2118 
2119  if (OutlinedRegions.size() < 2)
2120  continue;
2121 
2122  // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2123  // we are still outlining enough regions to make up for the added cost.
2124  CurrentGroup.Regions = std::move(OutlinedRegions);
2125  if (CostModel) {
2126  CurrentGroup.Benefit = 0;
2127  CurrentGroup.Cost = 0;
2128  findCostBenefit(M, CurrentGroup);
2129  if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2130  continue;
2131  }
2132  OutlinedRegions.clear();
2133  for (OutlinableRegion *Region : CurrentGroup.Regions) {
2134  Region->splitCandidate();
2135  if (!Region->CandidateSplit)
2136  continue;
2137  OutlinedRegions.push_back(Region);
2138  }
2139 
2140  CurrentGroup.Regions = std::move(OutlinedRegions);
2141  if (CurrentGroup.Regions.size() < 2) {
2142  for (OutlinableRegion *R : CurrentGroup.Regions)
2143  R->reattachCandidate();
2144  continue;
2145  }
2146 
2147  LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2148  << " and benefit " << CurrentGroup.Benefit << "\n");
2149 
2150  // Create functions out of all the sections, and mark them as outlined.
2151  OutlinedRegions.clear();
2152  for (OutlinableRegion *OS : CurrentGroup.Regions) {
2154  DenseSet<BasicBlock *> BBSet;
2155  OS->Candidate->getBasicBlocks(BBSet, BE);
2156  OS->CE = new (ExtractorAllocator.Allocate())
2157  CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2158  false, "outlined");
2159  bool FunctionOutlined = extractSection(*OS);
2160  if (FunctionOutlined) {
2161  unsigned StartIdx = OS->Candidate->getStartIdx();
2162  unsigned EndIdx = OS->Candidate->getEndIdx();
2163  for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2164  Outlined.insert(Idx);
2165 
2166  OutlinedRegions.push_back(OS);
2167  }
2168  }
2169 
2170  LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2171  << " with benefit " << CurrentGroup.Benefit
2172  << " and cost " << CurrentGroup.Cost << "\n");
2173 
2174  CurrentGroup.Regions = std::move(OutlinedRegions);
2175 
2176  if (CurrentGroup.Regions.empty())
2177  continue;
2178 
2180  getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2181  ORE.emit([&]() {
2182  IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2183  OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2184  R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2185  << " regions with decrease of "
2186  << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2187  << " instructions at locations ";
2188  interleave(
2189  CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2190  [&R](OutlinableRegion *Region) {
2191  R << ore::NV("DebugLoc",
2192  Region->Candidate->frontInstruction()->getDebugLoc());
2193  },
2194  [&R]() { R << " "; });
2195  return R;
2196  });
2197 
2198  deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2199  OutlinedFunctionNum);
2200  }
2201 
2202  for (Function *F : FuncsToRemove)
2203  F->eraseFromParent();
2204 
2205  return OutlinedFunctionNum;
2206 }
2207 
2209  CostModel = !NoCostModel;
2210  OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2211 
2212  return doOutline(M) > 0;
2213 }
2214 
2215 // Pass Manager Boilerplate
2217 public:
2218  static char ID;
2221  }
2222 
2223  void getAnalysisUsage(AnalysisUsage &AU) const override {
2227  }
2228 
2229  bool runOnModule(Module &M) override;
2230 };
2231 
2233  if (skipModule(M))
2234  return false;
2235 
2236  std::unique_ptr<OptimizationRemarkEmitter> ORE;
2237  auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2238  ORE.reset(new OptimizationRemarkEmitter(&F));
2239  return *ORE.get();
2240  };
2241 
2242  auto GTTI = [this](Function &F) -> TargetTransformInfo & {
2243  return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2244  };
2245 
2246  auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
2247  return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
2248  };
2249 
2250  return IROutliner(GTTI, GIRSI, GORE).run(M);
2251 }
2252 
2254  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2255 
2257  [&FAM](Function &F) -> TargetTransformInfo & {
2258  return FAM.getResult<TargetIRAnalysis>(F);
2259  };
2260 
2262  [&AM](Module &M) -> IRSimilarityIdentifier & {
2263  return AM.getResult<IRSimilarityAnalysis>(M);
2264  };
2265 
2266  std::unique_ptr<OptimizationRemarkEmitter> ORE;
2268  [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2269  ORE.reset(new OptimizationRemarkEmitter(&F));
2270  return *ORE.get();
2271  };
2272 
2273  if (IROutliner(GTTI, GIRSI, GORE).run(M))
2274  return PreservedAnalyses::none();
2275  return PreservedAnalyses::all();
2276 }
2277 
2278 char IROutlinerLegacyPass::ID = 0;
2279 INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
2280  false)
2285  false)
2286 
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::Instruction::isTerminator
bool isTerminator() const
Definition: Instruction.h:163
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
alignOutputBlockWithAggFunc
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
Definition: IROutliner.cpp:1314
IROutliner.h
llvm::BasicBlock::end
iterator end()
Definition: BasicBlock.h:298
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
llvm::OptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: DiagnosticInfo.h:730
getSortedConstantKeys
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:151
llvm::AttributeFuncs::mergeAttributesForOutlining
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
Definition: Attributes.cpp:2034
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
moveBBContents
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:137
OutlinableGroup::ArgumentTypes
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:69
llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1565
llvm::ReturnInst
Return a value (possibly void), from a function.
Definition: Instructions.h:2986
llvm::IRSimilarity::IRInstructionDataList
Definition: IRSimilarityIdentifier.h:241
llvm::RegionBase::end
iterator end()
Definition: RegionInfo.h:561
llvm::Function::end
iterator end()
Definition: Function.h:736
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::DIBuilder
Definition: DIBuilder.h:41
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
DebugInfoMetadata.h
OutlinableGroup::collectGVNStoreSets
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:426
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:808
llvm::Instruction::getNextNonDebugInstruction
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
Definition: Instruction.cpp:728
OutlinableGroup::EndBBs
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:80
llvm::Function
Definition: Function.h:62
llvm::Attribute
Definition: Attributes.h:52
createAndInsertBasicBlocks
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
Definition: IROutliner.cpp:1370
OutlinableGroup::OutputGVNCombinations
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:89
Pass.h
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::BasicBlock::instructionsWithoutDebug
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:104
llvm::IRSimilarityAnalysis
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Definition: IRSimilarityIdentifier.h:996
llvm::ReturnInst::getReturnValue
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Definition: Instructions.h:3031
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::SwitchInst::Create
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3402
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::find
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:718
ToRemove
ReachingDefAnalysis InstSet & ToRemove
Definition: ARMLowOverheadLoops.cpp:546
llvm::BasicBlock::replaceSuccessorsPhiUsesWith
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:461
llvm::IRSimilarity::IRSimilarityCandidate::getBasicBlocks
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
Definition: IRSimilarityIdentifier.h:732
OutlinableGroup::findSameConstants
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:419
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
EnableLinkOnceODRIROutlining
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
findConstants
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the the constants that will need to be lifted into arguments as they are not the same in each in...
Definition: IROutliner.cpp:590
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:244
OutlinableGroup::OutlinedFunctionType
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:71
fillOverallFunction
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs, std::vector< Function * > &FuncsToRemove)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
Definition: IROutliner.cpp:1476
llvm::DILocation
Debug location.
Definition: DebugInfoMetadata.h:1580
IROutlinerLegacyPass::ID
static char ID
Definition: IROutliner.cpp:2218
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::Function::arg_size
size_t arg_size() const
Definition: Function.h:782
llvm::SetVector::getArrayRef
ArrayRef< T > getArrayRef() const
Definition: SetVector.h:63
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::BasicBlock::eraseFromParent
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:133
DisableBranches
cl::opt< bool > DisableBranches
llvm::AttributeList::getFnAttrs
AttributeSet getFnAttrs() const
The function attributes are returned.
Definition: Attributes.cpp:1362
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:385
llvm::IRSimilarity::SimilarityGroup
std::vector< IRSimilarityCandidate > SimilarityGroup
Definition: IRSimilarityIdentifier.h:857
llvm::Optional< unsigned >
llvm::TargetTransformInfo::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:799
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::BasicBlock::getSinglePredecessor
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:268
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::IRSimilarity::SimilarityGroupList
std::vector< SimilarityGroup > SimilarityGroupList
Definition: IRSimilarityIdentifier.h:858
llvm::successors
succ_range successors(Instruction *I)
Definition: CFG.h:262
llvm::Mangler
Definition: Mangler.h:27
OutlinableGroup::OutlinedFunction
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:73
llvm::detail::DenseSetImpl::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:144
llvm::IROutlinerPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: IROutliner.cpp:2253
llvm::DominatorTreeBase::getDescendants
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
Definition: GenericDomTree.h:374
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:241
IROutlinerLegacyPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: IROutliner.cpp:2223
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::BasicBlock::getUniqueSuccessor
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:306
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::Value::isSwiftError
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1014
llvm::Optional::hasValue
constexpr bool hasValue() const
Definition: Optional.h:288
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
CommandLine.h
llvm::interleave
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:1794
OutlinableGroup::Regions
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:65
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
findExtractedOutputToOverallOutputMapping
static void findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
Definition: IROutliner.cpp:846
analyzeAndPruneOutputBlocks
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
Definition: IROutliner.cpp:1267
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
DEBUG_TYPE
#define DEBUG_TYPE
Definition: IROutliner.cpp:32
llvm::IRSimilarityIdentifierWrapperPass
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
Definition: IRSimilarityIdentifier.h:976
replaceArgumentUses
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, bool FirstFunction=false)
Definition: IROutliner.cpp:1083
llvm::DominatorTreeBase::insertEdge
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
Definition: GenericDomTree.h:584
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
findDuplicateOutputBlock
Optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
Definition: IROutliner.cpp:1212
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
OutlinableGroup::NumAggregateInputs
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:97
findExtractedInputToOverallInputMapping
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:747
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1518
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
OutlinableGroup::InputTypesSet
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:93
llvm::OutlinableRegion::CandidateSplit
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:120
false
Definition: StackSlotColoring.cpp:142
llvm::Mangler::getNameWithPrefix
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:119
IROutlinerLegacyPass::IROutlinerLegacyPass
IROutlinerLegacyPass()
Definition: IROutliner.cpp:2219
llvm::detail::DenseSetImpl::size
size_type size() const
Definition: DenseSet.h:81
llvm::Instruction
Definition: Instruction.h:45
IROutlinerLegacyPass::runOnModule
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
Definition: IROutliner.cpp:2232
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
mapInputsToGVNs
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:620
IR
Statically lint checks LLVM IR
Definition: Lint.cpp:744
llvm::OutlinableRegion::reattachCandidate
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:240
llvm::OutlinableRegion::OutputBlockNum
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:79
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::OutlinableRegion::findCorrespondingValueIn
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:166
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:245
llvm::None
const NoneType None
Definition: None.h:23
remapExtractedInputs
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:646
llvm::M68kBeads::Term
@ Term
Definition: M68kBaseInfo.h:71
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:327
llvm::IRSimilarity::IRSimilarityCandidate::getEndIdx
unsigned getEndIdx() const
Definition: IRSimilarityIdentifier.h:772
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
OutlinableGroup::CanonicalNumberToAggArg
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:101
llvm::OutlinableRegion::getBenefit
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:326
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:297
llvm::cl::opt< bool >
llvm::OutlinableRegion::ExtractedFunction
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:116
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::GVN
The core GVN pass object.
Definition: GVN.h:118
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::simple_ilist< IRInstructionData, ilist_sentinel_tracking< true > >::iterator
ilist_iterator< OptionsT, false, false > iterator
Definition: simple_ilist.h:95
llvm::OutlinableRegion::Call
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:113
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::simple_ilist::erase
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:196
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1782
IPO.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3124
llvm::DenseMap< Value *, BasicBlock * >
OutlinableGroup::PHIBlocks
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:84
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::OutlinableRegion::CE
CodeExtractor * CE
Used to create an outlined function.
Definition: IROutliner.h:110
DIBuilder.h
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1335
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
IROutlinerLegacyPass
Definition: IROutliner.cpp:2216
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::Function::Create
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:139
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::initializeIROutlinerLegacyPassPass
void initializeIROutlinerLegacyPassPass(PassRegistry &)
SI
StandardInstrumentations SI(Debug, VerifyEach)
collectRegionsConstants
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:371
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::IRSimilarity::IRInstructionData
This provides the utilities for hashing an Instruction to an unsigned integer.
Definition: IRSimilarityIdentifier.h:113
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::detail::DenseSetImpl::clear
void clear()
Definition: DenseSet.h:92
Mangler.h
llvm::IRSimilarity::IRSimilarityCandidate::front
IRInstructionData * front() const
Definition: IRSimilarityIdentifier.h:775
OutlinableGroup::Cost
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:112
llvm::NVPTXISD::Dummy
@ Dummy
Definition: NVPTXISelLowering.h:60
llvm::IROutliner::run
bool run(Module &M)
Definition: IROutliner.cpp:2208
OutlinableGroup
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:63
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false, false) INITIALIZE_PASS_END(IROutlinerLegacyPass
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::IRSimilarity::IRInstructionData::Inst
Instruction * Inst
The source Instruction that is being wrapped.
Definition: IRSimilarityIdentifier.h:117
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::OutlinableRegion::splitCandidate
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:176
constantMatches
static Optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:303
llvm::createIROutlinerPass
ModulePass * createIROutlinerPass()
createIROutlinerPass - This pass finds similar code regions and factors those regions out into functi...
Definition: IROutliner.cpp:2287
replaceCalledFunction
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
Definition: IROutliner.cpp:969
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
OutlinableGroup::Benefit
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:109
llvm::detail::DenseSetImpl::contains
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:100
llvm::RegionBase::begin
iterator begin()
Definition: RegionInfo.h:560
nextIRInstructionDataMatchesNextInst
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
Definition: IROutliner.cpp:1569
findCostForOutputBlocks
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
Definition: IROutliner.cpp:1749
llvm::CodeExtractor
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
llvm::OutlinableRegion::IgnoreRegion
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition: IROutliner.h:123
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
OutlinableGroup::IgnoreGroup
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:77
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::Function::begin
iterator begin()
Definition: Function.h:734
NoCostModel
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
getSubprogramOrNull
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:441
Attributes.h
llvm::Function::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:564
replaceConstants
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
Definition: IROutliner.cpp:1179
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::GlobalValue::hasLinkOnceODRLinkage
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:442
llvm::OptimizationRemarkEmitterWrapperPass
OptimizationRemarkEmitter legacy analysis pass.
Definition: OptimizationRemarkEmitter.h:146
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1686
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::Function::getArg
Argument * getArg(unsigned i) const
Definition: Function.h:767
llvm::Function::back
const BasicBlock & back() const
Definition: Function.h:743
llvm::OutlinableRegion
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:64
IRSimilarityIdentifier.h
llvm::IRSimilarity::IRSimilarityCandidate::getStartIdx
unsigned getStartIdx() const
Definition: IRSimilarityIdentifier.h:769
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:100
PassManager.h
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::object::Identifier
@ Identifier
Definition: COFFModuleDefinition.cpp:36
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:177
llvm::IRSimilarity::IRSimilarityCandidate
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
Definition: IRSimilarityIdentifier.h:549
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:225
llvm::CodeExtractorAnalysisCache
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
getCodeExtractorArguments
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:676
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::BasicBlock::getInstList
const InstListType & getInstList() const
Return the underlying instruction list container.
Definition: BasicBlock.h:363
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
Dominators.h
OutlinableGroup::SwiftErrorArgument
Optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:116
llvm::BasicBlock::size
size_t size() const
Definition: BasicBlock.h:306
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
TargetTransformInfo.h
llvm::Function::addFnAttr
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:536
llvm::PHINode
Definition: Instructions.h:2633
llvm::DISubprogram
Subprogram description.
Definition: DebugInfoMetadata.h:1820
moveFunctionData
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:528
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:940
llvm::IROutliner
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:180
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::IRSimilarity::IRSimilarityIdentifier
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
Definition: IRSimilarityIdentifier.h:883
llvm::Value::replaceUsesWithIf
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::simple_ilist::insert
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:159
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::SwitchInst::addCase
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Definition: Instructions.cpp:4184
llvm::cl::desc
Definition: CommandLine.h:412
llvm::Region
Definition: RegionInfo.h:889
llvm::SetVector< Value * >
CU
Definition: AArch64AsmBackend.cpp:501
OutlinableGroup::BranchesToOutside
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:105
InitializePasses.h
iroutliner
iroutliner
Definition: IROutliner.cpp:2284
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::DominatorTreeBase::deleteEdge
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Definition: GenericDomTree.h:602
llvm::OutlinableRegion::Candidate
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:66
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:282
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1184
llvm::DIFile
File.
Definition: DebugInfoMetadata.h:530
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:103
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
createSwitchStatement
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * >> &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
Definition: IROutliner.cpp:1396
llvm::Function::size
size_t size() const
Definition: Function.h:739
llvm::Function::iterator
BasicBlockListType::iterator iterator
Definition: Function.h:68
llvm::RegionBase::getParent
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
Outliner
IR Outliner
Definition: IROutliner.cpp:2284
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37