LLVM 17.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Pass.h"
28#include "llvm/Transforms/IPO.h"
29#include <optional>
30#include <vector>
31
32#define DEBUG_TYPE "iroutliner"
33
34using namespace llvm;
35using namespace IRSimilarity;
36
37// A command flag to be used for debugging to exclude branches from similarity
38// matching and outlining.
39namespace llvm {
41
42// A command flag to be used for debugging to indirect calls from similarity
43// matching and outlining.
45
46// A command flag to be used for debugging to exclude intrinsics from similarity
47// matching and outlining.
49
50} // namespace llvm
51
52// Set to true if the user wants the ir outliner to run on linkonceodr linkage
53// functions. This is false by default because the linker can dedupe linkonceodr
54// functions. Since the outliner is confined to a single module (modulo LTO),
55// this is off by default. It should, however, be the default behavior in
56// LTO.
58 "enable-linkonceodr-ir-outlining", cl::Hidden,
59 cl::desc("Enable the IR outliner on linkonceodr functions"),
60 cl::init(false));
61
62// This is a debug option to test small pieces of code to ensure that outlining
63// works correctly.
65 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
66 cl::desc("Debug option to outline greedily, without restriction that "
67 "calculated benefit outweighs cost"));
68
69/// The OutlinableGroup holds all the overarching information for outlining
70/// a set of regions that are structurally similar to one another, such as the
71/// types of the overall function, the output blocks, the sets of stores needed
72/// and a list of the different regions. This information is used in the
73/// deduplication of extracted regions with the same structure.
75 /// The sections that could be outlined
76 std::vector<OutlinableRegion *> Regions;
77
78 /// The argument types for the function created as the overall function to
79 /// replace the extracted function for each region.
80 std::vector<Type *> ArgumentTypes;
81 /// The FunctionType for the overall function.
83 /// The Function for the collective overall function.
85
86 /// Flag for whether we should not consider this group of OutlinableRegions
87 /// for extraction.
88 bool IgnoreGroup = false;
89
90 /// The return blocks for the overall function.
92
93 /// The PHIBlocks with their corresponding return block based on the return
94 /// value as the key.
96
97 /// A set containing the different GVN store sets needed. Each array contains
98 /// a sorted list of the different values that need to be stored into output
99 /// registers.
101
102 /// Flag for whether the \ref ArgumentTypes have been defined after the
103 /// extraction of the first region.
104 bool InputTypesSet = false;
105
106 /// The number of input values in \ref ArgumentTypes. Anything after this
107 /// index in ArgumentTypes is an output argument.
108 unsigned NumAggregateInputs = 0;
109
110 /// The mapping of the canonical numbering of the values in outlined sections
111 /// to specific arguments.
113
114 /// The number of branches in the region target a basic block that is outside
115 /// of the region.
116 unsigned BranchesToOutside = 0;
117
118 /// Tracker counting backwards from the highest unsigned value possible to
119 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
120 /// -2 and -1 are assigned by the DenseMap.
121 unsigned PHINodeGVNTracker = -3;
122
124 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
127
128 /// The number of instructions that will be outlined by extracting \ref
129 /// Regions.
131 /// The number of added instructions needed for the outlining of the \ref
132 /// Regions.
134
135 /// The argument that needs to be marked with the swifterr attribute. If not
136 /// needed, there is no value.
137 std::optional<unsigned> SwiftErrorArgument;
138
139 /// For the \ref Regions, we look at every Value. If it is a constant,
140 /// we check whether it is the same in Region.
141 ///
142 /// \param [in,out] NotSame contains the global value numbers where the
143 /// constant is not always the same, and must be passed in as an argument.
145
146 /// For the regions, look at each set of GVN stores needed and account for
147 /// each combination. Add an argument to the argument types if there is
148 /// more than one combination.
149 ///
150 /// \param [in] M - The module we are outlining from.
152};
153
154/// Move the contents of \p SourceBB to before the last instruction of \p
155/// TargetBB.
156/// \param SourceBB - the BasicBlock to pull Instructions from.
157/// \param TargetBB - the BasicBlock to put Instruction into.
158static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
159 for (Instruction &I : llvm::make_early_inc_range(SourceBB))
160 I.moveBefore(TargetBB, TargetBB.end());
161}
162
163/// A function to sort the keys of \p Map, which must be a mapping of constant
164/// values to basic blocks and return it in \p SortedKeys
165///
166/// \param SortedKeys - The vector the keys will be return in and sorted.
167/// \param Map - The DenseMap containing keys to sort.
168static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
170 for (auto &VtoBB : Map)
171 SortedKeys.push_back(VtoBB.first);
172
173 // Here we expect to have either 1 value that is void (nullptr) or multiple
174 // values that are all constant integers.
175 if (SortedKeys.size() == 1) {
176 assert(!SortedKeys[0] && "Expected a single void value.");
177 return;
178 }
179
180 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
181 assert(LHS && RHS && "Expected non void values.");
182 const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
183 const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
184 assert(RHSC && "Not a constant integer in return value?");
185 assert(LHSC && "Not a constant integer in return value?");
186
187 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
188 });
189}
190
192 Value *V) {
193 std::optional<unsigned> GVN = Candidate->getGVN(V);
194 assert(GVN && "No GVN for incoming value");
195 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
196 std::optional<unsigned> FirstGVN =
197 Other.Candidate->fromCanonicalNum(*CanonNum);
198 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
199 return FoundValueOpt.value_or(nullptr);
200}
201
204 BasicBlock *BB) {
205 Instruction *FirstNonPHI = BB->getFirstNonPHI();
206 assert(FirstNonPHI && "block is empty?");
207 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
208 if (!CorrespondingVal)
209 return nullptr;
210 BasicBlock *CorrespondingBlock =
211 cast<Instruction>(CorrespondingVal)->getParent();
212 return CorrespondingBlock;
213}
214
215/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
216/// in \p Included to branch to BasicBlock \p Replace if they currently branch
217/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
218/// when PHINodes are included in outlined regions.
219///
220/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
221/// checked.
222/// \param Find - The successor block to be replaced.
223/// \param Replace - The new succesor block to branch to.
224/// \param Included - The set of blocks about to be outlined.
226 BasicBlock *Replace,
227 DenseSet<BasicBlock *> &Included) {
228 for (PHINode &PN : PHIBlock->phis()) {
229 for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
230 ++Idx) {
231 // Check if the incoming block is included in the set of blocks being
232 // outlined.
233 BasicBlock *Incoming = PN.getIncomingBlock(Idx);
234 if (!Included.contains(Incoming))
235 continue;
236
237 BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
238 assert(BI && "Not a branch instruction?");
239 // Look over the branching instructions into this block to see if we
240 // used to branch to Find in this outlined block.
241 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
242 Succ++) {
243 // If we have found the block to replace, we do so here.
244 if (BI->getSuccessor(Succ) != Find)
245 continue;
246 BI->setSuccessor(Succ, Replace);
247 }
248 }
249 }
250}
251
252
254 assert(!CandidateSplit && "Candidate already split!");
255
257
258 Instruction *EndInst = nullptr;
259 // Check whether the last instruction is a terminator, if it is, we do
260 // not split on the following instruction. We leave the block as it is. We
261 // also check that this is not the last instruction in the Module, otherwise
262 // the check for whether the current following instruction matches the
263 // previously recorded instruction will be incorrect.
264 if (!BackInst->isTerminator() ||
265 BackInst->getParent() != &BackInst->getFunction()->back()) {
266 EndInst = Candidate->end()->Inst;
267 assert(EndInst && "Expected an end instruction?");
268 }
269
270 // We check if the current instruction following the last instruction in the
271 // region is the same as the recorded instruction following the last
272 // instruction. If they do not match, there could be problems in rewriting
273 // the program after outlining, so we ignore it.
274 if (!BackInst->isTerminator() &&
275 EndInst != BackInst->getNextNonDebugInstruction())
276 return;
277
278 Instruction *StartInst = (*Candidate->begin()).Inst;
279 assert(StartInst && "Expected a start instruction?");
280 StartBB = StartInst->getParent();
281 PrevBB = StartBB;
282
285
286 // We iterate over the instructions in the region, if we find a PHINode, we
287 // check if there are predecessors outside of the region, if there are,
288 // we ignore this region since we are unable to handle the severing of the
289 // phi node right now.
290
291 // TODO: Handle extraneous inputs for PHINodes through variable number of
292 // inputs, similar to how outputs are handled.
293 BasicBlock::iterator It = StartInst->getIterator();
294 EndBB = BackInst->getParent();
295 BasicBlock *IBlock;
296 BasicBlock *PHIPredBlock = nullptr;
297 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
298 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
299 unsigned NumPredsOutsideRegion = 0;
300 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
301 if (!BBSet.contains(PN->getIncomingBlock(i))) {
302 PHIPredBlock = PN->getIncomingBlock(i);
303 ++NumPredsOutsideRegion;
304 continue;
305 }
306
307 // We must consider the case there the incoming block to the PHINode is
308 // the same as the final block of the OutlinableRegion. If this is the
309 // case, the branch from this block must also be outlined to be valid.
310 IBlock = PN->getIncomingBlock(i);
311 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
312 PHIPredBlock = PN->getIncomingBlock(i);
313 ++NumPredsOutsideRegion;
314 }
315 }
316
317 if (NumPredsOutsideRegion > 1)
318 return;
319
320 It++;
321 }
322
323 // If the region starts with a PHINode, but is not the initial instruction of
324 // the BasicBlock, we ignore this region for now.
325 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
326 return;
327
328 // If the region ends with a PHINode, but does not contain all of the phi node
329 // instructions of the region, we ignore it for now.
330 if (isa<PHINode>(BackInst) &&
331 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
332 return;
333
334 // The basic block gets split like so:
335 // block: block:
336 // inst1 inst1
337 // inst2 inst2
338 // region1 br block_to_outline
339 // region2 block_to_outline:
340 // region3 -> region1
341 // region4 region2
342 // inst3 region3
343 // inst4 region4
344 // br block_after_outline
345 // block_after_outline:
346 // inst3
347 // inst4
348
349 std::string OriginalName = PrevBB->getName().str();
350
351 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
353 // If there was a PHINode with an incoming block outside the region,
354 // make sure is correctly updated in the newly split block.
355 if (PHIPredBlock)
357
358 CandidateSplit = true;
359 if (!BackInst->isTerminator()) {
360 EndBB = EndInst->getParent();
361 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
364 } else {
365 EndBB = BackInst->getParent();
366 EndsInBranch = true;
367 FollowBB = nullptr;
368 }
369
370 // Refind the basic block set.
371 BBSet.clear();
373 // For the phi nodes in the new starting basic block of the region, we
374 // reassign the targets of the basic blocks branching instructions.
376 if (FollowBB)
378}
379
381 assert(CandidateSplit && "Candidate is not split!");
382
383 // The basic block gets reattached like so:
384 // block: block:
385 // inst1 inst1
386 // inst2 inst2
387 // br block_to_outline region1
388 // block_to_outline: -> region2
389 // region1 region3
390 // region2 region4
391 // region3 inst3
392 // region4 inst4
393 // br block_after_outline
394 // block_after_outline:
395 // inst3
396 // inst4
397 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
398
399 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
400 // Make sure PHINode references to the block we are merging into are
401 // updated to be incoming blocks from the predecessor to the current block.
402
403 // NOTE: If this is updated such that the outlined block can have more than
404 // one incoming block to a PHINode, this logic will have to updated
405 // to handle multiple precessors instead.
406
407 // We only need to update this if the outlined section contains a PHINode, if
408 // it does not, then the incoming block was never changed in the first place.
409 // On the other hand, if PrevBB has no predecessors, it means that all
410 // incoming blocks to the first block are contained in the region, and there
411 // will be nothing to update.
412 Instruction *StartInst = (*Candidate->begin()).Inst;
413 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
415 "PrevBB has more than one predecessor. Should be 0 or 1.");
416 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
418 }
420
421 // If we reattaching after outlining, we iterate over the phi nodes to
422 // the initial block, and reassign the branch instructions of the incoming
423 // blocks to the block we are remerging into.
424 if (!ExtractedFunction) {
427
429 if (!EndsInBranch)
431 }
432
434
435 BasicBlock *PlacementBB = PrevBB;
436 if (StartBB != EndBB)
437 PlacementBB = EndBB;
438 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
439 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
440 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
441 PlacementBB->getTerminator()->eraseFromParent();
442 moveBBContents(*FollowBB, *PlacementBB);
443 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
445 }
446
449
450 // Make sure to save changes back to the StartBB.
451 StartBB = PrevBB;
452 EndBB = nullptr;
453 PrevBB = nullptr;
454 FollowBB = nullptr;
455
456 CandidateSplit = false;
457}
458
459/// Find whether \p V matches the Constants previously found for the \p GVN.
460///
461/// \param V - The value to check for consistency.
462/// \param GVN - The global value number assigned to \p V.
463/// \param GVNToConstant - The mapping of global value number to Constants.
464/// \returns true if the Value matches the Constant mapped to by V and false if
465/// it \p V is a Constant but does not match.
466/// \returns std::nullopt if \p V is not a Constant.
467static std::optional<bool>
468constantMatches(Value *V, unsigned GVN,
469 DenseMap<unsigned, Constant *> &GVNToConstant) {
470 // See if we have a constants
471 Constant *CST = dyn_cast<Constant>(V);
472 if (!CST)
473 return std::nullopt;
474
475 // Holds a mapping from a global value number to a Constant.
477 bool Inserted;
478
479
480 // If we have a constant, try to make a new entry in the GVNToConstant.
481 std::tie(GVNToConstantIt, Inserted) =
482 GVNToConstant.insert(std::make_pair(GVN, CST));
483 // If it was found and is not equal, it is not the same. We do not
484 // handle this case yet, and exit early.
485 if (Inserted || (GVNToConstantIt->second == CST))
486 return true;
487
488 return false;
489}
490
492 InstructionCost Benefit = 0;
493
494 // Estimate the benefit of outlining a specific sections of the program. We
495 // delegate mostly this task to the TargetTransformInfo so that if the target
496 // has specific changes, we can have a more accurate estimate.
497
498 // However, getInstructionCost delegates the code size calculation for
499 // arithmetic instructions to getArithmeticInstrCost in
500 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
501 // code size for a division and remainder instruction to be equal to 4, and
502 // everything else to 1. This is not an accurate representation of the
503 // division instruction for targets that have a native division instruction.
504 // To be overly conservative, we only add 1 to the number of instructions for
505 // each division instruction.
506 for (IRInstructionData &ID : *Candidate) {
507 Instruction *I = ID.Inst;
508 switch (I->getOpcode()) {
509 case Instruction::FDiv:
510 case Instruction::FRem:
511 case Instruction::SDiv:
512 case Instruction::SRem:
513 case Instruction::UDiv:
514 case Instruction::URem:
515 Benefit += 1;
516 break;
517 default:
519 break;
520 }
521 }
522
523 return Benefit;
524}
525
526/// Check the \p OutputMappings structure for value \p Input, if it exists
527/// it has been used as an output for outlining, and has been renamed, and we
528/// return the new value, otherwise, we return the same value.
529///
530/// \param OutputMappings [in] - The mapping of values to their renamed value
531/// after being used as an output for an outlined region.
532/// \param Input [in] - The value to find the remapped value of, if it exists.
533/// \return The remapped value if it has been renamed, and the same value if has
534/// not.
536 Value *Input) {
538 OutputMappings.find(Input);
539 if (OutputMapping != OutputMappings.end())
540 return OutputMapping->second;
541 return Input;
542}
543
544/// Find whether \p Region matches the global value numbering to Constant
545/// mapping found so far.
546///
547/// \param Region - The OutlinableRegion we are checking for constants
548/// \param GVNToConstant - The mapping of global value number to Constants.
549/// \param NotSame - The set of global value numbers that do not have the same
550/// constant in each region.
551/// \returns true if all Constants are the same in every use of a Constant in \p
552/// Region and false if not
553static bool
555 DenseMap<unsigned, Constant *> &GVNToConstant,
556 DenseSet<unsigned> &NotSame) {
557 bool ConstantsTheSame = true;
558
559 IRSimilarityCandidate &C = *Region.Candidate;
560 for (IRInstructionData &ID : C) {
561
562 // Iterate over the operands in an instruction. If the global value number,
563 // assigned by the IRSimilarityCandidate, has been seen before, we check if
564 // the the number has been found to be not the same value in each instance.
565 for (Value *V : ID.OperVals) {
566 std::optional<unsigned> GVNOpt = C.getGVN(V);
567 assert(GVNOpt && "Expected a GVN for operand?");
568 unsigned GVN = *GVNOpt;
569
570 // Check if this global value has been found to not be the same already.
571 if (NotSame.contains(GVN)) {
572 if (isa<Constant>(V))
573 ConstantsTheSame = false;
574 continue;
575 }
576
577 // If it has been the same so far, we check the value for if the
578 // associated Constant value match the previous instances of the same
579 // global value number. If the global value does not map to a Constant,
580 // it is considered to not be the same value.
581 std::optional<bool> ConstantMatches =
582 constantMatches(V, GVN, GVNToConstant);
583 if (ConstantMatches) {
584 if (*ConstantMatches)
585 continue;
586 else
587 ConstantsTheSame = false;
588 }
589
590 // While this value is a register, it might not have been previously,
591 // make sure we don't already have a constant mapped to this global value
592 // number.
593 if (GVNToConstant.find(GVN) != GVNToConstant.end())
594 ConstantsTheSame = false;
595
596 NotSame.insert(GVN);
597 }
598 }
599
600 return ConstantsTheSame;
601}
602
604 DenseMap<unsigned, Constant *> GVNToConstant;
605
606 for (OutlinableRegion *Region : Regions)
607 collectRegionsConstants(*Region, GVNToConstant, NotSame);
608}
609
611 for (OutlinableRegion *OS : Regions)
612 OutputGVNCombinations.insert(OS->GVNStores);
613
614 // We are adding an extracted argument to decide between which output path
615 // to use in the basic block. It is used in a switch statement and only
616 // needs to be an integer.
617 if (OutputGVNCombinations.size() > 1)
618 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
619}
620
621/// Get the subprogram if it exists for one of the outlined regions.
622///
623/// \param [in] Group - The set of regions to find a subprogram for.
624/// \returns the subprogram if it exists, or nullptr.
626 for (OutlinableRegion *OS : Group.Regions)
627 if (Function *F = OS->Call->getFunction())
628 if (DISubprogram *SP = F->getSubprogram())
629 return SP;
630
631 return nullptr;
632}
633
634Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
635 unsigned FunctionNameSuffix) {
636 assert(!Group.OutlinedFunction && "Function is already defined!");
637
638 Type *RetTy = Type::getVoidTy(M.getContext());
639 // All extracted functions _should_ have the same return type at this point
640 // since the similarity identifier ensures that all branches outside of the
641 // region occur in the same place.
642
643 // NOTE: Should we ever move to the model that uses a switch at every point
644 // needed, meaning that we could branch within the region or out, it is
645 // possible that we will need to switch to using the most general case all of
646 // the time.
647 for (OutlinableRegion *R : Group.Regions) {
648 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
649 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
650 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
651 RetTy = ExtractedFuncType;
652 }
653
655 RetTy, Group.ArgumentTypes, false);
656
657 // These functions will only be called from within the same module, so
658 // we can set an internal linkage.
661 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
662
663 // Transfer the swifterr attribute to the correct function parameter.
664 if (Group.SwiftErrorArgument)
666 Attribute::SwiftError);
667
668 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
669 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
670
671 // If there's a DISubprogram associated with this outlined function, then
672 // emit debug info for the outlined function.
673 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
674 Function *F = Group.OutlinedFunction;
675 // We have a DISubprogram. Get its DICompileUnit.
676 DICompileUnit *CU = SP->getUnit();
677 DIBuilder DB(M, true, CU);
678 DIFile *Unit = SP->getFile();
679 Mangler Mg;
680 // Get the mangled name of the function for the linkage name.
681 std::string Dummy;
682 llvm::raw_string_ostream MangledNameStream(Dummy);
683 Mg.getNameWithPrefix(MangledNameStream, F, false);
684
685 DISubprogram *OutlinedSP = DB.createFunction(
686 Unit /* Context */, F->getName(), MangledNameStream.str(),
687 Unit /* File */,
688 0 /* Line 0 is reserved for compiler-generated code. */,
689 DB.createSubroutineType(
690 DB.getOrCreateTypeArray(std::nullopt)), /* void type */
691 0, /* Line 0 is reserved for compiler-generated code. */
692 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
693 /* Outlined code is optimized code by definition. */
694 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
695
696 // Don't add any new variables to the subprogram.
697 DB.finalizeSubprogram(OutlinedSP);
698
699 // Attach subprogram to the function.
700 F->setSubprogram(OutlinedSP);
701 // We're done with the DIBuilder.
702 DB.finalize();
703 }
704
705 return Group.OutlinedFunction;
706}
707
708/// Move each BasicBlock in \p Old to \p New.
709///
710/// \param [in] Old - The function to move the basic blocks from.
711/// \param [in] New - The function to move the basic blocks to.
712/// \param [out] NewEnds - The return blocks of the new overall function.
713static void moveFunctionData(Function &Old, Function &New,
715 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
716 CurrBB.removeFromParent();
717 CurrBB.insertInto(&New);
718 Instruction *I = CurrBB.getTerminator();
719
720 // For each block we find a return instruction is, it is a potential exit
721 // path for the function. We keep track of each block based on the return
722 // value here.
723 if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
724 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
725
726 std::vector<Instruction *> DebugInsts;
727
728 for (Instruction &Val : CurrBB) {
729 // We must handle the scoping of called functions differently than
730 // other outlined instructions.
731 if (!isa<CallInst>(&Val)) {
732 // Remove the debug information for outlined functions.
733 Val.setDebugLoc(DebugLoc());
734
735 // Loop info metadata may contain line locations. Update them to have no
736 // value in the new subprogram since the outlined code could be from
737 // several locations.
738 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
739 if (DISubprogram *SP = New.getSubprogram())
740 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
741 return DILocation::get(New.getContext(), Loc->getLine(),
742 Loc->getColumn(), SP, nullptr);
743 return MD;
744 };
745 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
746 continue;
747 }
748
749 // From this point we are only handling call instructions.
750 CallInst *CI = cast<CallInst>(&Val);
751
752 // We add any debug statements here, to be removed after. Since the
753 // instructions originate from many different locations in the program,
754 // it will cause incorrect reporting from a debugger if we keep the
755 // same debug instructions.
756 if (isa<DbgInfoIntrinsic>(CI)) {
757 DebugInsts.push_back(&Val);
758 continue;
759 }
760
761 // Edit the scope of called functions inside of outlined functions.
762 if (DISubprogram *SP = New.getSubprogram()) {
763 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
764 Val.setDebugLoc(DI);
765 }
766 }
767
768 for (Instruction *I : DebugInsts)
769 I->eraseFromParent();
770 }
771}
772
773/// Find the the constants that will need to be lifted into arguments
774/// as they are not the same in each instance of the region.
775///
776/// \param [in] C - The IRSimilarityCandidate containing the region we are
777/// analyzing.
778/// \param [in] NotSame - The set of global value numbers that do not have a
779/// single Constant across all OutlinableRegions similar to \p C.
780/// \param [out] Inputs - The list containing the global value numbers of the
781/// arguments needed for the region of code.
783 std::vector<unsigned> &Inputs) {
785 // Iterate over the instructions, and find what constants will need to be
786 // extracted into arguments.
787 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
788 IDIt != EndIDIt; IDIt++) {
789 for (Value *V : (*IDIt).OperVals) {
790 // Since these are stored before any outlining, they will be in the
791 // global value numbering.
792 unsigned GVN = *C.getGVN(V);
793 if (isa<Constant>(V))
794 if (NotSame.contains(GVN) && !Seen.contains(GVN)) {
795 Inputs.push_back(GVN);
796 Seen.insert(GVN);
797 }
798 }
799 }
800}
801
802/// Find the GVN for the inputs that have been found by the CodeExtractor.
803///
804/// \param [in] C - The IRSimilarityCandidate containing the region we are
805/// analyzing.
806/// \param [in] CurrentInputs - The set of inputs found by the
807/// CodeExtractor.
808/// \param [in] OutputMappings - The mapping of values that have been replaced
809/// by a new output value.
810/// \param [out] EndInputNumbers - The global value numbers for the extracted
811/// arguments.
813 SetVector<Value *> &CurrentInputs,
814 const DenseMap<Value *, Value *> &OutputMappings,
815 std::vector<unsigned> &EndInputNumbers) {
816 // Get the Global Value Number for each input. We check if the Value has been
817 // replaced by a different value at output, and use the original value before
818 // replacement.
819 for (Value *Input : CurrentInputs) {
820 assert(Input && "Have a nullptr as an input");
821 if (OutputMappings.find(Input) != OutputMappings.end())
822 Input = OutputMappings.find(Input)->second;
823 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
824 EndInputNumbers.push_back(*C.getGVN(Input));
825 }
826}
827
828/// Find the original value for the \p ArgInput values if any one of them was
829/// replaced during a previous extraction.
830///
831/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
832/// \param [in] OutputMappings - The mapping of values that have been replaced
833/// by a new output value.
834/// \param [out] RemappedArgInputs - The remapped values according to
835/// \p OutputMappings that will be extracted.
836static void
838 const DenseMap<Value *, Value *> &OutputMappings,
839 SetVector<Value *> &RemappedArgInputs) {
840 // Get the global value number for each input that will be extracted as an
841 // argument by the code extractor, remapping if needed for reloaded values.
842 for (Value *Input : ArgInputs) {
843 if (OutputMappings.find(Input) != OutputMappings.end())
844 Input = OutputMappings.find(Input)->second;
845 RemappedArgInputs.insert(Input);
846 }
847}
848
849/// Find the input GVNs and the output values for a region of Instructions.
850/// Using the code extractor, we collect the inputs to the extracted function.
851///
852/// The \p Region can be identified as needing to be ignored in this function.
853/// It should be checked whether it should be ignored after a call to this
854/// function.
855///
856/// \param [in,out] Region - The region of code to be analyzed.
857/// \param [out] InputGVNs - The global value numbers for the extracted
858/// arguments.
859/// \param [in] NotSame - The global value numbers in the region that do not
860/// have the same constant value in the regions structurally similar to
861/// \p Region.
862/// \param [in] OutputMappings - The mapping of values that have been replaced
863/// by a new output value after extraction.
864/// \param [out] ArgInputs - The values of the inputs to the extracted function.
865/// \param [out] Outputs - The set of values extracted by the CodeExtractor
866/// as outputs.
868 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
869 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
870 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
871 IRSimilarityCandidate &C = *Region.Candidate;
872
873 // OverallInputs are the inputs to the region found by the CodeExtractor,
874 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
875 // allocas of values whose lifetimes are contained completely within the
876 // outlined region. PremappedInputs are the arguments found by the
877 // CodeExtractor, removing conditions such as sunken allocas, but that
878 // may need to be remapped due to the extracted output values replacing
879 // the original values. We use DummyOutputs for this first run of finding
880 // inputs and outputs since the outputs could change during findAllocas,
881 // the correct set of extracted outputs will be in the final Outputs ValueSet.
882 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
883 DummyOutputs;
884
885 // Use the code extractor to get the inputs and outputs, without sunken
886 // allocas or removing llvm.assumes.
887 CodeExtractor *CE = Region.CE;
888 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
889 assert(Region.StartBB && "Region must have a start BasicBlock!");
890 Function *OrigF = Region.StartBB->getParent();
891 CodeExtractorAnalysisCache CEAC(*OrigF);
892 BasicBlock *Dummy = nullptr;
893
894 // The region may be ineligible due to VarArgs in the parent function. In this
895 // case we ignore the region.
896 if (!CE->isEligible()) {
897 Region.IgnoreRegion = true;
898 return;
899 }
900
901 // Find if any values are going to be sunk into the function when extracted
902 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
903 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
904
905 // TODO: Support regions with sunken allocas: values whose lifetimes are
906 // contained completely within the outlined region. These are not guaranteed
907 // to be the same in every region, so we must elevate them all to arguments
908 // when they appear. If these values are not equal, it means there is some
909 // Input in OverallInputs that was removed for ArgInputs.
910 if (OverallInputs.size() != PremappedInputs.size()) {
911 Region.IgnoreRegion = true;
912 return;
913 }
914
915 findConstants(C, NotSame, InputGVNs);
916
917 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
918
919 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
920 ArgInputs);
921
922 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
923 // we need to make sure they are in a deterministic order.
924 stable_sort(InputGVNs);
925}
926
927/// Look over the inputs and map each input argument to an argument in the
928/// overall function for the OutlinableRegions. This creates a way to replace
929/// the arguments of the extracted function with the arguments of the new
930/// overall function.
931///
932/// \param [in,out] Region - The region of code to be analyzed.
933/// \param [in] InputGVNs - The global value numbering of the input values
934/// collected.
935/// \param [in] ArgInputs - The values of the arguments to the extracted
936/// function.
937static void
939 std::vector<unsigned> &InputGVNs,
940 SetVector<Value *> &ArgInputs) {
941
942 IRSimilarityCandidate &C = *Region.Candidate;
943 OutlinableGroup &Group = *Region.Parent;
944
945 // This counts the argument number in the overall function.
946 unsigned TypeIndex = 0;
947
948 // This counts the argument number in the extracted function.
949 unsigned OriginalIndex = 0;
950
951 // Find the mapping of the extracted arguments to the arguments for the
952 // overall function. Since there may be extra arguments in the overall
953 // function to account for the extracted constants, we have two different
954 // counters as we find extracted arguments, and as we come across overall
955 // arguments.
956
957 // Additionally, in our first pass, for the first extracted function,
958 // we find argument locations for the canonical value numbering. This
959 // numbering overrides any discovered location for the extracted code.
960 for (unsigned InputVal : InputGVNs) {
961 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
962 assert(CanonicalNumberOpt && "Canonical number not found?");
963 unsigned CanonicalNumber = *CanonicalNumberOpt;
964
965 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
966 assert(InputOpt && "Global value number not found?");
967 Value *Input = *InputOpt;
968
970 Group.CanonicalNumberToAggArg.find(CanonicalNumber);
971
972 if (!Group.InputTypesSet) {
973 Group.ArgumentTypes.push_back(Input->getType());
974 // If the input value has a swifterr attribute, make sure to mark the
975 // argument in the overall function.
976 if (Input->isSwiftError()) {
977 assert(
978 !Group.SwiftErrorArgument &&
979 "Argument already marked with swifterr for this OutlinableGroup!");
980 Group.SwiftErrorArgument = TypeIndex;
981 }
982 }
983
984 // Check if we have a constant. If we do add it to the overall argument
985 // number to Constant map for the region, and continue to the next input.
986 if (Constant *CST = dyn_cast<Constant>(Input)) {
987 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
988 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
989 else {
991 std::make_pair(CanonicalNumber, TypeIndex));
992 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
993 }
994 TypeIndex++;
995 continue;
996 }
997
998 // It is not a constant, we create the mapping from extracted argument list
999 // to the overall argument list, using the canonical location, if it exists.
1000 assert(ArgInputs.count(Input) && "Input cannot be found!");
1001
1002 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
1003 if (OriginalIndex != AggArgIt->second)
1004 Region.ChangedArgOrder = true;
1005 Region.ExtractedArgToAgg.insert(
1006 std::make_pair(OriginalIndex, AggArgIt->second));
1007 Region.AggArgToExtracted.insert(
1008 std::make_pair(AggArgIt->second, OriginalIndex));
1009 } else {
1011 std::make_pair(CanonicalNumber, TypeIndex));
1012 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
1013 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
1014 }
1015 OriginalIndex++;
1016 TypeIndex++;
1017 }
1018
1019 // If the function type definitions for the OutlinableGroup holding the region
1020 // have not been set, set the length of the inputs here. We should have the
1021 // same inputs for all of the different regions contained in the
1022 // OutlinableGroup since they are all structurally similar to one another.
1023 if (!Group.InputTypesSet) {
1024 Group.NumAggregateInputs = TypeIndex;
1025 Group.InputTypesSet = true;
1026 }
1027
1028 Region.NumExtractedInputs = OriginalIndex;
1029}
1030
1031/// Check if the \p V has any uses outside of the region other than \p PN.
1032///
1033/// \param V [in] - The value to check.
1034/// \param PHILoc [in] - The location in the PHINode of \p V.
1035/// \param PN [in] - The PHINode using \p V.
1036/// \param Exits [in] - The potential blocks we exit to from the outlined
1037/// region.
1038/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1039/// \returns true if \p V has any use soutside its region other than \p PN.
1040static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1042 DenseSet<BasicBlock *> &BlocksInRegion) {
1043 // We check to see if the value is used by the PHINode from some other
1044 // predecessor not included in the region. If it is, we make sure
1045 // to keep it as an output.
1046 if (any_of(llvm::seq<unsigned>(0, PN.getNumIncomingValues()),
1047 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1048 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1049 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1050 }))
1051 return true;
1052
1053 // Check if the value is used by any other instructions outside the region.
1054 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1055 Instruction *I = dyn_cast<Instruction>(U);
1056 if (!I)
1057 return false;
1058
1059 // If the use of the item is inside the region, we skip it. Uses
1060 // inside the region give us useful information about how the item could be
1061 // used as an output.
1062 BasicBlock *Parent = I->getParent();
1063 if (BlocksInRegion.contains(Parent))
1064 return false;
1065
1066 // If it's not a PHINode then we definitely know the use matters. This
1067 // output value will not completely combined with another item in a PHINode
1068 // as it is directly reference by another non-phi instruction
1069 if (!isa<PHINode>(I))
1070 return true;
1071
1072 // If we have a PHINode outside one of the exit locations, then it
1073 // can be considered an outside use as well. If there is a PHINode
1074 // contained in the Exit where this values use matters, it will be
1075 // caught when we analyze that PHINode.
1076 if (!Exits.contains(Parent))
1077 return true;
1078
1079 return false;
1080 });
1081}
1082
1083/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1084/// considered outputs. A PHINodes is an output when more than one incoming
1085/// value has been marked by the CodeExtractor as an output.
1086///
1087/// \param CurrentExitFromRegion [in] - The block to analyze.
1088/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1089/// region.
1090/// \param RegionBlocks [in] - The basic blocks in the region.
1091/// \param Outputs [in, out] - The existing outputs for the region, we may add
1092/// PHINodes to this as we find that they replace output values.
1093/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1094/// totally replaced by a PHINode.
1095/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1096/// in PHINodes, but have other uses, and should still be considered outputs.
1098 BasicBlock *CurrentExitFromRegion,
1099 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1100 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1101 DenseSet<Value *> &OutputsReplacedByPHINode,
1102 DenseSet<Value *> &OutputsWithNonPhiUses) {
1103 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1104 // Find all incoming values from the outlining region.
1105 SmallVector<unsigned, 2> IncomingVals;
1106 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1107 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1108 IncomingVals.push_back(I);
1109
1110 // Do not process PHI if there are no predecessors from region.
1111 unsigned NumIncomingVals = IncomingVals.size();
1112 if (NumIncomingVals == 0)
1113 continue;
1114
1115 // If there is one predecessor, we mark it as a value that needs to be kept
1116 // as an output.
1117 if (NumIncomingVals == 1) {
1118 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1119 OutputsWithNonPhiUses.insert(V);
1120 OutputsReplacedByPHINode.erase(V);
1121 continue;
1122 }
1123
1124 // This PHINode will be used as an output value, so we add it to our list.
1125 Outputs.insert(&PN);
1126
1127 // Not all of the incoming values should be ignored as other inputs and
1128 // outputs may have uses in outlined region. If they have other uses
1129 // outside of the single PHINode we should not skip over it.
1130 for (unsigned Idx : IncomingVals) {
1131 Value *V = PN.getIncomingValue(Idx);
1132 if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1133 OutputsWithNonPhiUses.insert(V);
1134 OutputsReplacedByPHINode.erase(V);
1135 continue;
1136 }
1137 if (!OutputsWithNonPhiUses.contains(V))
1138 OutputsReplacedByPHINode.insert(V);
1139 }
1140 }
1141}
1142
1143// Represents the type for the unsigned number denoting the output number for
1144// phi node, along with the canonical number for the exit block.
1145using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1146// The list of canonical numbers for the incoming values to a PHINode.
1148// The pair type representing the set of canonical values being combined in the
1149// PHINode, along with the location data for the PHINode.
1150using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1151
1152/// Encode \p PND as an integer for easy lookup based on the argument location,
1153/// the parent BasicBlock canonical numbering, and the canonical numbering of
1154/// the values stored in the PHINode.
1155///
1156/// \param PND - The data to hash.
1157/// \returns The hash code of \p PND.
1159 return llvm::hash_combine(
1160 llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
1161 llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
1162}
1163
1164/// Create a special GVN for PHINodes that will be used outside of
1165/// the region. We create a hash code based on the Canonical number of the
1166/// parent BasicBlock, the canonical numbering of the values stored in the
1167/// PHINode and the aggregate argument location. This is used to find whether
1168/// this PHINode type has been given a canonical numbering already. If not, we
1169/// assign it a value and store it for later use. The value is returned to
1170/// identify different output schemes for the set of regions.
1171///
1172/// \param Region - The region that \p PN is an output for.
1173/// \param PN - The PHINode we are analyzing.
1174/// \param Blocks - The blocks for the region we are analyzing.
1175/// \param AggArgIdx - The argument \p PN will be stored into.
1176/// \returns An optional holding the assigned canonical number, or std::nullopt
1177/// if there is some attribute of the PHINode blocking it from being used.
1178static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1179 PHINode *PN,
1180 DenseSet<BasicBlock *> &Blocks,
1181 unsigned AggArgIdx) {
1182 OutlinableGroup &Group = *Region.Parent;
1183 IRSimilarityCandidate &Cand = *Region.Candidate;
1184 BasicBlock *PHIBB = PN->getParent();
1185 CanonList PHIGVNs;
1186 Value *Incoming;
1187 BasicBlock *IncomingBlock;
1188 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1189 Incoming = PN->getIncomingValue(Idx);
1190 IncomingBlock = PN->getIncomingBlock(Idx);
1191 // If we cannot find a GVN, and the incoming block is included in the region
1192 // this means that the input to the PHINode is not included in the region we
1193 // are trying to analyze, meaning, that if it was outlined, we would be
1194 // adding an extra input. We ignore this case for now, and so ignore the
1195 // region.
1196 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1197 if (!OGVN && Blocks.contains(IncomingBlock)) {
1198 Region.IgnoreRegion = true;
1199 return std::nullopt;
1200 }
1201
1202 // If the incoming block isn't in the region, we don't have to worry about
1203 // this incoming value.
1204 if (!Blocks.contains(IncomingBlock))
1205 continue;
1206
1207 // Collect the canonical numbers of the values in the PHINode.
1208 unsigned GVN = *OGVN;
1209 OGVN = Cand.getCanonicalNum(GVN);
1210 assert(OGVN && "No GVN found for incoming value?");
1211 PHIGVNs.push_back(*OGVN);
1212
1213 // Find the incoming block and use the canonical numbering as well to define
1214 // the hash for the PHINode.
1215 OGVN = Cand.getGVN(IncomingBlock);
1216
1217 // If there is no number for the incoming block, it is because we have
1218 // split the candidate basic blocks. So we use the previous block that it
1219 // was split from to find the valid global value numbering for the PHINode.
1220 if (!OGVN) {
1221 assert(Cand.getStartBB() == IncomingBlock &&
1222 "Unknown basic block used in exit path PHINode.");
1223
1224 BasicBlock *PrevBlock = nullptr;
1225 // Iterate over the predecessors to the incoming block of the
1226 // PHINode, when we find a block that is not contained in the region
1227 // we know that this is the first block that we split from, and should
1228 // have a valid global value numbering.
1229 for (BasicBlock *Pred : predecessors(IncomingBlock))
1230 if (!Blocks.contains(Pred)) {
1231 PrevBlock = Pred;
1232 break;
1233 }
1234 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1235 OGVN = Cand.getGVN(PrevBlock);
1236 }
1237 GVN = *OGVN;
1238 OGVN = Cand.getCanonicalNum(GVN);
1239 assert(OGVN && "No GVN found for incoming block?");
1240 PHIGVNs.push_back(*OGVN);
1241 }
1242
1243 // Now that we have the GVNs for the incoming values, we are going to combine
1244 // them with the GVN of the incoming bock, and the output location of the
1245 // PHINode to generate a hash value representing this instance of the PHINode.
1248 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1249 assert(BBGVN && "Could not find GVN for the incoming block!");
1250
1251 BBGVN = Cand.getCanonicalNum(*BBGVN);
1252 assert(BBGVN && "Could not find canonical number for the incoming block!");
1253 // Create a pair of the exit block canonical value, and the aggregate
1254 // argument location, connected to the canonical numbers stored in the
1255 // PHINode.
1256 PHINodeData TemporaryPair =
1257 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1258 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1259
1260 // Look for and create a new entry in our connection between canonical
1261 // numbers for PHINodes, and the set of objects we just created.
1262 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1263 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1264 bool Inserted = false;
1265 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1266 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1267 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1268 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1269 }
1270
1271 return GVNToPHIIt->second;
1272}
1273
1274/// Create a mapping of the output arguments for the \p Region to the output
1275/// arguments of the overall outlined function.
1276///
1277/// \param [in,out] Region - The region of code to be analyzed.
1278/// \param [in] Outputs - The values found by the code extractor.
1279static void
1281 SetVector<Value *> &Outputs) {
1282 OutlinableGroup &Group = *Region.Parent;
1283 IRSimilarityCandidate &C = *Region.Candidate;
1284
1286 DenseSet<BasicBlock *> BlocksInRegion;
1287 C.getBasicBlocks(BlocksInRegion, BE);
1288
1289 // Find the exits to the region.
1291 for (BasicBlock *Block : BE)
1292 for (BasicBlock *Succ : successors(Block))
1293 if (!BlocksInRegion.contains(Succ))
1294 Exits.insert(Succ);
1295
1296 // After determining which blocks exit to PHINodes, we add these PHINodes to
1297 // the set of outputs to be processed. We also check the incoming values of
1298 // the PHINodes for whether they should no longer be considered outputs.
1299 DenseSet<Value *> OutputsReplacedByPHINode;
1300 DenseSet<Value *> OutputsWithNonPhiUses;
1301 for (BasicBlock *ExitBB : Exits)
1302 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1303 OutputsReplacedByPHINode,
1304 OutputsWithNonPhiUses);
1305
1306 // This counts the argument number in the extracted function.
1307 unsigned OriginalIndex = Region.NumExtractedInputs;
1308
1309 // This counts the argument number in the overall function.
1310 unsigned TypeIndex = Group.NumAggregateInputs;
1311 bool TypeFound;
1312 DenseSet<unsigned> AggArgsUsed;
1313
1314 // Iterate over the output types and identify if there is an aggregate pointer
1315 // type whose base type matches the current output type. If there is, we mark
1316 // that we will use this output register for this value. If not we add another
1317 // type to the overall argument type list. We also store the GVNs used for
1318 // stores to identify which values will need to be moved into an special
1319 // block that holds the stores to the output registers.
1320 for (Value *Output : Outputs) {
1321 TypeFound = false;
1322 // We can do this since it is a result value, and will have a number
1323 // that is necessarily the same. BUT if in the future, the instructions
1324 // do not have to be in same order, but are functionally the same, we will
1325 // have to use a different scheme, as one-to-one correspondence is not
1326 // guaranteed.
1327 unsigned ArgumentSize = Group.ArgumentTypes.size();
1328
1329 // If the output is combined in a PHINode, we make sure to skip over it.
1330 if (OutputsReplacedByPHINode.contains(Output))
1331 continue;
1332
1333 unsigned AggArgIdx = 0;
1334 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1335 if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
1336 continue;
1337
1338 if (AggArgsUsed.contains(Jdx))
1339 continue;
1340
1341 TypeFound = true;
1342 AggArgsUsed.insert(Jdx);
1343 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1344 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1345 AggArgIdx = Jdx;
1346 break;
1347 }
1348
1349 // We were unable to find an unused type in the output type set that matches
1350 // the output, so we add a pointer type to the argument types of the overall
1351 // function to handle this output and create a mapping to it.
1352 if (!TypeFound) {
1353 Group.ArgumentTypes.push_back(Output->getType()->getPointerTo(
1354 M.getDataLayout().getAllocaAddrSpace()));
1355 // Mark the new pointer type as the last value in the aggregate argument
1356 // list.
1357 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1358 AggArgsUsed.insert(ArgTypeIdx);
1359 Region.ExtractedArgToAgg.insert(
1360 std::make_pair(OriginalIndex, ArgTypeIdx));
1361 Region.AggArgToExtracted.insert(
1362 std::make_pair(ArgTypeIdx, OriginalIndex));
1363 AggArgIdx = ArgTypeIdx;
1364 }
1365
1366 // TODO: Adapt to the extra input from the PHINode.
1367 PHINode *PN = dyn_cast<PHINode>(Output);
1368
1369 std::optional<unsigned> GVN;
1370 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1371 // Values outside the region can be combined into PHINode when we
1372 // have multiple exits. We collect both of these into a list to identify
1373 // which values are being used in the PHINode. Each list identifies a
1374 // different PHINode, and a different output. We store the PHINode as it's
1375 // own canonical value. These canonical values are also dependent on the
1376 // output argument it is saved to.
1377
1378 // If two PHINodes have the same canonical values, but different aggregate
1379 // argument locations, then they will have distinct Canonical Values.
1380 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1381 if (!GVN)
1382 return;
1383 } else {
1384 // If we do not have a PHINode we use the global value numbering for the
1385 // output value, to find the canonical number to add to the set of stored
1386 // values.
1387 GVN = C.getGVN(Output);
1388 GVN = C.getCanonicalNum(*GVN);
1389 }
1390
1391 // Each region has a potentially unique set of outputs. We save which
1392 // values are output in a list of canonical values so we can differentiate
1393 // among the different store schemes.
1394 Region.GVNStores.push_back(*GVN);
1395
1396 OriginalIndex++;
1397 TypeIndex++;
1398 }
1399
1400 // We sort the stored values to make sure that we are not affected by analysis
1401 // order when determining what combination of items were stored.
1402 stable_sort(Region.GVNStores);
1403}
1404
1405void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1406 DenseSet<unsigned> &NotSame) {
1407 std::vector<unsigned> Inputs;
1408 SetVector<Value *> ArgInputs, Outputs;
1409
1410 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1411 Outputs);
1412
1413 if (Region.IgnoreRegion)
1414 return;
1415
1416 // Map the inputs found by the CodeExtractor to the arguments found for
1417 // the overall function.
1419
1420 // Map the outputs found by the CodeExtractor to the arguments found for
1421 // the overall function.
1423}
1424
1425/// Replace the extracted function in the Region with a call to the overall
1426/// function constructed from the deduplicated similar regions, replacing and
1427/// remapping the values passed to the extracted function as arguments to the
1428/// new arguments of the overall function.
1429///
1430/// \param [in] M - The module to outline from.
1431/// \param [in] Region - The regions of extracted code to be replaced with a new
1432/// function.
1433/// \returns a call instruction with the replaced function.
1435 std::vector<Value *> NewCallArgs;
1437
1438 OutlinableGroup &Group = *Region.Parent;
1439 CallInst *Call = Region.Call;
1440 assert(Call && "Call to replace is nullptr?");
1441 Function *AggFunc = Group.OutlinedFunction;
1442 assert(AggFunc && "Function to replace with is nullptr?");
1443
1444 // If the arguments are the same size, there are not values that need to be
1445 // made into an argument, the argument ordering has not been change, or
1446 // different output registers to handle. We can simply replace the called
1447 // function in this case.
1448 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1449 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1450 << *AggFunc << " with same number of arguments\n");
1451 Call->setCalledFunction(AggFunc);
1452 return Call;
1453 }
1454
1455 // We have a different number of arguments than the new function, so
1456 // we need to use our previously mappings off extracted argument to overall
1457 // function argument, and constants to overall function argument to create the
1458 // new argument list.
1459 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1460
1461 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1462 Group.OutputGVNCombinations.size() > 1) {
1463 // If we are on the last argument, and we need to differentiate between
1464 // output blocks, add an integer to the argument list to determine
1465 // what block to take
1466 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1467 << Region.OutputBlockNum << "\n");
1468 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1469 Region.OutputBlockNum));
1470 continue;
1471 }
1472
1473 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1474 if (ArgPair != Region.AggArgToExtracted.end()) {
1475 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1476 // If we found the mapping from the extracted function to the overall
1477 // function, we simply add it to the argument list. We use the same
1478 // value, it just needs to honor the new order of arguments.
1479 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1480 << *ArgumentValue << "\n");
1481 NewCallArgs.push_back(ArgumentValue);
1482 continue;
1483 }
1484
1485 // If it is a constant, we simply add it to the argument list as a value.
1486 if (Region.AggArgToConstant.find(AggArgIdx) !=
1487 Region.AggArgToConstant.end()) {
1488 Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
1489 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1490 << *CST << "\n");
1491 NewCallArgs.push_back(CST);
1492 continue;
1493 }
1494
1495 // Add a nullptr value if the argument is not found in the extracted
1496 // function. If we cannot find a value, it means it is not in use
1497 // for the region, so we should not pass anything to it.
1498 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1499 NewCallArgs.push_back(ConstantPointerNull::get(
1500 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1501 }
1502
1503 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1504 << *AggFunc << " with new set of arguments\n");
1505 // Create the new call instruction and erase the old one.
1506 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1507 Call);
1508
1509 // It is possible that the call to the outlined function is either the first
1510 // instruction is in the new block, the last instruction, or both. If either
1511 // of these is the case, we need to make sure that we replace the instruction
1512 // in the IRInstructionData struct with the new call.
1513 CallInst *OldCall = Region.Call;
1514 if (Region.NewFront->Inst == OldCall)
1515 Region.NewFront->Inst = Call;
1516 if (Region.NewBack->Inst == OldCall)
1517 Region.NewBack->Inst = Call;
1518
1519 // Transfer any debug information.
1520 Call->setDebugLoc(Region.Call->getDebugLoc());
1521 // Since our output may determine which branch we go to, we make sure to
1522 // propogate this new call value through the module.
1523 OldCall->replaceAllUsesWith(Call);
1524
1525 // Remove the old instruction.
1526 OldCall->eraseFromParent();
1527 Region.Call = Call;
1528
1529 // Make sure that the argument in the new function has the SwiftError
1530 // argument.
1531 if (Group.SwiftErrorArgument)
1532 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1533
1534 return Call;
1535}
1536
1537/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1538/// for \p RetVal.
1539///
1540/// \param Group - The OutlinableGroup containing the information about the
1541/// overall outlined function.
1542/// \param RetVal - The return value or exit option that we are currently
1543/// evaluating.
1544/// \returns The found or newly created BasicBlock to contain the needed
1545/// PHINodes to be used as outputs.
1548 ReturnBlockForRetVal;
1549 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1550 ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1551 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1552 "Could not find output value!");
1553 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1554
1555 // Find if a PHIBlock exists for this return value already. If it is
1556 // the first time we are analyzing this, we will not, so we record it.
1557 PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
1558 if (PhiBlockForRetVal != Group.PHIBlocks.end())
1559 return PhiBlockForRetVal->second;
1560
1561 // If we did not find a block, we create one, and insert it into the
1562 // overall function and record it.
1563 bool Inserted = false;
1564 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1565 ReturnBB->getParent());
1566 std::tie(PhiBlockForRetVal, Inserted) =
1567 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1568
1569 // We find the predecessors of the return block in the newly created outlined
1570 // function in order to point them to the new PHIBlock rather than the already
1571 // existing return block.
1572 SmallVector<BranchInst *, 2> BranchesToChange;
1573 for (BasicBlock *Pred : predecessors(ReturnBB))
1574 BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
1575
1576 // Now we mark the branch instructions found, and change the references of the
1577 // return block to the newly created PHIBlock.
1578 for (BranchInst *BI : BranchesToChange)
1579 for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
1580 if (BI->getSuccessor(Succ) != ReturnBB)
1581 continue;
1582 BI->setSuccessor(Succ, PHIBlock);
1583 }
1584
1585 BranchInst::Create(ReturnBB, PHIBlock);
1586
1587 return PhiBlockForRetVal->second;
1588}
1589
1590/// For the function call now representing the \p Region, find the passed value
1591/// to that call that represents Argument \p A at the call location if the
1592/// call has already been replaced with a call to the overall, aggregate
1593/// function.
1594///
1595/// \param A - The Argument to get the passed value for.
1596/// \param Region - The extracted Region corresponding to the outlined function.
1597/// \returns The Value representing \p A at the call site.
1598static Value *
1600 const OutlinableRegion &Region) {
1601 // If we don't need to adjust the argument number at all (since the call
1602 // has already been replaced by a call to the overall outlined function)
1603 // we can just get the specified argument.
1604 return Region.Call->getArgOperand(A->getArgNo());
1605}
1606
1607/// For the function call now representing the \p Region, find the passed value
1608/// to that call that represents Argument \p A at the call location if the
1609/// call has only been replaced by the call to the aggregate function.
1610///
1611/// \param A - The Argument to get the passed value for.
1612/// \param Region - The extracted Region corresponding to the outlined function.
1613/// \returns The Value representing \p A at the call site.
1614static Value *
1616 const OutlinableRegion &Region) {
1617 unsigned ArgNum = A->getArgNo();
1618
1619 // If it is a constant, we can look at our mapping from when we created
1620 // the outputs to figure out what the constant value is.
1621 if (Region.AggArgToConstant.count(ArgNum))
1622 return Region.AggArgToConstant.find(ArgNum)->second;
1623
1624 // If it is not a constant, and we are not looking at the overall function, we
1625 // need to adjust which argument we are looking at.
1626 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1627 return Region.Call->getArgOperand(ArgNum);
1628}
1629
1630/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1631///
1632/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1633/// \param Region [in] - The OutlinableRegion containing \p PN.
1634/// \param OutputMappings [in] - The mapping of output values from outlined
1635/// region to their original values.
1636/// \param CanonNums [out] - The canonical numbering for the incoming values to
1637/// \p PN paired with their incoming block.
1638/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1639/// of \p Region rather than the overall function's call.
1642 const DenseMap<Value *, Value *> &OutputMappings,
1643 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1644 bool ReplacedWithOutlinedCall = true) {
1645 // Iterate over the incoming values.
1646 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1647 Value *IVal = PN->getIncomingValue(Idx);
1648 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1649 // If we have an argument as incoming value, we need to grab the passed
1650 // value from the call itself.
1651 if (Argument *A = dyn_cast<Argument>(IVal)) {
1652 if (ReplacedWithOutlinedCall)
1654 else
1656 }
1657
1658 // Get the original value if it has been replaced by an output value.
1659 IVal = findOutputMapping(OutputMappings, IVal);
1660
1661 // Find and add the canonical number for the incoming value.
1662 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1663 assert(GVN && "No GVN for incoming value");
1664 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1665 assert(CanonNum && "No Canonical Number for GVN");
1666 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1667 }
1668}
1669
1670/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1671/// in order to condense the number of instructions added to the outlined
1672/// function.
1673///
1674/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1675/// \param Region [in] - The OutlinableRegion containing \p PN.
1676/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1677/// \p PN in.
1678/// \param OutputMappings [in] - The mapping of output values from outlined
1679/// region to their original values.
1680/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1681/// matched.
1682/// \return the newly found or created PHINode in \p OverallPhiBlock.
1683static PHINode*
1685 BasicBlock *OverallPhiBlock,
1686 const DenseMap<Value *, Value *> &OutputMappings,
1687 DenseSet<PHINode *> &UsedPHIs) {
1688 OutlinableGroup &Group = *Region.Parent;
1689
1690
1691 // A list of the canonical numbering assigned to each incoming value, paired
1692 // with the incoming block for the PHINode passed into this function.
1694
1695 // We have to use the extracted function since we have merged this region into
1696 // the overall function yet. We make sure to reassign the argument numbering
1697 // since it is possible that the argument ordering is different between the
1698 // functions.
1699 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1700 /* ReplacedWithOutlinedCall = */ false);
1701
1702 OutlinableRegion *FirstRegion = Group.Regions[0];
1703
1704 // A list of the canonical numbering assigned to each incoming value, paired
1705 // with the incoming block for the PHINode that we are currently comparing
1706 // the passed PHINode to.
1708
1709 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1710 // the uses of the PHINode we are searching for, with the found PHINode.
1711 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1712 // If this PHINode has already been matched to another PHINode to be merged,
1713 // we skip it.
1714 if (UsedPHIs.contains(&CurrPN))
1715 continue;
1716
1717 CurrentCanonNums.clear();
1718 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1719 /* ReplacedWithOutlinedCall = */ true);
1720
1721 // If the list of incoming values is not the same length, then they cannot
1722 // match since there is not an analogue for each incoming value.
1723 if (PNCanonNums.size() != CurrentCanonNums.size())
1724 continue;
1725
1726 bool FoundMatch = true;
1727
1728 // We compare the canonical value for each incoming value in the passed
1729 // in PHINode to one already present in the outlined region. If the
1730 // incoming values do not match, then the PHINodes do not match.
1731
1732 // We also check to make sure that the incoming block matches as well by
1733 // finding the corresponding incoming block in the combined outlined region
1734 // for the current outlined region.
1735 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1736 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1737 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1738 if (ToCompareTo.first != ToAdd.first) {
1739 FoundMatch = false;
1740 break;
1741 }
1742
1743 BasicBlock *CorrespondingBlock =
1744 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1745 assert(CorrespondingBlock && "Found block is nullptr");
1746 if (CorrespondingBlock != ToCompareTo.second) {
1747 FoundMatch = false;
1748 break;
1749 }
1750 }
1751
1752 // If all incoming values and branches matched, then we can merge
1753 // into the found PHINode.
1754 if (FoundMatch) {
1755 UsedPHIs.insert(&CurrPN);
1756 return &CurrPN;
1757 }
1758 }
1759
1760 // If we've made it here, it means we weren't able to replace the PHINode, so
1761 // we must insert it ourselves.
1762 PHINode *NewPN = cast<PHINode>(PN.clone());
1763 NewPN->insertBefore(&*OverallPhiBlock->begin());
1764 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1765 Idx++) {
1766 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1767 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1768
1769 // Find corresponding basic block in the overall function for the incoming
1770 // block.
1771 BasicBlock *BlockToUse =
1772 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1773 NewPN->setIncomingBlock(Idx, BlockToUse);
1774
1775 // If we have an argument we make sure we replace using the argument from
1776 // the correct function.
1777 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1778 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1779 NewPN->setIncomingValue(Idx, Val);
1780 continue;
1781 }
1782
1783 // Find the corresponding value in the overall function.
1784 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1785 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1786 assert(Val && "Value is nullptr?");
1788 FirstRegion->RemappedArguments.find(Val);
1789 if (RemappedIt != FirstRegion->RemappedArguments.end())
1790 Val = RemappedIt->second;
1791 NewPN->setIncomingValue(Idx, Val);
1792 }
1793 return NewPN;
1794}
1795
1796// Within an extracted function, replace the argument uses of the extracted
1797// region with the arguments of the function for an OutlinableGroup.
1798//
1799/// \param [in] Region - The region of extracted code to be changed.
1800/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1801/// region.
1802/// \param [in] FirstFunction - A flag to indicate whether we are using this
1803/// function to define the overall outlined function for all the regions, or
1804/// if we are operating on one of the following regions.
1805static void
1808 const DenseMap<Value *, Value *> &OutputMappings,
1809 bool FirstFunction = false) {
1810 OutlinableGroup &Group = *Region.Parent;
1811 assert(Region.ExtractedFunction && "Region has no extracted function?");
1812
1813 Function *DominatingFunction = Region.ExtractedFunction;
1814 if (FirstFunction)
1815 DominatingFunction = Group.OutlinedFunction;
1816 DominatorTree DT(*DominatingFunction);
1817 DenseSet<PHINode *> UsedPHIs;
1818
1819 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1820 ArgIdx++) {
1821 assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
1822 Region.ExtractedArgToAgg.end() &&
1823 "No mapping from extracted to outlined?");
1824 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1825 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1826 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1827 // The argument is an input, so we can simply replace it with the overall
1828 // argument value
1829 if (ArgIdx < Region.NumExtractedInputs) {
1830 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1831 << *Region.ExtractedFunction << " with " << *AggArg
1832 << " in function " << *Group.OutlinedFunction << "\n");
1833 Arg->replaceAllUsesWith(AggArg);
1834 Value *V = Region.Call->getArgOperand(ArgIdx);
1835 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1836 continue;
1837 }
1838
1839 // If we are replacing an output, we place the store value in its own
1840 // block inside the overall function before replacing the use of the output
1841 // in the function.
1842 assert(Arg->hasOneUse() && "Output argument can only have one use");
1843 User *InstAsUser = Arg->user_back();
1844 assert(InstAsUser && "User is nullptr!");
1845
1846 Instruction *I = cast<Instruction>(InstAsUser);
1847 BasicBlock *BB = I->getParent();
1848 SmallVector<BasicBlock *, 4> Descendants;
1849 DT.getDescendants(BB, Descendants);
1850 bool EdgeAdded = false;
1851 if (Descendants.size() == 0) {
1852 EdgeAdded = true;
1853 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1854 DT.getDescendants(BB, Descendants);
1855 }
1856
1857 // Iterate over the following blocks, looking for return instructions,
1858 // if we find one, find the corresponding output block for the return value
1859 // and move our store instruction there.
1860 for (BasicBlock *DescendBB : Descendants) {
1861 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1862 if (!RI)
1863 continue;
1864 Value *RetVal = RI->getReturnValue();
1865 auto VBBIt = OutputBBs.find(RetVal);
1866 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1867
1868 // If this is storing a PHINode, we must make sure it is included in the
1869 // overall function.
1870 StoreInst *SI = cast<StoreInst>(I);
1871
1872 Value *ValueOperand = SI->getValueOperand();
1873
1874 StoreInst *NewI = cast<StoreInst>(I->clone());
1875 NewI->setDebugLoc(DebugLoc());
1876 BasicBlock *OutputBB = VBBIt->second;
1877 NewI->insertInto(OutputBB, OutputBB->end());
1878 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1879 << *OutputBB << "\n");
1880
1881 // If this is storing a PHINode, we must make sure it is included in the
1882 // overall function.
1883 if (!isa<PHINode>(ValueOperand) ||
1884 Region.Candidate->getGVN(ValueOperand).has_value()) {
1885 if (FirstFunction)
1886 continue;
1887 Value *CorrVal =
1888 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1889 assert(CorrVal && "Value is nullptr?");
1890 NewI->setOperand(0, CorrVal);
1891 continue;
1892 }
1893 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1894 // If it has a value, it was not split by the code extractor, which
1895 // is what we are looking for.
1896 if (Region.Candidate->getGVN(PN))
1897 continue;
1898
1899 // We record the parent block for the PHINode in the Region so that
1900 // we can exclude it from checks later on.
1901 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1902
1903 // If this is the first function, we do not need to worry about mergiing
1904 // this with any other block in the overall outlined function, so we can
1905 // just continue.
1906 if (FirstFunction) {
1907 BasicBlock *PHIBlock = PN->getParent();
1908 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1909 continue;
1910 }
1911
1912 // We look for the aggregate block that contains the PHINodes leading into
1913 // this exit path. If we can't find one, we create one.
1914 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1915
1916 // For our PHINode, we find the combined canonical numbering, and
1917 // attempt to find a matching PHINode in the overall PHIBlock. If we
1918 // cannot, we copy the PHINode and move it into this new block.
1919 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1920 OutputMappings, UsedPHIs);
1921 NewI->setOperand(0, NewPN);
1922 }
1923
1924 // If we added an edge for basic blocks without a predecessor, we remove it
1925 // here.
1926 if (EdgeAdded)
1927 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1928 I->eraseFromParent();
1929
1930 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1931 << *Region.ExtractedFunction << " with " << *AggArg
1932 << " in function " << *Group.OutlinedFunction << "\n");
1933 Arg->replaceAllUsesWith(AggArg);
1934 }
1935}
1936
1937/// Within an extracted function, replace the constants that need to be lifted
1938/// into arguments with the actual argument.
1939///
1940/// \param Region [in] - The region of extracted code to be changed.
1942 OutlinableGroup &Group = *Region.Parent;
1943 // Iterate over the constants that need to be elevated into arguments
1944 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1945 unsigned AggArgIdx = Const.first;
1946 Function *OutlinedFunction = Group.OutlinedFunction;
1947 assert(OutlinedFunction && "Overall Function is not defined?");
1948 Constant *CST = Const.second;
1949 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1950 // Identify the argument it will be elevated to, and replace instances of
1951 // that constant in the function.
1952
1953 // TODO: If in the future constants do not have one global value number,
1954 // i.e. a constant 1 could be mapped to several values, this check will
1955 // have to be more strict. It cannot be using only replaceUsesWithIf.
1956
1957 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1958 << " in function " << *OutlinedFunction << " with "
1959 << *Arg << "\n");
1960 CST->replaceUsesWithIf(Arg, [OutlinedFunction](Use &U) {
1961 if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
1962 return I->getFunction() == OutlinedFunction;
1963 return false;
1964 });
1965 }
1966}
1967
1968/// It is possible that there is a basic block that already performs the same
1969/// stores. This returns a duplicate block, if it exists
1970///
1971/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1972/// \param OutputStoreBBs [in] The existing output blocks.
1973/// \returns an optional value with the number output block if there is a match.
1974std::optional<unsigned> findDuplicateOutputBlock(
1976 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1977
1978 bool Mismatch = false;
1979 unsigned MatchingNum = 0;
1980 // We compare the new set output blocks to the other sets of output blocks.
1981 // If they are the same number, and have identical instructions, they are
1982 // considered to be the same.
1983 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1984 Mismatch = false;
1985 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1987 OutputBBs.find(VToB.first);
1988 if (OutputBBIt == OutputBBs.end()) {
1989 Mismatch = true;
1990 break;
1991 }
1992
1993 BasicBlock *CompBB = VToB.second;
1994 BasicBlock *OutputBB = OutputBBIt->second;
1995 if (CompBB->size() - 1 != OutputBB->size()) {
1996 Mismatch = true;
1997 break;
1998 }
1999
2000 BasicBlock::iterator NIt = OutputBB->begin();
2001 for (Instruction &I : *CompBB) {
2002 if (isa<BranchInst>(&I))
2003 continue;
2004
2005 if (!I.isIdenticalTo(&(*NIt))) {
2006 Mismatch = true;
2007 break;
2008 }
2009
2010 NIt++;
2011 }
2012 }
2013
2014 if (!Mismatch)
2015 return MatchingNum;
2016
2017 MatchingNum++;
2018 }
2019
2020 return std::nullopt;
2021}
2022
2023/// Remove empty output blocks from the outlined region.
2024///
2025/// \param BlocksToPrune - Mapping of return values output blocks for the \p
2026/// Region.
2027/// \param Region - The OutlinableRegion we are analyzing.
2028static bool
2031 bool AllRemoved = true;
2032 Value *RetValueForBB;
2033 BasicBlock *NewBB;
2035 // Iterate over the output blocks created in the outlined section.
2036 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
2037 RetValueForBB = VtoBB.first;
2038 NewBB = VtoBB.second;
2039
2040 // If there are no instructions, we remove it from the module, and also
2041 // mark the value for removal from the return value to output block mapping.
2042 if (NewBB->size() == 0) {
2043 NewBB->eraseFromParent();
2044 ToRemove.push_back(RetValueForBB);
2045 continue;
2046 }
2047
2048 // Mark that we could not remove all the blocks since they were not all
2049 // empty.
2050 AllRemoved = false;
2051 }
2052
2053 // Remove the return value from the mapping.
2054 for (Value *V : ToRemove)
2055 BlocksToPrune.erase(V);
2056
2057 // Mark the region as having the no output scheme.
2058 if (AllRemoved)
2059 Region.OutputBlockNum = -1;
2060
2061 return AllRemoved;
2062}
2063
2064/// For the outlined section, move needed the StoreInsts for the output
2065/// registers into their own block. Then, determine if there is a duplicate
2066/// output block already created.
2067///
2068/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2069/// \param [in] Region - The OutlinableRegion that is being analyzed.
2070/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2071/// placed in.
2072/// \param [in] EndBBs - the final blocks of the extracted function.
2073/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2074/// been replaced by a new output value.
2075/// \param [in,out] OutputStoreBBs - The existing output blocks.
2080 const DenseMap<Value *, Value *> &OutputMappings,
2081 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2082 // If none of the output blocks have any instructions, this means that we do
2083 // not have to determine if it matches any of the other output schemes, and we
2084 // don't have to do anything else.
2085 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2086 return;
2087
2088 // Determine is there is a duplicate set of blocks.
2089 std::optional<unsigned> MatchingBB =
2090 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2091
2092 // If there is, we remove the new output blocks. If it does not,
2093 // we add it to our list of sets of output blocks.
2094 if (MatchingBB) {
2095 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2096 << Region.ExtractedFunction << " to " << *MatchingBB);
2097
2098 Region.OutputBlockNum = *MatchingBB;
2099 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2100 VtoBB.second->eraseFromParent();
2101 return;
2102 }
2103
2104 Region.OutputBlockNum = OutputStoreBBs.size();
2105
2106 Value *RetValueForBB;
2107 BasicBlock *NewBB;
2108 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2109 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2110 RetValueForBB = VtoBB.first;
2111 NewBB = VtoBB.second;
2113 EndBBs.find(RetValueForBB);
2114 LLVM_DEBUG(dbgs() << "Create output block for region in"
2115 << Region.ExtractedFunction << " to "
2116 << *NewBB);
2117 BranchInst::Create(VBBIt->second, NewBB);
2118 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2119 }
2120}
2121
2122/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2123/// before creating a basic block for each \p NewMap, and inserting into the new
2124/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2125///
2126/// \param OldMap [in] - The mapping to base the new mapping off of.
2127/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2128/// \param ParentFunc [in] - The function to put the new basic block in.
2129/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2130/// by an index value.
2133 Function *ParentFunc, Twine BaseName) {
2134 unsigned Idx = 0;
2135 std::vector<Value *> SortedKeys;
2136
2137 getSortedConstantKeys(SortedKeys, OldMap);
2138
2139 for (Value *RetVal : SortedKeys) {
2141 ParentFunc->getContext(),
2142 Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
2143 ParentFunc);
2144 NewMap.insert(std::make_pair(RetVal, NewBB));
2145 }
2146}
2147
2148/// Create the switch statement for outlined function to differentiate between
2149/// all the output blocks.
2150///
2151/// For the outlined section, determine if an outlined block already exists that
2152/// matches the needed stores for the extracted section.
2153/// \param [in] M - The module we are outlining from.
2154/// \param [in] OG - The group of regions to be outlined.
2155/// \param [in] EndBBs - The final blocks of the extracted function.
2156/// \param [in,out] OutputStoreBBs - The existing output blocks.
2159 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2160 // We only need the switch statement if there is more than one store
2161 // combination, or there is more than one set of output blocks. The first
2162 // will occur when we store different sets of values for two different
2163 // regions. The second will occur when we have two outputs that are combined
2164 // in a PHINode outside of the region in one outlined instance, and are used
2165 // seaparately in another. This will create the same set of OutputGVNs, but
2166 // will generate two different output schemes.
2167 if (OG.OutputGVNCombinations.size() > 1) {
2168 Function *AggFunc = OG.OutlinedFunction;
2169 // Create a final block for each different return block.
2171 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2172
2173 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2174 std::pair<Value *, BasicBlock *> &OutputBlock =
2175 *OG.EndBBs.find(RetBlockPair.first);
2176 BasicBlock *ReturnBlock = RetBlockPair.second;
2177 BasicBlock *EndBB = OutputBlock.second;
2178 Instruction *Term = EndBB->getTerminator();
2179 // Move the return value to the final block instead of the original exit
2180 // stub.
2181 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2182 // Put the switch statement in the old end basic block for the function
2183 // with a fall through to the new return block.
2184 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2185 << OutputStoreBBs.size() << "\n");
2186 SwitchInst *SwitchI =
2187 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2188 ReturnBlock, OutputStoreBBs.size(), EndBB);
2189
2190 unsigned Idx = 0;
2191 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2193 OutputStoreBB.find(OutputBlock.first);
2194
2195 if (OSBBIt == OutputStoreBB.end())
2196 continue;
2197
2198 BasicBlock *BB = OSBBIt->second;
2199 SwitchI->addCase(
2200 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2201 Term = BB->getTerminator();
2202 Term->setSuccessor(0, ReturnBlock);
2203 Idx++;
2204 }
2205 }
2206 return;
2207 }
2208
2209 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2210
2211 // If there needs to be stores, move them from the output blocks to their
2212 // corresponding ending block. We do not check that the OutputGVNCombinations
2213 // is equal to 1 here since that could just been the case where there are 0
2214 // outputs. Instead, we check whether there is more than one set of output
2215 // blocks since this is the only case where we would have to move the
2216 // stores, and erase the extraneous blocks.
2217 if (OutputStoreBBs.size() == 1) {
2218 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2219 << *OG.OutlinedFunction << "\n");
2220 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2221 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2223 EndBBs.find(VBPair.first);
2224 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2225 BasicBlock *EndBB = EndBBIt->second;
2226 BasicBlock *OutputBB = VBPair.second;
2227 Instruction *Term = OutputBB->getTerminator();
2228 Term->eraseFromParent();
2229 Term = EndBB->getTerminator();
2230 moveBBContents(*OutputBB, *EndBB);
2231 Term->moveBefore(*EndBB, EndBB->end());
2232 OutputBB->eraseFromParent();
2233 }
2234 }
2235}
2236
2237/// Fill the new function that will serve as the replacement function for all of
2238/// the extracted regions of a certain structure from the first region in the
2239/// list of regions. Replace this first region's extracted function with the
2240/// new overall function.
2241///
2242/// \param [in] M - The module we are outlining from.
2243/// \param [in] CurrentGroup - The group of regions to be outlined.
2244/// \param [in,out] OutputStoreBBs - The output blocks for each different
2245/// set of stores needed for the different functions.
2246/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
2247/// once outlining is complete.
2248/// \param [in] OutputMappings - Extracted functions to erase from module
2249/// once outlining is complete.
2251 Module &M, OutlinableGroup &CurrentGroup,
2252 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2253 std::vector<Function *> &FuncsToRemove,
2254 const DenseMap<Value *, Value *> &OutputMappings) {
2255 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2256
2257 // Move first extracted function's instructions into new function.
2258 LLVM_DEBUG(dbgs() << "Move instructions from "
2259 << *CurrentOS->ExtractedFunction << " to instruction "
2260 << *CurrentGroup.OutlinedFunction << "\n");
2262 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2263
2264 // Transfer the attributes from the function to the new function.
2265 for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
2266 CurrentGroup.OutlinedFunction->addFnAttr(A);
2267
2268 // Create a new set of output blocks for the first extracted function.
2270 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2271 CurrentGroup.OutlinedFunction, "output_block_0");
2272 CurrentOS->OutputBlockNum = 0;
2273
2274 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2275 replaceConstants(*CurrentOS);
2276
2277 // We first identify if any output blocks are empty, if they are we remove
2278 // them. We then create a branch instruction to the basic block to the return
2279 // block for the function for each non empty output block.
2280 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2281 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2282 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2284 CurrentGroup.EndBBs.find(VToBB.first);
2285 BasicBlock *EndBB = VBBIt->second;
2286 BranchInst::Create(EndBB, VToBB.second);
2287 OutputStoreBBs.back().insert(VToBB);
2288 }
2289 }
2290
2291 // Replace the call to the extracted function with the outlined function.
2292 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2293
2294 // We only delete the extracted functions at the end since we may need to
2295 // reference instructions contained in them for mapping purposes.
2296 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2297}
2298
2299void IROutliner::deduplicateExtractedSections(
2300 Module &M, OutlinableGroup &CurrentGroup,
2301 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2302 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2303
2304 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2305
2306 OutlinableRegion *CurrentOS;
2307
2308 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
2309 OutputMappings);
2310
2311 std::vector<Value *> SortedKeys;
2312 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2313 CurrentOS = CurrentGroup.Regions[Idx];
2315 *CurrentOS->ExtractedFunction);
2316
2317 // Create a set of BasicBlocks, one for each return block, to hold the
2318 // needed store instructions.
2321 CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
2322 "output_block_" + Twine(static_cast<unsigned>(Idx)));
2323 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2324 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2325 CurrentGroup.EndBBs, OutputMappings,
2326 OutputStoreBBs);
2327
2328 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2329 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2330 }
2331
2332 // Create a switch statement to handle the different output schemes.
2333 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2334
2335 OutlinedFunctionNum++;
2336}
2337
2338/// Checks that the next instruction in the InstructionDataList matches the
2339/// next instruction in the module. If they do not, there could be the
2340/// possibility that extra code has been inserted, and we must ignore it.
2341///
2342/// \param ID - The IRInstructionData to check the next instruction of.
2343/// \returns true if the InstructionDataList and actual instruction match.
2345 // We check if there is a discrepancy between the InstructionDataList
2346 // and the actual next instruction in the module. If there is, it means
2347 // that an extra instruction was added, likely by the CodeExtractor.
2348
2349 // Since we do not have any similarity data about this particular
2350 // instruction, we cannot confidently outline it, and must discard this
2351 // candidate.
2352 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2353 Instruction *NextIDLInst = NextIDIt->Inst;
2354 Instruction *NextModuleInst = nullptr;
2355 if (!ID.Inst->isTerminator())
2356 NextModuleInst = ID.Inst->getNextNonDebugInstruction();
2357 else if (NextIDLInst != nullptr)
2358 NextModuleInst =
2359 &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
2360
2361 if (NextIDLInst && NextIDLInst != NextModuleInst)
2362 return false;
2363
2364 return true;
2365}
2366
2367bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2368 const OutlinableRegion &Region) {
2369 IRSimilarityCandidate *IRSC = Region.Candidate;
2370 unsigned StartIdx = IRSC->getStartIdx();
2371 unsigned EndIdx = IRSC->getEndIdx();
2372
2373 // A check to make sure that we are not about to attempt to outline something
2374 // that has already been outlined.
2375 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2376 if (Outlined.contains(Idx))
2377 return false;
2378
2379 // We check if the recorded instruction matches the actual next instruction,
2380 // if it does not, we fix it in the InstructionDataList.
2381 if (!Region.Candidate->backInstruction()->isTerminator()) {
2382 Instruction *NewEndInst =
2383 Region.Candidate->backInstruction()->getNextNonDebugInstruction();
2384 assert(NewEndInst && "Next instruction is a nullptr?");
2385 if (Region.Candidate->end()->Inst != NewEndInst) {
2386 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2387 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2388 IRInstructionData(*NewEndInst,
2389 InstructionClassifier.visit(*NewEndInst), *IDL);
2390
2391 // Insert the first IRInstructionData of the new region after the
2392 // last IRInstructionData of the IRSimilarityCandidate.
2393 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2394 }
2395 }
2396
2397 return none_of(*IRSC, [this](IRInstructionData &ID) {
2399 return true;
2400
2401 return !this->InstructionClassifier.visit(ID.Inst);
2402 });
2403}
2404
2405void IROutliner::pruneIncompatibleRegions(
2406 std::vector<IRSimilarityCandidate> &CandidateVec,
2407 OutlinableGroup &CurrentGroup) {
2408 bool PreviouslyOutlined;
2409
2410 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2411 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2412 const IRSimilarityCandidate &RHS) {
2413 return LHS.getStartIdx() < RHS.getStartIdx();
2414 });
2415
2416 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2417 // Since outlining a call and a branch instruction will be the same as only
2418 // outlinining a call instruction, we ignore it as a space saving.
2419 if (FirstCandidate.getLength() == 2) {
2420 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2421 isa<BranchInst>(FirstCandidate.back()->Inst))
2422 return;
2423 }
2424
2425 unsigned CurrentEndIdx = 0;
2426 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2427 PreviouslyOutlined = false;
2428 unsigned StartIdx = IRSC.getStartIdx();
2429 unsigned EndIdx = IRSC.getEndIdx();
2430 const Function &FnForCurrCand = *IRSC.getFunction();
2431
2432 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2433 if (Outlined.contains(Idx)) {
2434 PreviouslyOutlined = true;
2435 break;
2436 }
2437
2438 if (PreviouslyOutlined)
2439 continue;
2440
2441 // Check over the instructions, and if the basic block has its address
2442 // taken for use somewhere else, we do not outline that block.
2443 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2444 return ID.Inst->getParent()->hasAddressTaken();
2445 });
2446
2447 if (BBHasAddressTaken)
2448 continue;
2449
2450 if (FnForCurrCand.hasOptNone())
2451 continue;
2452
2453 if (FnForCurrCand.hasFnAttribute("nooutline")) {
2454 LLVM_DEBUG({
2455 dbgs() << "... Skipping function with nooutline attribute: "
2456 << FnForCurrCand.getName() << "\n";
2457 });
2458 continue;
2459 }
2460
2461 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2462 !OutlineFromLinkODRs)
2463 continue;
2464
2465 // Greedily prune out any regions that will overlap with already chosen
2466 // regions.
2467 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2468 continue;
2469
2470 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2472 return true;
2473
2474 return !this->InstructionClassifier.visit(ID.Inst);
2475 });
2476
2477 if (BadInst)
2478 continue;
2479
2480 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2481 OutlinableRegion(IRSC, CurrentGroup);
2482 CurrentGroup.Regions.push_back(OS);
2483
2484 CurrentEndIdx = EndIdx;
2485 }
2486}
2487
2489IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2490 InstructionCost RegionBenefit = 0;
2491 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2492 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2493 // We add the number of instructions in the region to the benefit as an
2494 // estimate as to how much will be removed.
2495 RegionBenefit += Region->getBenefit(TTI);
2496 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2497 << " saved instructions to overfall benefit.\n");
2498 }
2499
2500 return RegionBenefit;
2501}
2502
2503/// For the \p OutputCanon number passed in find the value represented by this
2504/// canonical number. If it is from a PHINode, we pick the first incoming
2505/// value and return that Value instead.
2506///
2507/// \param Region - The OutlinableRegion to get the Value from.
2508/// \param OutputCanon - The canonical number to find the Value from.
2509/// \returns The Value represented by a canonical number \p OutputCanon in \p
2510/// Region.
2512 unsigned OutputCanon) {
2513 OutlinableGroup &CurrentGroup = *Region.Parent;
2514 // If the value is greater than the value in the tracker, we have a
2515 // PHINode and will instead use one of the incoming values to find the
2516 // type.
2517 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2518 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2519 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2520 "Could not find GVN set for PHINode number!");
2521 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2522 OutputCanon = *It->second.second.begin();
2523 }
2524 std::optional<unsigned> OGVN =
2525 Region.Candidate->fromCanonicalNum(OutputCanon);
2526 assert(OGVN && "Could not find GVN for Canonical Number?");
2527 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2528 assert(OV && "Could not find value for GVN?");
2529 return *OV;
2530}
2531
2533IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2534 InstructionCost OverallCost = 0;
2535 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2536 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2537
2538 // Each output incurs a load after the call, so we add that to the cost.
2539 for (unsigned OutputCanon : Region->GVNStores) {
2540 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2541 InstructionCost LoadCost =
2542 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2544
2545 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2546 << " instructions to cost for output of type "
2547 << *V->getType() << "\n");
2548 OverallCost += LoadCost;
2549 }
2550 }
2551
2552 return OverallCost;
2553}
2554
2555/// Find the extra instructions needed to handle any output values for the
2556/// region.
2557///
2558/// \param [in] M - The Module to outline from.
2559/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2560/// \param [in] TTI - The TargetTransformInfo used to collect information for
2561/// new instruction costs.
2562/// \returns the additional cost to handle the outputs.
2564 OutlinableGroup &CurrentGroup,
2566 InstructionCost OutputCost = 0;
2567 unsigned NumOutputBranches = 0;
2568
2569 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2570 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2571 DenseSet<BasicBlock *> CandidateBlocks;
2572 Candidate.getBasicBlocks(CandidateBlocks);
2573
2574 // Count the number of different output branches that point to blocks outside
2575 // of the region.
2576 DenseSet<BasicBlock *> FoundBlocks;
2577 for (IRInstructionData &ID : Candidate) {
2578 if (!isa<BranchInst>(ID.Inst))
2579 continue;
2580
2581 for (Value *V : ID.OperVals) {
2582 BasicBlock *BB = static_cast<BasicBlock *>(V);
2583 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2584 NumOutputBranches++;
2585 }
2586 }
2587
2588 CurrentGroup.BranchesToOutside = NumOutputBranches;
2589
2590 for (const ArrayRef<unsigned> &OutputUse :
2591 CurrentGroup.OutputGVNCombinations) {
2592 for (unsigned OutputCanon : OutputUse) {
2593 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2594 InstructionCost StoreCost =
2595 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2597
2598 // An instruction cost is added for each store set that needs to occur for
2599 // various output combinations inside the function, plus a branch to
2600 // return to the exit block.
2601 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2602 << " instructions to cost for output of type "
2603 << *V->getType() << "\n");
2604 OutputCost += StoreCost * NumOutputBranches;
2605 }
2606
2607 InstructionCost BranchCost =
2609 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2610 << " a branch instruction\n");
2611 OutputCost += BranchCost * NumOutputBranches;
2612 }
2613
2614 // If there is more than one output scheme, we must have a comparison and
2615 // branch for each different item in the switch statement.
2616 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2617 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2618 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2621 InstructionCost BranchCost =
2623
2624 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2625 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2626
2627 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2628 << " instructions for each switch case for each different"
2629 << " output path in a function\n");
2630 OutputCost += TotalCost * NumOutputBranches;
2631 }
2632
2633 return OutputCost;
2634}
2635
2636void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2637 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2638 CurrentGroup.Benefit += RegionBenefit;
2639 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2640
2641 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2642 CurrentGroup.Cost += OutputReloadCost;
2643 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2644
2645 InstructionCost AverageRegionBenefit =
2646 RegionBenefit / CurrentGroup.Regions.size();
2647 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2648 unsigned NumRegions = CurrentGroup.Regions.size();
2650 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2651
2652 // We add one region to the cost once, to account for the instructions added
2653 // inside of the newly created function.
2654 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2655 << " instructions to cost for body of new function.\n");
2656 CurrentGroup.Cost += AverageRegionBenefit;
2657 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2658
2659 // For each argument, we must add an instruction for loading the argument
2660 // out of the register and into a value inside of the newly outlined function.
2661 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2662 << " instructions to cost for each argument in the new"
2663 << " function.\n");
2664 CurrentGroup.Cost +=
2665 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2666 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2667
2668 // Each argument needs to either be loaded into a register or onto the stack.
2669 // Some arguments will only be loaded into the stack once the argument
2670 // registers are filled.
2671 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2672 << " instructions to cost for each argument in the new"
2673 << " function " << NumRegions << " times for the "
2674 << "needed argument handling at the call site.\n");
2675 CurrentGroup.Cost +=
2676 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2677 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2678
2679 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2680 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2681}
2682
2683void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2684 ArrayRef<Value *> Outputs,
2685 LoadInst *LI) {
2686 // For and load instructions following the call
2687 Value *Operand = LI->getPointerOperand();
2688 std::optional<unsigned> OutputIdx;
2689 // Find if the operand it is an output register.
2690 for (unsigned ArgIdx = Region.NumExtractedInputs;
2691 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2692 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2693 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2694 break;
2695 }
2696 }
2697
2698 // If we found an output register, place a mapping of the new value
2699 // to the original in the mapping.
2700 if (!OutputIdx)
2701 return;
2702
2703 if (OutputMappings.find(Outputs[*OutputIdx]) == OutputMappings.end()) {
2704 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2705 << *Outputs[*OutputIdx] << "\n");
2706 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2707 } else {
2708 Value *Orig = OutputMappings.find(Outputs[*OutputIdx])->second;
2709 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2710 << *Outputs[*OutputIdx] << "\n");
2711 OutputMappings.insert(std::make_pair(LI, Orig));
2712 }
2713}
2714
2715bool IROutliner::extractSection(OutlinableRegion &Region) {
2716 SetVector<Value *> ArgInputs, Outputs, SinkCands;
2717 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2718 BasicBlock *InitialStart = Region.StartBB;
2719 Function *OrigF = Region.StartBB->getParent();
2720 CodeExtractorAnalysisCache CEAC(*OrigF);
2721 Region.ExtractedFunction =
2722 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2723
2724 // If the extraction was successful, find the BasicBlock, and reassign the
2725 // OutlinableRegion blocks
2726 if (!Region.ExtractedFunction) {
2727 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2728 << "\n");
2729 Region.reattachCandidate();
2730 return false;
2731 }
2732
2733 // Get the block containing the called branch, and reassign the blocks as
2734 // necessary. If the original block still exists, it is because we ended on
2735 // a branch instruction, and so we move the contents into the block before
2736 // and assign the previous block correctly.
2737 User *InstAsUser = Region.ExtractedFunction->user_back();
2738 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2739 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2740 assert(Region.PrevBB && "PrevBB is nullptr?");
2741 if (Region.PrevBB == InitialStart) {
2742 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2743 Instruction *BI = NewPrev->getTerminator();
2744 BI->eraseFromParent();
2745 moveBBContents(*InitialStart, *NewPrev);
2746 Region.PrevBB = NewPrev;
2747 InitialStart->eraseFromParent();
2748 }
2749
2750 Region.StartBB = RewrittenBB;
2751 Region.EndBB = RewrittenBB;
2752
2753 // The sequences of outlinable regions has now changed. We must fix the
2754 // IRInstructionDataList for consistency. Although they may not be illegal
2755 // instructions, they should not be compared with anything else as they
2756 // should not be outlined in this round. So marking these as illegal is
2757 // allowed.
2758 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2759 Instruction *BeginRewritten = &*RewrittenBB->begin();
2760 Instruction *EndRewritten = &*RewrittenBB->begin();
2761 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2762 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2763 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2764 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2765
2766 // Insert the first IRInstructionData of the new region in front of the
2767 // first IRInstructionData of the IRSimilarityCandidate.
2768 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2769 // Insert the first IRInstructionData of the new region after the
2770 // last IRInstructionData of the IRSimilarityCandidate.
2771 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2772 // Remove the IRInstructionData from the IRSimilarityCandidate.
2773 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2774
2775 assert(RewrittenBB != nullptr &&
2776 "Could not find a predecessor after extraction!");
2777
2778 // Iterate over the new set of instructions to find the new call
2779 // instruction.
2780 for (Instruction &I : *RewrittenBB)
2781 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2782 if (Region.ExtractedFunction == CI->getCalledFunction())
2783 Region.Call = CI;
2784 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2785 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2786 Region.reattachCandidate();
2787 return true;
2788}
2789
2790unsigned IROutliner::doOutline(Module &M) {
2791 // Find the possible similarity sections.
2792 InstructionClassifier.EnableBranches = !DisableBranches;
2793 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2794 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2795
2796 IRSimilarityIdentifier &Identifier = getIRSI(M);
2797 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2798
2799 // Sort them by size of extracted sections
2800 unsigned OutlinedFunctionNum = 0;
2801 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2802 // to sort them by the potential number of instructions to be outlined
2803 if (SimilarityCandidates.size() > 1)
2804 llvm::stable_sort(SimilarityCandidates,
2805 [](const std::vector<IRSimilarityCandidate> &LHS,
2806 const std::vector<IRSimilarityCandidate> &RHS) {
2807 return LHS[0].getLength() * LHS.size() >
2808 RHS[0].getLength() * RHS.size();
2809 });
2810 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2811 // each of the following for loops to avoid making an allocator.
2812 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2813
2814 DenseSet<unsigned> NotSame;
2815 std::vector<OutlinableGroup *> NegativeCostGroups;
2816 std::vector<OutlinableRegion *> OutlinedRegions;
2817 // Iterate over the possible sets of similarity.
2818 unsigned PotentialGroupIdx = 0;
2819 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2820 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2821
2822 // Remove entries that were previously outlined
2823 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2824
2825 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2826 // trying to outlined since there is no compatible similar instance of this
2827 // code.
2828 if (CurrentGroup.Regions.size() < 2)
2829 continue;
2830
2831 // Determine if there are any values that are the same constant throughout
2832 // each section in the set.
2833 NotSame.clear();
2834 CurrentGroup.findSameConstants(NotSame);
2835
2836 if (CurrentGroup.IgnoreGroup)
2837 continue;
2838
2839 // Create a CodeExtractor for each outlinable region. Identify inputs and
2840 // outputs for each section using the code extractor and create the argument
2841 // types for the Aggregate Outlining Function.
2842 OutlinedRegions.clear();
2843 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2844 // Break the outlinable region out of its parent BasicBlock into its own
2845 // BasicBlocks (see function implementation).
2846 OS->splitCandidate();
2847
2848 // There's a chance that when the region is split, extra instructions are
2849 // added to the region. This makes the region no longer viable
2850 // to be split, so we ignore it for outlining.
2851 if (!OS->CandidateSplit)
2852 continue;
2853
2855 DenseSet<BasicBlock *> BlocksInRegion;
2856 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2857 OS->CE = new (ExtractorAllocator.Allocate())
2858 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2859 false, nullptr, "outlined");
2860 findAddInputsOutputs(M, *OS, NotSame);
2861 if (!OS->IgnoreRegion)
2862 OutlinedRegions.push_back(OS);
2863
2864 // We recombine the blocks together now that we have gathered all the
2865 // needed information.
2866 OS->reattachCandidate();
2867 }
2868
2869 CurrentGroup.Regions = std::move(OutlinedRegions);
2870
2871 if (CurrentGroup.Regions.empty())
2872 continue;
2873
2874 CurrentGroup.collectGVNStoreSets(M);
2875
2876 if (CostModel)
2877 findCostBenefit(M, CurrentGroup);
2878
2879 // If we are adhering to the cost model, skip those groups where the cost
2880 // outweighs the benefits.
2881 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2883 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2884 ORE.emit([&]() {
2885 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2886 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2887 C->frontInstruction());
2888 R << "did not outline "
2889 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2890 << " regions due to estimated increase of "
2891 << ore::NV("InstructionIncrease",
2892 CurrentGroup.Cost - CurrentGroup.Benefit)
2893 << " instructions at locations ";
2894 interleave(
2895 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2896 [&R](OutlinableRegion *Region) {
2897 R << ore::NV(
2898 "DebugLoc",
2899 Region->Candidate->frontInstruction()->getDebugLoc());
2900 },
2901 [&R]() { R << " "; });
2902 return R;
2903 });
2904 continue;
2905 }
2906
2907 NegativeCostGroups.push_back(&CurrentGroup);
2908 }
2909
2910 ExtractorAllocator.DestroyAll();
2911
2912 if (NegativeCostGroups.size() > 1)
2913 stable_sort(NegativeCostGroups,
2914 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2915 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2916 });
2917
2918 std::vector<Function *> FuncsToRemove;
2919 for (OutlinableGroup *CG : NegativeCostGroups) {
2920 OutlinableGroup &CurrentGroup = *CG;
2921
2922 OutlinedRegions.clear();
2923 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2924 // We check whether our region is compatible with what has already been
2925 // outlined, and whether we need to ignore this item.
2926 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2927 continue;
2928 OutlinedRegions.push_back(Region);
2929 }
2930
2931 if (OutlinedRegions.size() < 2)
2932 continue;
2933
2934 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2935 // we are still outlining enough regions to make up for the added cost.
2936 CurrentGroup.Regions = std::move(OutlinedRegions);
2937 if (CostModel) {
2938 CurrentGroup.Benefit = 0;
2939 CurrentGroup.Cost = 0;
2940 findCostBenefit(M, CurrentGroup);
2941 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2942 continue;
2943 }
2944 OutlinedRegions.clear();
2945 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2946 Region->splitCandidate();
2947 if (!Region->CandidateSplit)
2948 continue;
2949 OutlinedRegions.push_back(Region);
2950 }
2951
2952 CurrentGroup.Regions = std::move(OutlinedRegions);
2953 if (CurrentGroup.Regions.size() < 2) {
2954 for (OutlinableRegion *R : CurrentGroup.Regions)
2955 R->reattachCandidate();
2956 continue;
2957 }
2958
2959 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2960 << " and benefit " << CurrentGroup.Benefit << "\n");
2961
2962 // Create functions out of all the sections, and mark them as outlined.
2963 OutlinedRegions.clear();
2964 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2966 DenseSet<BasicBlock *> BlocksInRegion;
2967 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2968 OS->CE = new (ExtractorAllocator.Allocate())
2969 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2970 false, nullptr, "outlined");
2971 bool FunctionOutlined = extractSection(*OS);
2972 if (FunctionOutlined) {
2973 unsigned StartIdx = OS->Candidate->getStartIdx();
2974 unsigned EndIdx = OS->Candidate->getEndIdx();
2975 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2976 Outlined.insert(Idx);
2977
2978 OutlinedRegions.push_back(OS);
2979 }
2980 }
2981
2982 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2983 << " with benefit " << CurrentGroup.Benefit
2984 << " and cost " << CurrentGroup.Cost << "\n");
2985
2986 CurrentGroup.Regions = std::move(OutlinedRegions);
2987
2988 if (CurrentGroup.Regions.empty())
2989 continue;
2990
2992 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2993 ORE.emit([&]() {
2994 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2995 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2996 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2997 << " regions with decrease of "
2998 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2999 << " instructions at locations ";
3000 interleave(
3001 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
3002 [&R](OutlinableRegion *Region) {
3003 R << ore::NV("DebugLoc",
3004 Region->Candidate->frontInstruction()->getDebugLoc());
3005 },
3006 [&R]() { R << " "; });
3007 return R;
3008 });
3009
3010 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
3011 OutlinedFunctionNum);
3012 }
3013
3014 for (Function *F : FuncsToRemove)
3015 F->eraseFromParent();
3016
3017 return OutlinedFunctionNum;
3018}
3019
3021 CostModel = !NoCostModel;
3022 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
3023
3024 return doOutline(M) > 0;
3025}
3026
3027// Pass Manager Boilerplate
3028namespace {
3029class IROutlinerLegacyPass : public ModulePass {
3030public:
3031 static char ID;
3032 IROutlinerLegacyPass() : ModulePass(ID) {
3034 }
3035
3036 void getAnalysisUsage(AnalysisUsage &AU) const override {
3040 }
3041
3042 bool runOnModule(Module &M) override;
3043};
3044} // namespace
3045
3046bool IROutlinerLegacyPass::runOnModule(Module &M) {
3047 if (skipModule(M))
3048 return false;
3049
3050 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3051 auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3052 ORE.reset(new OptimizationRemarkEmitter(&F));
3053 return *ORE;
3054 };
3055
3056 auto GTTI = [this](Function &F) -> TargetTransformInfo & {
3057 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3058 };
3059
3060 auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
3061 return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
3062 };
3063
3064 return IROutliner(GTTI, GIRSI, GORE).run(M);
3065}
3066
3068 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3069
3070 std::function<TargetTransformInfo &(Function &)> GTTI =
3071 [&FAM](Function &F) -> TargetTransformInfo & {
3073 };
3074
3075 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
3076 [&AM](Module &M) -> IRSimilarityIdentifier & {
3077 return AM.getResult<IRSimilarityAnalysis>(M);
3078 };
3079
3080 std::unique_ptr<OptimizationRemarkEmitter> ORE;
3081 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
3082 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
3083 ORE.reset(new OptimizationRemarkEmitter(&F));
3084 return *ORE;
3085 };
3086
3087 if (IROutliner(GTTI, GIRSI, GORE).run(M))
3088 return PreservedAnalyses::none();
3089 return PreservedAnalyses::all();
3090}
3091
3092char IROutlinerLegacyPass::ID = 0;
3093INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
3094 false)
3098INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
3099 false)
3100
3101ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
ReachingDefAnalysis InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1260
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
Definition: IROutliner.cpp:837
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
Definition: IROutliner.cpp:158
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
Definition: IROutliner.cpp:713
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
Definition: IROutliner.cpp:867
static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs, std::vector< Function * > &FuncsToRemove, const DenseMap< Value *, Value * > &OutputMappings)
Fill the new function that will serve as the replacement function for all of the extracted regions of...
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
Definition: IROutliner.cpp:554
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
Definition: IROutliner.cpp:168
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
Definition: IROutliner.cpp:225
IR Outliner
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
Definition: IROutliner.cpp:938
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
Definition: IROutliner.cpp:812
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
iroutliner
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
Definition: IROutliner.cpp:625
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
Definition: IROutliner.cpp:535
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
Definition: IROutliner.cpp:468
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the the constants that will need to be lifted into arguments as they are not the same in each in...
Definition: IROutliner.cpp:782
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
Statically lint checks LLVM IR
Definition: Lint.cpp:746
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
AttributeSet getFnAttrs() const
The function attributes are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:498
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:372
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:245
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:103
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:208
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:306
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:401
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:322
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:284
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:132
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
size_t size() const
Definition: BasicBlock.h:324
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:310
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:243
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1707
This is an important base class in LLVM.
Definition: Constant.h:41
Debug location.
Subprogram description.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
Class to represent function types.
Definition: DerivedTypes.h:103
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:550
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:136
const BasicBlock & getEntryBlock() const
Definition: Function.h:735
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:174
const BasicBlock & back() const
Definition: Function.h:760
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:313
size_t size() const
Definition: Function.h:756
bool hasOptNone() const
Do not optimize this function (-O0).
Definition: Function.h:638
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:315
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:578
size_t arg_size() const
Definition: Function.h:799
Argument * getArg(unsigned i) const
Definition: Function.h:784
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:640
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:514
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition: IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:933
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:88
const BasicBlock * getParent() const
Definition: Instruction.h:90
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
bool isTerminator() const
Definition: Instruction.h:171
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
SymbolTableList< Instruction >::iterator insertInto(BasicBlock *ParentBB, SymbolTableList< Instruction >::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Definition: Instruction.cpp:98
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition: Mangler.cpp:119
Root of the metadata hierarchy.
Definition: Metadata.h:61
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
iterator begin()
Definition: RegionInfo.h:560
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
iterator end()
Definition: RegionInfo.h:561
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition: SetVector.h:40
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:208
ArrayRef< T > getArrayRef() const
Definition: SetVector.h:63
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore=nullptr)
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:222
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
iterator_range< user_iterator > users()
Definition: Value.h:421
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:540
bool isSwiftError() const
Return true if this value is a swifterror value.
Definition: Value.cpp:1015
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
bool erase(const ValueT &V)
Definition: DenseSet.h:101
An opaque object representing a hash code.
Definition: Hashing.h:74
self_iterator getIterator()
Definition: ilist_node.h:82
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
iterator erase(iterator I)
Remove a node by iterator; never deletes.
Definition: simple_ilist.h:194
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
Definition: simple_ilist.h:157
void mergeAttributesForOutlining(Function &Base, const Function &ToMerge)
Merges the functions attributes from ToMerge into function Base.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1948
hash_code hash_value(const FixedPointSemantics &Val)
Definition: APFixedPoint.h:128
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition: STLExtras.h:2060
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:44
void initializeIROutlinerLegacyPassPass(PassRegistry &)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:40
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:48
auto predecessors(const MachineBasicBlock *BB)
ModulePass * createIROutlinerPass()
createIROutlinerPass - This pass finds similar code regions and factors those regions out into functi...
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:608
void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
Definition: DebugInfo.cpp:389
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:486
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
Definition: IROutliner.cpp:74
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
Definition: IROutliner.cpp:137
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
Definition: IROutliner.cpp:80
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
Definition: IROutliner.cpp:100
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
Definition: IROutliner.cpp:603
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
Definition: IROutliner.cpp:133
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
Definition: IROutliner.cpp:125
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
Definition: IROutliner.cpp:121
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
Definition: IROutliner.cpp:76
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
Definition: IROutliner.cpp:126
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
Definition: IROutliner.cpp:88
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
Definition: IROutliner.cpp:91
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Definition: IROutliner.cpp:116
Function * OutlinedFunction
The Function for the collective overall function.
Definition: IROutliner.cpp:84
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
Definition: IROutliner.cpp:104
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
Definition: IROutliner.cpp:95
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
Definition: IROutliner.cpp:82
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
Definition: IROutliner.cpp:112
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
Definition: IROutliner.cpp:610
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
Definition: IROutliner.cpp:130
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
Definition: IROutliner.cpp:108
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition: IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition: IROutliner.h:123
CodeExtractor * CE
Used to create an outlined function.
Definition: IROutliner.h:120
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
Definition: IROutliner.cpp:491
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition: IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition: IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition: IROutliner.h:130
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition: IROutliner.h:133
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Definition: IROutliner.cpp:253
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
Definition: IROutliner.cpp:191
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
Definition: IROutliner.cpp:203
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition: IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition: IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition: IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition: IROutliner.h:93
Function * ExtractedFunction
The function for the extracted region.
Definition: IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition: IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition: IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...
Definition: IROutliner.cpp:380