LLVM 23.0.0git
IROutliner.cpp
Go to the documentation of this file.
1//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10// Implementation for the IROutliner which is used by the IROutliner Pass.
11//
12//===----------------------------------------------------------------------===//
13
18#include "llvm/IR/Attributes.h"
19#include "llvm/IR/DIBuilder.h"
20#include "llvm/IR/DebugInfo.h"
22#include "llvm/IR/Dominators.h"
23#include "llvm/IR/Mangler.h"
24#include "llvm/IR/PassManager.h"
26#include "llvm/Transforms/IPO.h"
28#include <optional>
29#include <vector>
30
31#define DEBUG_TYPE "iroutliner"
32
33using namespace llvm;
34using namespace IRSimilarity;
35
36// A command flag to be used for debugging to exclude branches from similarity
37// matching and outlining.
38namespace llvm {
40
41// A command flag to be used for debugging to indirect calls from similarity
42// matching and outlining.
44
45// A command flag to be used for debugging to exclude intrinsics from similarity
46// matching and outlining.
48
49} // namespace llvm
50
51// Set to true if the user wants the ir outliner to run on linkonceodr linkage
52// functions. This is false by default because the linker can dedupe linkonceodr
53// functions. Since the outliner is confined to a single module (modulo LTO),
54// this is off by default. It should, however, be the default behavior in
55// LTO.
57 "enable-linkonceodr-ir-outlining", cl::Hidden,
58 cl::desc("Enable the IR outliner on linkonceodr functions"),
59 cl::init(false));
60
61// This is a debug option to test small pieces of code to ensure that outlining
62// works correctly.
64 "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
65 cl::desc("Debug option to outline greedily, without restriction that "
66 "calculated benefit outweighs cost"));
67
68/// The OutlinableGroup holds all the overarching information for outlining
69/// a set of regions that are structurally similar to one another, such as the
70/// types of the overall function, the output blocks, the sets of stores needed
71/// and a list of the different regions. This information is used in the
72/// deduplication of extracted regions with the same structure.
74 /// The sections that could be outlined
75 std::vector<OutlinableRegion *> Regions;
76
77 /// The argument types for the function created as the overall function to
78 /// replace the extracted function for each region.
79 std::vector<Type *> ArgumentTypes;
80 /// The FunctionType for the overall function.
82 /// The Function for the collective overall function.
84
85 /// Flag for whether we should not consider this group of OutlinableRegions
86 /// for extraction.
87 bool IgnoreGroup = false;
88
89 /// The return blocks for the overall function.
91
92 /// The PHIBlocks with their corresponding return block based on the return
93 /// value as the key.
95
96 /// A set containing the different GVN store sets needed. Each array contains
97 /// a sorted list of the different values that need to be stored into output
98 /// registers.
100
101 /// Flag for whether the \ref ArgumentTypes have been defined after the
102 /// extraction of the first region.
103 bool InputTypesSet = false;
104
105 /// The number of input values in \ref ArgumentTypes. Anything after this
106 /// index in ArgumentTypes is an output argument.
107 unsigned NumAggregateInputs = 0;
108
109 /// The mapping of the canonical numbering of the values in outlined sections
110 /// to specific arguments.
112
113 /// The number of branches in the region target a basic block that is outside
114 /// of the region.
115 unsigned BranchesToOutside = 0;
116
117 /// Tracker counting backwards from the highest unsigned value possible to
118 /// avoid conflicting with the GVNs of assigned values. We start at -3 since
119 /// -2 and -1 are assigned by the DenseMap.
120 unsigned PHINodeGVNTracker = -3;
121
123 std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
126
127 /// The number of instructions that will be outlined by extracting \ref
128 /// Regions.
130 /// The number of added instructions needed for the outlining of the \ref
131 /// Regions.
133
134 /// The argument that needs to be marked with the swifterr attribute. If not
135 /// needed, there is no value.
136 std::optional<unsigned> SwiftErrorArgument;
137
138 /// For the \ref Regions, we look at every Value. If it is a constant,
139 /// we check whether it is the same in Region.
140 ///
141 /// \param [in,out] NotSame contains the global value numbers where the
142 /// constant is not always the same, and must be passed in as an argument.
144
145 /// For the regions, look at each set of GVN stores needed and account for
146 /// each combination. Add an argument to the argument types if there is
147 /// more than one combination.
148 ///
149 /// \param [in] M - The module we are outlining from.
151};
152
153/// Move the contents of \p SourceBB to before the last instruction of \p
154/// TargetBB.
155/// \param SourceBB - the BasicBlock to pull Instructions from.
156/// \param TargetBB - the BasicBlock to put Instruction into.
157static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
158 TargetBB.splice(TargetBB.end(), &SourceBB);
159}
160
161/// A function to sort the keys of \p Map, which must be a mapping of constant
162/// values to basic blocks and return it in \p SortedKeys
163///
164/// \param SortedKeys - The vector the keys will be return in and sorted.
165/// \param Map - The DenseMap containing keys to sort.
166static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
168 for (auto &VtoBB : Map)
169 SortedKeys.push_back(VtoBB.first);
170
171 // Here we expect to have either 1 value that is void (nullptr) or multiple
172 // values that are all constant integers.
173 if (SortedKeys.size() == 1) {
174 assert(!SortedKeys[0] && "Expected a single void value.");
175 return;
176 }
177
178 stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
179 assert(LHS && RHS && "Expected non void values.");
180 const ConstantInt *LHSC = cast<ConstantInt>(LHS);
181 const ConstantInt *RHSC = cast<ConstantInt>(RHS);
182
183 return LHSC->getLimitedValue() < RHSC->getLimitedValue();
184 });
185}
186
188 Value *V) {
189 std::optional<unsigned> GVN = Candidate->getGVN(V);
190 assert(GVN && "No GVN for incoming value");
191 std::optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
192 std::optional<unsigned> FirstGVN =
193 Other.Candidate->fromCanonicalNum(*CanonNum);
194 std::optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
195 return FoundValueOpt.value_or(nullptr);
196}
197
200 BasicBlock *BB) {
201 Instruction *FirstNonPHI = &*BB->getFirstNonPHIOrDbg();
202 assert(FirstNonPHI && "block is empty?");
203 Value *CorrespondingVal = findCorrespondingValueIn(Other, FirstNonPHI);
204 if (!CorrespondingVal)
205 return nullptr;
206 BasicBlock *CorrespondingBlock =
207 cast<Instruction>(CorrespondingVal)->getParent();
208 return CorrespondingBlock;
209}
210
211/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
212/// in \p Included to branch to BasicBlock \p Replace if they currently branch
213/// to the BasicBlock \p Find. This is used to fix up the incoming basic blocks
214/// when PHINodes are included in outlined regions.
215///
216/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
217/// checked.
218/// \param Find - The successor block to be replaced.
219/// \param Replace - The new succesor block to branch to.
220/// \param Included - The set of blocks about to be outlined.
223 DenseSet<BasicBlock *> &Included) {
224 for (PHINode &PN : PHIBlock->phis())
225 for (BasicBlock *Incoming : PN.blocks())
226 // Check if the incoming block is included in the set of blocks being
227 // outlined.
228 if (Included.contains(Incoming))
230}
231
232
234 assert(!CandidateSplit && "Candidate already split!");
235
236 Instruction *BackInst = Candidate->backInstruction();
237
238 Instruction *EndInst = nullptr;
239 // Check whether the last instruction is a terminator, if it is, we do
240 // not split on the following instruction. We leave the block as it is. We
241 // also check that this is not the last instruction in the Module, otherwise
242 // the check for whether the current following instruction matches the
243 // previously recorded instruction will be incorrect.
244 if (!BackInst->isTerminator() ||
245 BackInst->getParent() != &BackInst->getFunction()->back()) {
246 EndInst = Candidate->end()->Inst;
247 assert(EndInst && "Expected an end instruction?");
248 }
249
250 // We check if the current instruction following the last instruction in the
251 // region is the same as the recorded instruction following the last
252 // instruction. If they do not match, there could be problems in rewriting
253 // the program after outlining, so we ignore it.
254 if (!BackInst->isTerminator() && EndInst != BackInst->getNextNode())
255 return;
256
257 Instruction *StartInst = (*Candidate->begin()).Inst;
258 assert(StartInst && "Expected a start instruction?");
259 StartBB = StartInst->getParent();
260 PrevBB = StartBB;
261
263 Candidate->getBasicBlocks(BBSet);
264
265 // We iterate over the instructions in the region, if we find a PHINode, we
266 // check if there are predecessors outside of the region, if there are,
267 // we ignore this region since we are unable to handle the severing of the
268 // phi node right now.
269
270 // TODO: Handle extraneous inputs for PHINodes through variable number of
271 // inputs, similar to how outputs are handled.
272 BasicBlock::iterator It = StartInst->getIterator();
273 EndBB = BackInst->getParent();
274 BasicBlock *IBlock;
275 BasicBlock *PHIPredBlock = nullptr;
276 bool EndBBTermAndBackInstDifferent = EndBB->getTerminator() != BackInst;
277 while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
278 unsigned NumPredsOutsideRegion = 0;
279 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
280 if (!BBSet.contains(PN->getIncomingBlock(i))) {
281 PHIPredBlock = PN->getIncomingBlock(i);
282 ++NumPredsOutsideRegion;
283 continue;
284 }
285
286 // We must consider the case there the incoming block to the PHINode is
287 // the same as the final block of the OutlinableRegion. If this is the
288 // case, the branch from this block must also be outlined to be valid.
289 IBlock = PN->getIncomingBlock(i);
290 if (IBlock == EndBB && EndBBTermAndBackInstDifferent) {
291 PHIPredBlock = PN->getIncomingBlock(i);
292 ++NumPredsOutsideRegion;
293 }
294 }
295
296 if (NumPredsOutsideRegion > 1)
297 return;
298
299 It++;
300 }
301
302 // If the region starts with a PHINode, but is not the initial instruction of
303 // the BasicBlock, we ignore this region for now.
304 if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
305 return;
306
307 // If the region ends with a PHINode, but does not contain all of the phi node
308 // instructions of the region, we ignore it for now.
309 if (isa<PHINode>(BackInst) &&
310 BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
311 return;
312
313 // The basic block gets split like so:
314 // block: block:
315 // inst1 inst1
316 // inst2 inst2
317 // region1 br block_to_outline
318 // region2 block_to_outline:
319 // region3 -> region1
320 // region4 region2
321 // inst3 region3
322 // inst4 region4
323 // br block_after_outline
324 // block_after_outline:
325 // inst3
326 // inst4
327
328 std::string OriginalName = PrevBB->getName().str();
329
330 StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
331 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
332 // If there was a PHINode with an incoming block outside the region,
333 // make sure is correctly updated in the newly split block.
334 if (PHIPredBlock)
335 PrevBB->replaceSuccessorsPhiUsesWith(PHIPredBlock, PrevBB);
336
337 CandidateSplit = true;
338 if (!BackInst->isTerminator()) {
339 EndBB = EndInst->getParent();
340 FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
341 EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
342 FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
343 } else {
344 EndBB = BackInst->getParent();
345 EndsInBranch = true;
346 FollowBB = nullptr;
347 }
348
349 // Refind the basic block set.
350 BBSet.clear();
351 Candidate->getBasicBlocks(BBSet);
352 // For the phi nodes in the new starting basic block of the region, we
353 // reassign the targets of the basic blocks branching instructions.
355 if (FollowBB)
357}
358
360 assert(CandidateSplit && "Candidate is not split!");
361
362 // The basic block gets reattached like so:
363 // block: block:
364 // inst1 inst1
365 // inst2 inst2
366 // br block_to_outline region1
367 // block_to_outline: -> region2
368 // region1 region3
369 // region2 region4
370 // region3 inst3
371 // region4 inst4
372 // br block_after_outline
373 // block_after_outline:
374 // inst3
375 // inst4
376 assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
377
378 assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
379 // Make sure PHINode references to the block we are merging into are
380 // updated to be incoming blocks from the predecessor to the current block.
381
382 // NOTE: If this is updated such that the outlined block can have more than
383 // one incoming block to a PHINode, this logic will have to updated
384 // to handle multiple precessors instead.
385
386 // We only need to update this if the outlined section contains a PHINode, if
387 // it does not, then the incoming block was never changed in the first place.
388 // On the other hand, if PrevBB has no predecessors, it means that all
389 // incoming blocks to the first block are contained in the region, and there
390 // will be nothing to update.
391 Instruction *StartInst = (*Candidate->begin()).Inst;
392 if (isa<PHINode>(StartInst) && !PrevBB->hasNPredecessors(0)) {
393 assert(!PrevBB->hasNPredecessorsOrMore(2) &&
394 "PrevBB has more than one predecessor. Should be 0 or 1.");
395 BasicBlock *BeforePrevBB = PrevBB->getSinglePredecessor();
396 PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, BeforePrevBB);
397 }
398 PrevBB->getTerminator()->eraseFromParent();
399
400 // If we reattaching after outlining, we iterate over the phi nodes to
401 // the initial block, and reassign the branch instructions of the incoming
402 // blocks to the block we are remerging into.
403 if (!ExtractedFunction) {
405 Candidate->getBasicBlocks(BBSet);
406
408 if (!EndsInBranch)
410 }
411
413
414 BasicBlock *PlacementBB = PrevBB;
415 if (StartBB != EndBB)
416 PlacementBB = EndBB;
417 if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
418 assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
419 assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
420 PlacementBB->getTerminator()->eraseFromParent();
421 moveBBContents(*FollowBB, *PlacementBB);
422 PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
423 FollowBB->eraseFromParent();
424 }
425
426 PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
427 StartBB->eraseFromParent();
428
429 // Make sure to save changes back to the StartBB.
430 StartBB = PrevBB;
431 EndBB = nullptr;
432 PrevBB = nullptr;
433 FollowBB = nullptr;
434
435 CandidateSplit = false;
436}
437
438/// Find whether \p V matches the Constants previously found for the \p GVN.
439///
440/// \param V - The value to check for consistency.
441/// \param GVN - The global value number assigned to \p V.
442/// \param GVNToConstant - The mapping of global value number to Constants.
443/// \returns true if the Value matches the Constant mapped to by V and false if
444/// it \p V is a Constant but does not match.
445/// \returns std::nullopt if \p V is not a Constant.
446static std::optional<bool>
447constantMatches(Value *V, unsigned GVN,
448 DenseMap<unsigned, Constant *> &GVNToConstant) {
449 // See if we have a constants
451 if (!CST)
452 return std::nullopt;
453
454 // Holds a mapping from a global value number to a Constant.
456 bool Inserted;
457
458
459 // If we have a constant, try to make a new entry in the GVNToConstant.
460 std::tie(GVNToConstantIt, Inserted) =
461 GVNToConstant.insert(std::make_pair(GVN, CST));
462 // If it was found and is not equal, it is not the same. We do not
463 // handle this case yet, and exit early.
464 if (Inserted || (GVNToConstantIt->second == CST))
465 return true;
466
467 return false;
468}
469
471 InstructionCost Benefit = 0;
472
473 // Estimate the benefit of outlining a specific sections of the program. We
474 // delegate mostly this task to the TargetTransformInfo so that if the target
475 // has specific changes, we can have a more accurate estimate.
476
477 // However, getInstructionCost delegates the code size calculation for
478 // arithmetic instructions to getArithmeticInstrCost in
479 // include/Analysis/TargetTransformImpl.h, where it always estimates that the
480 // code size for a division and remainder instruction to be equal to 4, and
481 // everything else to 1. This is not an accurate representation of the
482 // division instruction for targets that have a native division instruction.
483 // To be overly conservative, we only add 1 to the number of instructions for
484 // each division instruction.
485 for (IRInstructionData &ID : *Candidate) {
486 Instruction *I = ID.Inst;
487 switch (I->getOpcode()) {
488 case Instruction::FDiv:
489 case Instruction::FRem:
490 case Instruction::SDiv:
491 case Instruction::SRem:
492 case Instruction::UDiv:
493 case Instruction::URem:
494 Benefit += 1;
495 break;
496 default:
497 Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
498 break;
499 }
500 }
501
502 return Benefit;
503}
504
505/// Check the \p OutputMappings structure for value \p Input, if it exists
506/// it has been used as an output for outlining, and has been renamed, and we
507/// return the new value, otherwise, we return the same value.
508///
509/// \param OutputMappings [in] - The mapping of values to their renamed value
510/// after being used as an output for an outlined region.
511/// \param Input [in] - The value to find the remapped value of, if it exists.
512/// \return The remapped value if it has been renamed, and the same value if has
513/// not.
515 Value *Input) {
516 auto OutputMapping = OutputMappings.find(Input);
517 if (OutputMapping != OutputMappings.end())
518 return OutputMapping->second;
519 return Input;
520}
521
522/// Find whether \p Region matches the global value numbering to Constant
523/// mapping found so far.
524///
525/// \param Region - The OutlinableRegion we are checking for constants
526/// \param GVNToConstant - The mapping of global value number to Constants.
527/// \param NotSame - The set of global value numbers that do not have the same
528/// constant in each region.
529/// \returns true if all Constants are the same in every use of a Constant in \p
530/// Region and false if not
531static bool
533 DenseMap<unsigned, Constant *> &GVNToConstant,
534 DenseSet<unsigned> &NotSame) {
535 bool ConstantsTheSame = true;
536
537 IRSimilarityCandidate &C = *Region.Candidate;
538 for (IRInstructionData &ID : C) {
539
540 // Iterate over the operands in an instruction. If the global value number,
541 // assigned by the IRSimilarityCandidate, has been seen before, we check if
542 // the number has been found to be not the same value in each instance.
543 for (Value *V : ID.OperVals) {
544 std::optional<unsigned> GVNOpt = C.getGVN(V);
545 assert(GVNOpt && "Expected a GVN for operand?");
546 unsigned GVN = *GVNOpt;
547
548 // Check if this global value has been found to not be the same already.
549 if (NotSame.contains(GVN)) {
550 if (isa<Constant>(V))
551 ConstantsTheSame = false;
552 continue;
553 }
554
555 // If it has been the same so far, we check the value for if the
556 // associated Constant value match the previous instances of the same
557 // global value number. If the global value does not map to a Constant,
558 // it is considered to not be the same value.
559 std::optional<bool> ConstantMatches =
560 constantMatches(V, GVN, GVNToConstant);
561 if (ConstantMatches) {
562 if (*ConstantMatches)
563 continue;
564 else
565 ConstantsTheSame = false;
566 }
567
568 // While this value is a register, it might not have been previously,
569 // make sure we don't already have a constant mapped to this global value
570 // number.
571 if (GVNToConstant.contains(GVN))
572 ConstantsTheSame = false;
573
574 NotSame.insert(GVN);
575 }
576 }
577
578 return ConstantsTheSame;
579}
580
582 DenseMap<unsigned, Constant *> GVNToConstant;
583
585 collectRegionsConstants(*Region, GVNToConstant, NotSame);
586}
587
589 for (OutlinableRegion *OS : Regions)
590 OutputGVNCombinations.insert(OS->GVNStores);
591
592 // We are adding an extracted argument to decide between which output path
593 // to use in the basic block. It is used in a switch statement and only
594 // needs to be an integer.
595 if (OutputGVNCombinations.size() > 1)
596 ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
597}
598
599/// Get the subprogram if it exists for one of the outlined regions.
600///
601/// \param [in] Group - The set of regions to find a subprogram for.
602/// \returns the subprogram if it exists, or nullptr.
604 for (OutlinableRegion *OS : Group.Regions)
605 if (Function *F = OS->Call->getFunction())
606 if (DISubprogram *SP = F->getSubprogram())
607 return SP;
608
609 return nullptr;
610}
611
612Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
613 unsigned FunctionNameSuffix) {
614 assert(!Group.OutlinedFunction && "Function is already defined!");
615
616 Type *RetTy = Type::getVoidTy(M.getContext());
617 // All extracted functions _should_ have the same return type at this point
618 // since the similarity identifier ensures that all branches outside of the
619 // region occur in the same place.
620
621 // NOTE: Should we ever move to the model that uses a switch at every point
622 // needed, meaning that we could branch within the region or out, it is
623 // possible that we will need to switch to using the most general case all of
624 // the time.
625 for (OutlinableRegion *R : Group.Regions) {
626 Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
627 if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
628 (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
629 RetTy = ExtractedFuncType;
630 }
631
633 RetTy, Group.ArgumentTypes, false);
634
635 // These functions will only be called from within the same module, so
636 // we can set an internal linkage.
639 "outlined_ir_func_" + std::to_string(FunctionNameSuffix), M);
640
641 // Transfer the swifterr attribute to the correct function parameter.
642 if (Group.SwiftErrorArgument)
644 Attribute::SwiftError);
645
646 Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
647 Group.OutlinedFunction->addFnAttr(Attribute::MinSize);
648
649 // If there's a DISubprogram associated with this outlined function, then
650 // emit debug info for the outlined function.
651 if (DISubprogram *SP = getSubprogramOrNull(Group)) {
652 Function *F = Group.OutlinedFunction;
653 // We have a DISubprogram. Get its DICompileUnit.
654 DICompileUnit *CU = SP->getUnit();
655 DIBuilder DB(M, true, CU);
656 DIFile *Unit = SP->getFile();
657 Mangler Mg;
658 // Get the mangled name of the function for the linkage name.
659 std::string Dummy;
660 llvm::raw_string_ostream MangledNameStream(Dummy);
661 Mg.getNameWithPrefix(MangledNameStream, F, false);
662
663 DISubprogram *OutlinedSP = DB.createFunction(
664 Unit /* Context */, F->getName(), Dummy, Unit /* File */,
665 0 /* Line 0 is reserved for compiler-generated code. */,
666 DB.createSubroutineType(DB.getOrCreateTypeArray({})), /* void type */
667 0, /* Line 0 is reserved for compiler-generated code. */
668 DINode::DIFlags::FlagArtificial /* Compiler-generated code. */,
669 /* Outlined code is optimized code by definition. */
670 DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized);
671
672 // Attach subprogram to the function.
673 F->setSubprogram(OutlinedSP);
674 // We're done with the DIBuilder.
675 DB.finalize();
676 }
677
678 return Group.OutlinedFunction;
679}
680
681/// Move each BasicBlock in \p Old to \p New.
682///
683/// \param [in] Old - The function to move the basic blocks from.
684/// \param [in] New - The function to move the basic blocks to.
685/// \param [out] NewEnds - The return blocks of the new overall function.
686static void moveFunctionData(Function &Old, Function &New,
688 for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
689 CurrBB.removeFromParent();
690 CurrBB.insertInto(&New);
691 Instruction *I = CurrBB.getTerminator();
692
693 // For each block we find a return instruction is, it is a potential exit
694 // path for the function. We keep track of each block based on the return
695 // value here.
697 NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
698
699 for (Instruction &Val : CurrBB) {
700 // Since debug-info originates from many different locations in the
701 // program, it will cause incorrect reporting from a debugger if we keep
702 // the same debug instructions. Drop non-intrinsic DbgVariableRecords
703 // here, collect intrinsics for removal later.
704 Val.dropDbgRecords();
705
706 // We must handle the scoping of called functions differently than
707 // other outlined instructions.
708 if (!isa<CallInst>(&Val)) {
709 // Remove the debug information for outlined functions.
710 Val.setDebugLoc(DebugLoc::getDropped());
711
712 // Loop info metadata may contain line locations. Update them to have no
713 // value in the new subprogram since the outlined code could be from
714 // several locations.
715 auto updateLoopInfoLoc = [&New](Metadata *MD) -> Metadata * {
716 if (DISubprogram *SP = New.getSubprogram())
717 if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
718 return DILocation::get(New.getContext(), Loc->getLine(),
719 Loc->getColumn(), SP, nullptr);
720 return MD;
721 };
722 updateLoopMetadataDebugLocations(Val, updateLoopInfoLoc);
723 continue;
724 }
725
726 // Edit the scope of called functions inside of outlined functions.
727 if (DISubprogram *SP = New.getSubprogram()) {
728 DILocation *DI = DILocation::get(New.getContext(), 0, 0, SP);
729 Val.setDebugLoc(DI);
730 }
731 }
732 }
733}
734
735/// Find the constants that will need to be lifted into arguments
736/// as they are not the same in each instance of the region.
737///
738/// \param [in] C - The IRSimilarityCandidate containing the region we are
739/// analyzing.
740/// \param [in] NotSame - The set of global value numbers that do not have a
741/// single Constant across all OutlinableRegions similar to \p C.
742/// \param [out] Inputs - The list containing the global value numbers of the
743/// arguments needed for the region of code.
745 std::vector<unsigned> &Inputs) {
747 // Iterate over the instructions, and find what constants will need to be
748 // extracted into arguments.
749 for (IRInstructionDataList::iterator IDIt = C.begin(), EndIDIt = C.end();
750 IDIt != EndIDIt; IDIt++) {
751 for (Value *V : (*IDIt).OperVals) {
752 // Since these are stored before any outlining, they will be in the
753 // global value numbering.
754 unsigned GVN = *C.getGVN(V);
755 if (isa<Constant>(V))
756 if (NotSame.contains(GVN) && Seen.insert(GVN).second)
757 Inputs.push_back(GVN);
758 }
759 }
760}
761
762/// Find the GVN for the inputs that have been found by the CodeExtractor.
763///
764/// \param [in] C - The IRSimilarityCandidate containing the region we are
765/// analyzing.
766/// \param [in] CurrentInputs - The set of inputs found by the
767/// CodeExtractor.
768/// \param [in] OutputMappings - The mapping of values that have been replaced
769/// by a new output value.
770/// \param [out] EndInputNumbers - The global value numbers for the extracted
771/// arguments.
773 SetVector<Value *> &CurrentInputs,
774 const DenseMap<Value *, Value *> &OutputMappings,
775 std::vector<unsigned> &EndInputNumbers) {
776 // Get the Global Value Number for each input. We check if the Value has been
777 // replaced by a different value at output, and use the original value before
778 // replacement.
779 for (Value *Input : CurrentInputs) {
780 assert(Input && "Have a nullptr as an input");
781 auto It = OutputMappings.find(Input);
782 if (It != OutputMappings.end())
783 Input = It->second;
784 assert(C.getGVN(Input) && "Could not find a numbering for the given input");
785 EndInputNumbers.push_back(*C.getGVN(Input));
786 }
787}
788
789/// Find the original value for the \p ArgInput values if any one of them was
790/// replaced during a previous extraction.
791///
792/// \param [in] ArgInputs - The inputs to be extracted by the code extractor.
793/// \param [in] OutputMappings - The mapping of values that have been replaced
794/// by a new output value.
795/// \param [out] RemappedArgInputs - The remapped values according to
796/// \p OutputMappings that will be extracted.
797static void
799 const DenseMap<Value *, Value *> &OutputMappings,
800 SetVector<Value *> &RemappedArgInputs) {
801 // Get the global value number for each input that will be extracted as an
802 // argument by the code extractor, remapping if needed for reloaded values.
803 for (Value *Input : ArgInputs) {
804 auto It = OutputMappings.find(Input);
805 if (It != OutputMappings.end())
806 Input = It->second;
807 RemappedArgInputs.insert(Input);
808 }
809}
810
811/// Find the input GVNs and the output values for a region of Instructions.
812/// Using the code extractor, we collect the inputs to the extracted function.
813///
814/// The \p Region can be identified as needing to be ignored in this function.
815/// It should be checked whether it should be ignored after a call to this
816/// function.
817///
818/// \param [in,out] Region - The region of code to be analyzed.
819/// \param [out] InputGVNs - The global value numbers for the extracted
820/// arguments.
821/// \param [in] NotSame - The global value numbers in the region that do not
822/// have the same constant value in the regions structurally similar to
823/// \p Region.
824/// \param [in] OutputMappings - The mapping of values that have been replaced
825/// by a new output value after extraction.
826/// \param [out] ArgInputs - The values of the inputs to the extracted function.
827/// \param [out] Outputs - The set of values extracted by the CodeExtractor
828/// as outputs.
830 OutlinableRegion &Region, std::vector<unsigned> &InputGVNs,
831 DenseSet<unsigned> &NotSame, DenseMap<Value *, Value *> &OutputMappings,
832 SetVector<Value *> &ArgInputs, SetVector<Value *> &Outputs) {
833 IRSimilarityCandidate &C = *Region.Candidate;
834
835 // OverallInputs are the inputs to the region found by the CodeExtractor,
836 // SinkCands and HoistCands are used by the CodeExtractor to find sunken
837 // allocas of values whose lifetimes are contained completely within the
838 // outlined region. PremappedInputs are the arguments found by the
839 // CodeExtractor, removing conditions such as sunken allocas, but that
840 // may need to be remapped due to the extracted output values replacing
841 // the original values. We use DummyOutputs for this first run of finding
842 // inputs and outputs since the outputs could change during findAllocas,
843 // the correct set of extracted outputs will be in the final Outputs ValueSet.
844 SetVector<Value *> OverallInputs, PremappedInputs, SinkCands, HoistCands,
845 DummyOutputs;
846
847 // Use the code extractor to get the inputs and outputs, without sunken
848 // allocas or removing llvm.assumes.
849 CodeExtractor *CE = Region.CE;
850 CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands);
851 assert(Region.StartBB && "Region must have a start BasicBlock!");
852 Function *OrigF = Region.StartBB->getParent();
853 CodeExtractorAnalysisCache CEAC(*OrigF);
854 BasicBlock *Dummy = nullptr;
855
856 // The region may be ineligible due to VarArgs in the parent function. In this
857 // case we ignore the region.
858 if (!CE->isEligible()) {
859 Region.IgnoreRegion = true;
860 return;
861 }
862
863 // Find if any values are going to be sunk into the function when extracted
864 CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy);
865 CE->findInputsOutputs(PremappedInputs, Outputs, SinkCands);
866
867 // TODO: Support regions with sunken allocas: values whose lifetimes are
868 // contained completely within the outlined region. These are not guaranteed
869 // to be the same in every region, so we must elevate them all to arguments
870 // when they appear. If these values are not equal, it means there is some
871 // Input in OverallInputs that was removed for ArgInputs.
872 if (OverallInputs.size() != PremappedInputs.size()) {
873 Region.IgnoreRegion = true;
874 return;
875 }
876
877 findConstants(C, NotSame, InputGVNs);
878
879 mapInputsToGVNs(C, OverallInputs, OutputMappings, InputGVNs);
880
881 remapExtractedInputs(PremappedInputs.getArrayRef(), OutputMappings,
882 ArgInputs);
883
884 // Sort the GVNs, since we now have constants included in the \ref InputGVNs
885 // we need to make sure they are in a deterministic order.
886 stable_sort(InputGVNs);
887}
888
889/// Look over the inputs and map each input argument to an argument in the
890/// overall function for the OutlinableRegions. This creates a way to replace
891/// the arguments of the extracted function with the arguments of the new
892/// overall function.
893///
894/// \param [in,out] Region - The region of code to be analyzed.
895/// \param [in] InputGVNs - The global value numbering of the input values
896/// collected.
897/// \param [in] ArgInputs - The values of the arguments to the extracted
898/// function.
899static void
901 std::vector<unsigned> &InputGVNs,
902 SetVector<Value *> &ArgInputs) {
903
904 IRSimilarityCandidate &C = *Region.Candidate;
905 OutlinableGroup &Group = *Region.Parent;
906
907 // This counts the argument number in the overall function.
908 unsigned TypeIndex = 0;
909
910 // This counts the argument number in the extracted function.
911 unsigned OriginalIndex = 0;
912
913 // Find the mapping of the extracted arguments to the arguments for the
914 // overall function. Since there may be extra arguments in the overall
915 // function to account for the extracted constants, we have two different
916 // counters as we find extracted arguments, and as we come across overall
917 // arguments.
918
919 // Additionally, in our first pass, for the first extracted function,
920 // we find argument locations for the canonical value numbering. This
921 // numbering overrides any discovered location for the extracted code.
922 for (unsigned InputVal : InputGVNs) {
923 std::optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
924 assert(CanonicalNumberOpt && "Canonical number not found?");
925 unsigned CanonicalNumber = *CanonicalNumberOpt;
926
927 std::optional<Value *> InputOpt = C.fromGVN(InputVal);
928 assert(InputOpt && "Global value number not found?");
929 Value *Input = *InputOpt;
930
931 auto AggArgIt = Group.CanonicalNumberToAggArg.find(CanonicalNumber);
932
933 if (!Group.InputTypesSet) {
934 Group.ArgumentTypes.push_back(Input->getType());
935 // If the input value has a swifterr attribute, make sure to mark the
936 // argument in the overall function.
937 if (Input->isSwiftError()) {
938 assert(
939 !Group.SwiftErrorArgument &&
940 "Argument already marked with swifterr for this OutlinableGroup!");
941 Group.SwiftErrorArgument = TypeIndex;
942 }
943 }
944
945 // Check if we have a constant. If we do add it to the overall argument
946 // number to Constant map for the region, and continue to the next input.
947 if (Constant *CST = dyn_cast<Constant>(Input)) {
948 if (AggArgIt != Group.CanonicalNumberToAggArg.end())
949 Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
950 else {
952 std::make_pair(CanonicalNumber, TypeIndex));
953 Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
954 }
955 TypeIndex++;
956 continue;
957 }
958
959 // It is not a constant, we create the mapping from extracted argument list
960 // to the overall argument list, using the canonical location, if it exists.
961 assert(ArgInputs.count(Input) && "Input cannot be found!");
962
963 if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
964 if (OriginalIndex != AggArgIt->second)
965 Region.ChangedArgOrder = true;
966 Region.ExtractedArgToAgg.insert(
967 std::make_pair(OriginalIndex, AggArgIt->second));
968 Region.AggArgToExtracted.insert(
969 std::make_pair(AggArgIt->second, OriginalIndex));
970 } else {
972 std::make_pair(CanonicalNumber, TypeIndex));
973 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
974 Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
975 }
976 OriginalIndex++;
977 TypeIndex++;
978 }
979
980 // If the function type definitions for the OutlinableGroup holding the region
981 // have not been set, set the length of the inputs here. We should have the
982 // same inputs for all of the different regions contained in the
983 // OutlinableGroup since they are all structurally similar to one another.
984 if (!Group.InputTypesSet) {
985 Group.NumAggregateInputs = TypeIndex;
986 Group.InputTypesSet = true;
987 }
988
989 Region.NumExtractedInputs = OriginalIndex;
990}
991
992/// Check if the \p V has any uses outside of the region other than \p PN.
993///
994/// \param V [in] - The value to check.
995/// \param PHILoc [in] - The location in the PHINode of \p V.
996/// \param PN [in] - The PHINode using \p V.
997/// \param Exits [in] - The potential blocks we exit to from the outlined
998/// region.
999/// \param BlocksInRegion [in] - The basic blocks contained in the region.
1000/// \returns true if \p V has any use soutside its region other than \p PN.
1001static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
1003 DenseSet<BasicBlock *> &BlocksInRegion) {
1004 // We check to see if the value is used by the PHINode from some other
1005 // predecessor not included in the region. If it is, we make sure
1006 // to keep it as an output.
1008 [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
1009 return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
1010 !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
1011 }))
1012 return true;
1013
1014 // Check if the value is used by any other instructions outside the region.
1015 return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
1016 Instruction *I = dyn_cast<Instruction>(U);
1017 if (!I)
1018 return false;
1019
1020 // If the use of the item is inside the region, we skip it. Uses
1021 // inside the region give us useful information about how the item could be
1022 // used as an output.
1023 BasicBlock *Parent = I->getParent();
1024 if (BlocksInRegion.contains(Parent))
1025 return false;
1026
1027 // If it's not a PHINode then we definitely know the use matters. This
1028 // output value will not completely combined with another item in a PHINode
1029 // as it is directly reference by another non-phi instruction
1030 if (!isa<PHINode>(I))
1031 return true;
1032
1033 // If we have a PHINode outside one of the exit locations, then it
1034 // can be considered an outside use as well. If there is a PHINode
1035 // contained in the Exit where this values use matters, it will be
1036 // caught when we analyze that PHINode.
1037 if (!Exits.contains(Parent))
1038 return true;
1039
1040 return false;
1041 });
1042}
1043
1044/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
1045/// considered outputs. A PHINodes is an output when more than one incoming
1046/// value has been marked by the CodeExtractor as an output.
1047///
1048/// \param CurrentExitFromRegion [in] - The block to analyze.
1049/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
1050/// region.
1051/// \param RegionBlocks [in] - The basic blocks in the region.
1052/// \param Outputs [in, out] - The existing outputs for the region, we may add
1053/// PHINodes to this as we find that they replace output values.
1054/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
1055/// totally replaced by a PHINode.
1056/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
1057/// in PHINodes, but have other uses, and should still be considered outputs.
1059 BasicBlock *CurrentExitFromRegion,
1060 SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
1061 DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
1062 DenseSet<Value *> &OutputsReplacedByPHINode,
1063 DenseSet<Value *> &OutputsWithNonPhiUses) {
1064 for (PHINode &PN : CurrentExitFromRegion->phis()) {
1065 // Find all incoming values from the outlining region.
1066 SmallVector<unsigned, 2> IncomingVals;
1067 for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
1068 if (RegionBlocks.contains(PN.getIncomingBlock(I)))
1069 IncomingVals.push_back(I);
1070
1071 // Do not process PHI if there are no predecessors from region.
1072 unsigned NumIncomingVals = IncomingVals.size();
1073 if (NumIncomingVals == 0)
1074 continue;
1075
1076 // If there is one predecessor, we mark it as a value that needs to be kept
1077 // as an output.
1078 if (NumIncomingVals == 1) {
1079 Value *V = PN.getIncomingValue(*IncomingVals.begin());
1080 OutputsWithNonPhiUses.insert(V);
1081 OutputsReplacedByPHINode.erase(V);
1082 continue;
1083 }
1084
1085 // This PHINode will be used as an output value, so we add it to our list.
1086 Outputs.insert(&PN);
1087
1088 // Not all of the incoming values should be ignored as other inputs and
1089 // outputs may have uses in outlined region. If they have other uses
1090 // outside of the single PHINode we should not skip over it.
1091 for (unsigned Idx : IncomingVals) {
1092 Value *V = PN.getIncomingValue(Idx);
1093 if (!isa<Constant>(V) &&
1094 outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
1095 OutputsWithNonPhiUses.insert(V);
1096 OutputsReplacedByPHINode.erase(V);
1097 continue;
1098 }
1099 if (!OutputsWithNonPhiUses.contains(V))
1100 OutputsReplacedByPHINode.insert(V);
1101 }
1102 }
1103}
1104
1105// Represents the type for the unsigned number denoting the output number for
1106// phi node, along with the canonical number for the exit block.
1107using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
1108// The list of canonical numbers for the incoming values to a PHINode.
1110// The pair type representing the set of canonical values being combined in the
1111// PHINode, along with the location data for the PHINode.
1112using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
1113
1114/// Encode \p PND as an integer for easy lookup based on the argument location,
1115/// the parent BasicBlock canonical numbering, and the canonical numbering of
1116/// the values stored in the PHINode.
1117///
1118/// \param PND - The data to hash.
1119/// \returns The hash code of \p PND.
1121 return llvm::hash_combine(llvm::hash_value(PND.first.first),
1122 llvm::hash_value(PND.first.second),
1123 llvm::hash_combine_range(PND.second));
1124}
1125
1126/// Create a special GVN for PHINodes that will be used outside of
1127/// the region. We create a hash code based on the Canonical number of the
1128/// parent BasicBlock, the canonical numbering of the values stored in the
1129/// PHINode and the aggregate argument location. This is used to find whether
1130/// this PHINode type has been given a canonical numbering already. If not, we
1131/// assign it a value and store it for later use. The value is returned to
1132/// identify different output schemes for the set of regions.
1133///
1134/// \param Region - The region that \p PN is an output for.
1135/// \param PN - The PHINode we are analyzing.
1136/// \param Blocks - The blocks for the region we are analyzing.
1137/// \param AggArgIdx - The argument \p PN will be stored into.
1138/// \returns An optional holding the assigned canonical number, or std::nullopt
1139/// if there is some attribute of the PHINode blocking it from being used.
1140static std::optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
1141 PHINode *PN,
1142 DenseSet<BasicBlock *> &Blocks,
1143 unsigned AggArgIdx) {
1144 OutlinableGroup &Group = *Region.Parent;
1145 IRSimilarityCandidate &Cand = *Region.Candidate;
1146 BasicBlock *PHIBB = PN->getParent();
1147 CanonList PHIGVNs;
1148 Value *Incoming;
1149 BasicBlock *IncomingBlock;
1150 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1151 Incoming = PN->getIncomingValue(Idx);
1152 IncomingBlock = PN->getIncomingBlock(Idx);
1153 // If the incoming block isn't in the region, we don't have to worry about
1154 // this incoming value.
1155 if (!Blocks.contains(IncomingBlock))
1156 continue;
1157
1158 // If we cannot find a GVN, and the incoming block is included in the region
1159 // this means that the input to the PHINode is not included in the region we
1160 // are trying to analyze, meaning, that if it was outlined, we would be
1161 // adding an extra input. We ignore this case for now, and so ignore the
1162 // region.
1163 std::optional<unsigned> OGVN = Cand.getGVN(Incoming);
1164 if (!OGVN) {
1165 Region.IgnoreRegion = true;
1166 return std::nullopt;
1167 }
1168
1169 // Collect the canonical numbers of the values in the PHINode.
1170 unsigned GVN = *OGVN;
1171 OGVN = Cand.getCanonicalNum(GVN);
1172 assert(OGVN && "No GVN found for incoming value?");
1173 PHIGVNs.push_back(*OGVN);
1174
1175 // Find the incoming block and use the canonical numbering as well to define
1176 // the hash for the PHINode.
1177 OGVN = Cand.getGVN(IncomingBlock);
1178
1179 // If there is no number for the incoming block, it is because we have
1180 // split the candidate basic blocks. So we use the previous block that it
1181 // was split from to find the valid global value numbering for the PHINode.
1182 if (!OGVN) {
1183 assert(Cand.getStartBB() == IncomingBlock &&
1184 "Unknown basic block used in exit path PHINode.");
1185
1186 BasicBlock *PrevBlock = nullptr;
1187 // Iterate over the predecessors to the incoming block of the
1188 // PHINode, when we find a block that is not contained in the region
1189 // we know that this is the first block that we split from, and should
1190 // have a valid global value numbering.
1191 for (BasicBlock *Pred : predecessors(IncomingBlock))
1192 if (!Blocks.contains(Pred)) {
1193 PrevBlock = Pred;
1194 break;
1195 }
1196 assert(PrevBlock && "Expected a predecessor not in the reigon!");
1197 OGVN = Cand.getGVN(PrevBlock);
1198 }
1199 GVN = *OGVN;
1200 OGVN = Cand.getCanonicalNum(GVN);
1201 assert(OGVN && "No GVN found for incoming block?");
1202 PHIGVNs.push_back(*OGVN);
1203 }
1204
1205 // Now that we have the GVNs for the incoming values, we are going to combine
1206 // them with the GVN of the incoming bock, and the output location of the
1207 // PHINode to generate a hash value representing this instance of the PHINode.
1210 std::optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
1211 assert(BBGVN && "Could not find GVN for the incoming block!");
1212
1213 BBGVN = Cand.getCanonicalNum(*BBGVN);
1214 assert(BBGVN && "Could not find canonical number for the incoming block!");
1215 // Create a pair of the exit block canonical value, and the aggregate
1216 // argument location, connected to the canonical numbers stored in the
1217 // PHINode.
1218 PHINodeData TemporaryPair =
1219 std::make_pair(std::make_pair(*BBGVN, AggArgIdx), PHIGVNs);
1220 hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
1221
1222 // Look for and create a new entry in our connection between canonical
1223 // numbers for PHINodes, and the set of objects we just created.
1224 GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
1225 if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
1226 bool Inserted = false;
1227 std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
1228 std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
1229 std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
1230 std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
1231 }
1232
1233 return GVNToPHIIt->second;
1234}
1235
1236/// Create a mapping of the output arguments for the \p Region to the output
1237/// arguments of the overall outlined function.
1238///
1239/// \param [in,out] Region - The region of code to be analyzed.
1240/// \param [in] Outputs - The values found by the code extractor.
1241static void
1243 SetVector<Value *> &Outputs) {
1244 OutlinableGroup &Group = *Region.Parent;
1245 IRSimilarityCandidate &C = *Region.Candidate;
1246
1248 DenseSet<BasicBlock *> BlocksInRegion;
1249 C.getBasicBlocks(BlocksInRegion, BE);
1250
1251 // Find the exits to the region.
1253 for (BasicBlock *Block : BE)
1254 for (BasicBlock *Succ : successors(Block))
1255 if (!BlocksInRegion.contains(Succ))
1256 Exits.insert(Succ);
1257
1258 // After determining which blocks exit to PHINodes, we add these PHINodes to
1259 // the set of outputs to be processed. We also check the incoming values of
1260 // the PHINodes for whether they should no longer be considered outputs.
1261 DenseSet<Value *> OutputsReplacedByPHINode;
1262 DenseSet<Value *> OutputsWithNonPhiUses;
1263 for (BasicBlock *ExitBB : Exits)
1264 analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
1265 OutputsReplacedByPHINode,
1266 OutputsWithNonPhiUses);
1267
1268 // This counts the argument number in the extracted function.
1269 unsigned OriginalIndex = Region.NumExtractedInputs;
1270
1271 // This counts the argument number in the overall function.
1272 unsigned TypeIndex = Group.NumAggregateInputs;
1273 bool TypeFound;
1274 DenseSet<unsigned> AggArgsUsed;
1275
1276 // Iterate over the output types and identify if there is an aggregate pointer
1277 // type whose base type matches the current output type. If there is, we mark
1278 // that we will use this output register for this value. If not we add another
1279 // type to the overall argument type list. We also store the GVNs used for
1280 // stores to identify which values will need to be moved into an special
1281 // block that holds the stores to the output registers.
1282 for (Value *Output : Outputs) {
1283 TypeFound = false;
1284 // We can do this since it is a result value, and will have a number
1285 // that is necessarily the same. BUT if in the future, the instructions
1286 // do not have to be in same order, but are functionally the same, we will
1287 // have to use a different scheme, as one-to-one correspondence is not
1288 // guaranteed.
1289 unsigned ArgumentSize = Group.ArgumentTypes.size();
1290
1291 // If the output is combined in a PHINode, we make sure to skip over it.
1292 if (OutputsReplacedByPHINode.contains(Output))
1293 continue;
1294
1295 unsigned AggArgIdx = 0;
1296 for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
1297 if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
1298 continue;
1299
1300 if (!AggArgsUsed.insert(Jdx).second)
1301 continue;
1302
1303 TypeFound = true;
1304 Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
1305 Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
1306 AggArgIdx = Jdx;
1307 break;
1308 }
1309
1310 // We were unable to find an unused type in the output type set that matches
1311 // the output, so we add a pointer type to the argument types of the overall
1312 // function to handle this output and create a mapping to it.
1313 if (!TypeFound) {
1314 Group.ArgumentTypes.push_back(PointerType::get(Output->getContext(),
1315 M.getDataLayout().getAllocaAddrSpace()));
1316 // Mark the new pointer type as the last value in the aggregate argument
1317 // list.
1318 unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
1319 AggArgsUsed.insert(ArgTypeIdx);
1320 Region.ExtractedArgToAgg.insert(
1321 std::make_pair(OriginalIndex, ArgTypeIdx));
1322 Region.AggArgToExtracted.insert(
1323 std::make_pair(ArgTypeIdx, OriginalIndex));
1324 AggArgIdx = ArgTypeIdx;
1325 }
1326
1327 // TODO: Adapt to the extra input from the PHINode.
1328 PHINode *PN = dyn_cast<PHINode>(Output);
1329
1330 std::optional<unsigned> GVN;
1331 if (PN && !BlocksInRegion.contains(PN->getParent())) {
1332 // Values outside the region can be combined into PHINode when we
1333 // have multiple exits. We collect both of these into a list to identify
1334 // which values are being used in the PHINode. Each list identifies a
1335 // different PHINode, and a different output. We store the PHINode as it's
1336 // own canonical value. These canonical values are also dependent on the
1337 // output argument it is saved to.
1338
1339 // If two PHINodes have the same canonical values, but different aggregate
1340 // argument locations, then they will have distinct Canonical Values.
1341 GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx);
1342 if (!GVN)
1343 return;
1344 } else {
1345 // If we do not have a PHINode we use the global value numbering for the
1346 // output value, to find the canonical number to add to the set of stored
1347 // values.
1348 GVN = C.getGVN(Output);
1349 GVN = C.getCanonicalNum(*GVN);
1350 }
1351
1352 // Each region has a potentially unique set of outputs. We save which
1353 // values are output in a list of canonical values so we can differentiate
1354 // among the different store schemes.
1355 Region.GVNStores.push_back(*GVN);
1356
1357 OriginalIndex++;
1358 TypeIndex++;
1359 }
1360
1361 // We sort the stored values to make sure that we are not affected by analysis
1362 // order when determining what combination of items were stored.
1363 stable_sort(Region.GVNStores);
1364}
1365
1366void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
1367 DenseSet<unsigned> &NotSame) {
1368 std::vector<unsigned> Inputs;
1369 SetVector<Value *> ArgInputs, Outputs;
1370
1371 getCodeExtractorArguments(Region, Inputs, NotSame, OutputMappings, ArgInputs,
1372 Outputs);
1373
1374 if (Region.IgnoreRegion)
1375 return;
1376
1377 // Map the inputs found by the CodeExtractor to the arguments found for
1378 // the overall function.
1379 findExtractedInputToOverallInputMapping(Region, Inputs, ArgInputs);
1380
1381 // Map the outputs found by the CodeExtractor to the arguments found for
1382 // the overall function.
1384}
1385
1386/// Replace the extracted function in the Region with a call to the overall
1387/// function constructed from the deduplicated similar regions, replacing and
1388/// remapping the values passed to the extracted function as arguments to the
1389/// new arguments of the overall function.
1390///
1391/// \param [in] M - The module to outline from.
1392/// \param [in] Region - The regions of extracted code to be replaced with a new
1393/// function.
1394/// \returns a call instruction with the replaced function.
1396 std::vector<Value *> NewCallArgs;
1398
1399 OutlinableGroup &Group = *Region.Parent;
1400 CallInst *Call = Region.Call;
1401 assert(Call && "Call to replace is nullptr?");
1402 Function *AggFunc = Group.OutlinedFunction;
1403 assert(AggFunc && "Function to replace with is nullptr?");
1404
1405 // If the arguments are the same size, there are not values that need to be
1406 // made into an argument, the argument ordering has not been change, or
1407 // different output registers to handle. We can simply replace the called
1408 // function in this case.
1409 if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
1410 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1411 << *AggFunc << " with same number of arguments\n");
1412 Call->setCalledFunction(AggFunc);
1413 return Call;
1414 }
1415
1416 // We have a different number of arguments than the new function, so
1417 // we need to use our previously mappings off extracted argument to overall
1418 // function argument, and constants to overall function argument to create the
1419 // new argument list.
1420 for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
1421
1422 if (AggArgIdx == AggFunc->arg_size() - 1 &&
1423 Group.OutputGVNCombinations.size() > 1) {
1424 // If we are on the last argument, and we need to differentiate between
1425 // output blocks, add an integer to the argument list to determine
1426 // what block to take
1427 LLVM_DEBUG(dbgs() << "Set switch block argument to "
1428 << Region.OutputBlockNum << "\n");
1429 NewCallArgs.push_back(ConstantInt::get(Type::getInt32Ty(M.getContext()),
1430 Region.OutputBlockNum));
1431 continue;
1432 }
1433
1434 ArgPair = Region.AggArgToExtracted.find(AggArgIdx);
1435 if (ArgPair != Region.AggArgToExtracted.end()) {
1436 Value *ArgumentValue = Call->getArgOperand(ArgPair->second);
1437 // If we found the mapping from the extracted function to the overall
1438 // function, we simply add it to the argument list. We use the same
1439 // value, it just needs to honor the new order of arguments.
1440 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1441 << *ArgumentValue << "\n");
1442 NewCallArgs.push_back(ArgumentValue);
1443 continue;
1444 }
1445
1446 // If it is a constant, we simply add it to the argument list as a value.
1447 if (auto It = Region.AggArgToConstant.find(AggArgIdx);
1448 It != Region.AggArgToConstant.end()) {
1449 Constant *CST = It->second;
1450 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
1451 << *CST << "\n");
1452 NewCallArgs.push_back(CST);
1453 continue;
1454 }
1455
1456 // Add a nullptr value if the argument is not found in the extracted
1457 // function. If we cannot find a value, it means it is not in use
1458 // for the region, so we should not pass anything to it.
1459 LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to nullptr\n");
1460 NewCallArgs.push_back(ConstantPointerNull::get(
1461 static_cast<PointerType *>(AggFunc->getArg(AggArgIdx)->getType())));
1462 }
1463
1464 LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
1465 << *AggFunc << " with new set of arguments\n");
1466 // Create the new call instruction and erase the old one.
1467 Call = CallInst::Create(AggFunc->getFunctionType(), AggFunc, NewCallArgs, "",
1468 Call->getIterator());
1469
1470 // It is possible that the call to the outlined function is either the first
1471 // instruction is in the new block, the last instruction, or both. If either
1472 // of these is the case, we need to make sure that we replace the instruction
1473 // in the IRInstructionData struct with the new call.
1474 CallInst *OldCall = Region.Call;
1475 if (Region.NewFront->Inst == OldCall)
1476 Region.NewFront->Inst = Call;
1477 if (Region.NewBack->Inst == OldCall)
1478 Region.NewBack->Inst = Call;
1479
1480 // Transfer any debug information.
1481 Call->setDebugLoc(Region.Call->getDebugLoc());
1482 // Since our output may determine which branch we go to, we make sure to
1483 // propagate this new call value through the module.
1484 OldCall->replaceAllUsesWith(Call);
1485
1486 // Remove the old instruction.
1487 OldCall->eraseFromParent();
1488 Region.Call = Call;
1489
1490 // Make sure that the argument in the new function has the SwiftError
1491 // argument.
1492 if (Group.SwiftErrorArgument)
1493 Call->addParamAttr(*Group.SwiftErrorArgument, Attribute::SwiftError);
1494
1495 return Call;
1496}
1497
1498/// Find or create a BasicBlock in the outlined function containing PhiBlocks
1499/// for \p RetVal.
1500///
1501/// \param Group - The OutlinableGroup containing the information about the
1502/// overall outlined function.
1503/// \param RetVal - The return value or exit option that we are currently
1504/// evaluating.
1505/// \returns The found or newly created BasicBlock to contain the needed
1506/// PHINodes to be used as outputs.
1508 // Find if a PHIBlock exists for this return value already. If it is
1509 // the first time we are analyzing this, we will not, so we record it.
1510 auto [PhiBlockForRetVal, Inserted] = Group.PHIBlocks.try_emplace(RetVal);
1511 if (!Inserted)
1512 return PhiBlockForRetVal->second;
1513
1514 auto ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
1515 assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
1516 "Could not find output value!");
1517 BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
1518
1519 // If we did not find a block, we create one, and insert it into the
1520 // overall function and record it.
1521 BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
1522 ReturnBB->getParent());
1523 PhiBlockForRetVal->second = PHIBlock;
1524
1525 // We replace all branches to the return block in the newly created outlined
1526 // function to point to the new PHIBlock.
1527 ReturnBB->replaceAllUsesWith(PHIBlock);
1528
1529 UncondBrInst::Create(ReturnBB, PHIBlock);
1530
1531 return PhiBlockForRetVal->second;
1532}
1533
1534/// For the function call now representing the \p Region, find the passed value
1535/// to that call that represents Argument \p A at the call location if the
1536/// call has already been replaced with a call to the overall, aggregate
1537/// function.
1538///
1539/// \param A - The Argument to get the passed value for.
1540/// \param Region - The extracted Region corresponding to the outlined function.
1541/// \returns The Value representing \p A at the call site.
1542static Value *
1544 const OutlinableRegion &Region) {
1545 // If we don't need to adjust the argument number at all (since the call
1546 // has already been replaced by a call to the overall outlined function)
1547 // we can just get the specified argument.
1548 return Region.Call->getArgOperand(A->getArgNo());
1549}
1550
1551/// For the function call now representing the \p Region, find the passed value
1552/// to that call that represents Argument \p A at the call location if the
1553/// call has only been replaced by the call to the aggregate function.
1554///
1555/// \param A - The Argument to get the passed value for.
1556/// \param Region - The extracted Region corresponding to the outlined function.
1557/// \returns The Value representing \p A at the call site.
1558static Value *
1560 const OutlinableRegion &Region) {
1561 unsigned ArgNum = A->getArgNo();
1562
1563 // If it is a constant, we can look at our mapping from when we created
1564 // the outputs to figure out what the constant value is.
1565 if (auto It = Region.AggArgToConstant.find(ArgNum);
1566 It != Region.AggArgToConstant.end())
1567 return It->second;
1568
1569 // If it is not a constant, and we are not looking at the overall function, we
1570 // need to adjust which argument we are looking at.
1571 ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
1572 return Region.Call->getArgOperand(ArgNum);
1573}
1574
1575/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
1576///
1577/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1578/// \param Region [in] - The OutlinableRegion containing \p PN.
1579/// \param OutputMappings [in] - The mapping of output values from outlined
1580/// region to their original values.
1581/// \param CanonNums [out] - The canonical numbering for the incoming values to
1582/// \p PN paired with their incoming block.
1583/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
1584/// of \p Region rather than the overall function's call.
1587 const DenseMap<Value *, Value *> &OutputMappings,
1588 SmallVector<std::pair<unsigned, BasicBlock *>> &CanonNums,
1589 bool ReplacedWithOutlinedCall = true) {
1590 // Iterate over the incoming values.
1591 for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
1592 Value *IVal = PN->getIncomingValue(Idx);
1593 BasicBlock *IBlock = PN->getIncomingBlock(Idx);
1594 // If we have an argument as incoming value, we need to grab the passed
1595 // value from the call itself.
1596 if (Argument *A = dyn_cast<Argument>(IVal)) {
1597 if (ReplacedWithOutlinedCall)
1599 else
1601 }
1602
1603 // Get the original value if it has been replaced by an output value.
1604 IVal = findOutputMapping(OutputMappings, IVal);
1605
1606 // Find and add the canonical number for the incoming value.
1607 std::optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
1608 assert(GVN && "No GVN for incoming value");
1609 std::optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
1610 assert(CanonNum && "No Canonical Number for GVN");
1611 CanonNums.push_back(std::make_pair(*CanonNum, IBlock));
1612 }
1613}
1614
1615/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
1616/// in order to condense the number of instructions added to the outlined
1617/// function.
1618///
1619/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
1620/// \param Region [in] - The OutlinableRegion containing \p PN.
1621/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
1622/// \p PN in.
1623/// \param OutputMappings [in] - The mapping of output values from outlined
1624/// region to their original values.
1625/// \param UsedPHIs [in, out] - The PHINodes in the block that have already been
1626/// matched.
1627/// \return the newly found or created PHINode in \p OverallPhiBlock.
1628static PHINode*
1630 BasicBlock *OverallPhiBlock,
1631 const DenseMap<Value *, Value *> &OutputMappings,
1632 DenseSet<PHINode *> &UsedPHIs) {
1633 OutlinableGroup &Group = *Region.Parent;
1634
1635
1636 // A list of the canonical numbering assigned to each incoming value, paired
1637 // with the incoming block for the PHINode passed into this function.
1639
1640 // We have to use the extracted function since we have merged this region into
1641 // the overall function yet. We make sure to reassign the argument numbering
1642 // since it is possible that the argument ordering is different between the
1643 // functions.
1644 findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
1645 /* ReplacedWithOutlinedCall = */ false);
1646
1647 OutlinableRegion *FirstRegion = Group.Regions[0];
1648
1649 // A list of the canonical numbering assigned to each incoming value, paired
1650 // with the incoming block for the PHINode that we are currently comparing
1651 // the passed PHINode to.
1653
1654 // Find the Canonical Numbering for each PHINode, if it matches, we replace
1655 // the uses of the PHINode we are searching for, with the found PHINode.
1656 for (PHINode &CurrPN : OverallPhiBlock->phis()) {
1657 // If this PHINode has already been matched to another PHINode to be merged,
1658 // we skip it.
1659 if (UsedPHIs.contains(&CurrPN))
1660 continue;
1661
1662 CurrentCanonNums.clear();
1663 findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
1664 /* ReplacedWithOutlinedCall = */ true);
1665
1666 // If the list of incoming values is not the same length, then they cannot
1667 // match since there is not an analogue for each incoming value.
1668 if (PNCanonNums.size() != CurrentCanonNums.size())
1669 continue;
1670
1671 bool FoundMatch = true;
1672
1673 // We compare the canonical value for each incoming value in the passed
1674 // in PHINode to one already present in the outlined region. If the
1675 // incoming values do not match, then the PHINodes do not match.
1676
1677 // We also check to make sure that the incoming block matches as well by
1678 // finding the corresponding incoming block in the combined outlined region
1679 // for the current outlined region.
1680 for (unsigned Idx = 0, Edx = PNCanonNums.size(); Idx < Edx; ++Idx) {
1681 std::pair<unsigned, BasicBlock *> ToCompareTo = CurrentCanonNums[Idx];
1682 std::pair<unsigned, BasicBlock *> ToAdd = PNCanonNums[Idx];
1683 if (ToCompareTo.first != ToAdd.first) {
1684 FoundMatch = false;
1685 break;
1686 }
1687
1688 BasicBlock *CorrespondingBlock =
1689 Region.findCorrespondingBlockIn(*FirstRegion, ToAdd.second);
1690 assert(CorrespondingBlock && "Found block is nullptr");
1691 if (CorrespondingBlock != ToCompareTo.second) {
1692 FoundMatch = false;
1693 break;
1694 }
1695 }
1696
1697 // If all incoming values and branches matched, then we can merge
1698 // into the found PHINode.
1699 if (FoundMatch) {
1700 UsedPHIs.insert(&CurrPN);
1701 return &CurrPN;
1702 }
1703 }
1704
1705 // If we've made it here, it means we weren't able to replace the PHINode, so
1706 // we must insert it ourselves.
1707 PHINode *NewPN = cast<PHINode>(PN.clone());
1708 NewPN->insertBefore(OverallPhiBlock->begin());
1709 for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
1710 Idx++) {
1711 Value *IncomingVal = NewPN->getIncomingValue(Idx);
1712 BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
1713
1714 // Find corresponding basic block in the overall function for the incoming
1715 // block.
1716 BasicBlock *BlockToUse =
1717 Region.findCorrespondingBlockIn(*FirstRegion, IncomingBlock);
1718 NewPN->setIncomingBlock(Idx, BlockToUse);
1719
1720 // If we have an argument we make sure we replace using the argument from
1721 // the correct function.
1722 if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
1723 Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
1724 NewPN->setIncomingValue(Idx, Val);
1725 continue;
1726 }
1727
1728 // Find the corresponding value in the overall function.
1729 IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
1730 Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
1731 assert(Val && "Value is nullptr?");
1732 auto RemappedIt = FirstRegion->RemappedArguments.find(Val);
1733 if (RemappedIt != FirstRegion->RemappedArguments.end())
1734 Val = RemappedIt->second;
1735 NewPN->setIncomingValue(Idx, Val);
1736 }
1737 return NewPN;
1738}
1739
1740// Within an extracted function, replace the argument uses of the extracted
1741// region with the arguments of the function for an OutlinableGroup.
1742//
1743/// \param [in] Region - The region of extracted code to be changed.
1744/// \param [in,out] OutputBBs - The BasicBlock for the output stores for this
1745/// region.
1746/// \param [in] FirstFunction - A flag to indicate whether we are using this
1747/// function to define the overall outlined function for all the regions, or
1748/// if we are operating on one of the following regions.
1749static void
1752 const DenseMap<Value *, Value *> &OutputMappings,
1753 bool FirstFunction = false) {
1754 OutlinableGroup &Group = *Region.Parent;
1755 assert(Region.ExtractedFunction && "Region has no extracted function?");
1756
1757 Function *DominatingFunction = Region.ExtractedFunction;
1758 if (FirstFunction)
1759 DominatingFunction = Group.OutlinedFunction;
1760 DominatorTree DT(*DominatingFunction);
1761 DenseSet<PHINode *> UsedPHIs;
1762
1763 for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
1764 ArgIdx++) {
1765 assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
1766 "No mapping from extracted to outlined?");
1767 unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
1768 Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
1769 Argument *Arg = Region.ExtractedFunction->getArg(ArgIdx);
1770 // The argument is an input, so we can simply replace it with the overall
1771 // argument value
1772 if (ArgIdx < Region.NumExtractedInputs) {
1773 LLVM_DEBUG(dbgs() << "Replacing uses of input " << *Arg << " in function "
1774 << *Region.ExtractedFunction << " with " << *AggArg
1775 << " in function " << *Group.OutlinedFunction << "\n");
1776 Arg->replaceAllUsesWith(AggArg);
1777 Value *V = Region.Call->getArgOperand(ArgIdx);
1778 Region.RemappedArguments.insert(std::make_pair(V, AggArg));
1779 continue;
1780 }
1781
1782 // If we are replacing an output, we place the store value in its own
1783 // block inside the overall function before replacing the use of the output
1784 // in the function.
1785 assert(Arg->hasOneUse() && "Output argument can only have one use");
1786 User *InstAsUser = Arg->user_back();
1787 assert(InstAsUser && "User is nullptr!");
1788
1789 Instruction *I = cast<Instruction>(InstAsUser);
1790 BasicBlock *BB = I->getParent();
1791 SmallVector<BasicBlock *, 4> Descendants;
1792 DT.getDescendants(BB, Descendants);
1793 bool EdgeAdded = false;
1794 if (Descendants.size() == 0) {
1795 EdgeAdded = true;
1796 DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
1797 DT.getDescendants(BB, Descendants);
1798 }
1799
1800 // Iterate over the following blocks, looking for return instructions,
1801 // if we find one, find the corresponding output block for the return value
1802 // and move our store instruction there.
1803 for (BasicBlock *DescendBB : Descendants) {
1804 ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
1805 if (!RI)
1806 continue;
1807 Value *RetVal = RI->getReturnValue();
1808 auto VBBIt = OutputBBs.find(RetVal);
1809 assert(VBBIt != OutputBBs.end() && "Could not find output value!");
1810
1811 // If this is storing a PHINode, we must make sure it is included in the
1812 // overall function.
1814
1815 Value *ValueOperand = SI->getValueOperand();
1816
1817 StoreInst *NewI = cast<StoreInst>(I->clone());
1819 BasicBlock *OutputBB = VBBIt->second;
1820 NewI->insertInto(OutputBB, OutputBB->end());
1821 LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
1822 << *OutputBB << "\n");
1823
1824 // If this is storing a PHINode, we must make sure it is included in the
1825 // overall function.
1826 if (!isa<PHINode>(ValueOperand) ||
1827 Region.Candidate->getGVN(ValueOperand).has_value()) {
1828 if (FirstFunction)
1829 continue;
1830 Value *CorrVal =
1831 Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
1832 assert(CorrVal && "Value is nullptr?");
1833 NewI->setOperand(0, CorrVal);
1834 continue;
1835 }
1836 PHINode *PN = cast<PHINode>(SI->getValueOperand());
1837 // If it has a value, it was not split by the code extractor, which
1838 // is what we are looking for.
1839 if (Region.Candidate->getGVN(PN))
1840 continue;
1841
1842 // We record the parent block for the PHINode in the Region so that
1843 // we can exclude it from checks later on.
1844 Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
1845
1846 // If this is the first function, we do not need to worry about mergiing
1847 // this with any other block in the overall outlined function, so we can
1848 // just continue.
1849 if (FirstFunction) {
1850 BasicBlock *PHIBlock = PN->getParent();
1851 Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
1852 continue;
1853 }
1854
1855 // We look for the aggregate block that contains the PHINodes leading into
1856 // this exit path. If we can't find one, we create one.
1857 BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
1858
1859 // For our PHINode, we find the combined canonical numbering, and
1860 // attempt to find a matching PHINode in the overall PHIBlock. If we
1861 // cannot, we copy the PHINode and move it into this new block.
1862 PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock,
1863 OutputMappings, UsedPHIs);
1864 NewI->setOperand(0, NewPN);
1865 }
1866
1867 // If we added an edge for basic blocks without a predecessor, we remove it
1868 // here.
1869 if (EdgeAdded)
1870 DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
1871 I->eraseFromParent();
1872
1873 LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
1874 << *Region.ExtractedFunction << " with " << *AggArg
1875 << " in function " << *Group.OutlinedFunction << "\n");
1876 Arg->replaceAllUsesWith(AggArg);
1877 }
1878}
1879
1880/// Within an extracted function, replace the constants that need to be lifted
1881/// into arguments with the actual argument.
1882///
1883/// \param Region [in] - The region of extracted code to be changed.
1885 OutlinableGroup &Group = *Region.Parent;
1886 Function *OutlinedFunction = Group.OutlinedFunction;
1887 ValueToValueMapTy VMap;
1888
1889 // Iterate over the constants that need to be elevated into arguments
1890 for (std::pair<unsigned, Constant *> &Const : Region.AggArgToConstant) {
1891 unsigned AggArgIdx = Const.first;
1892 assert(OutlinedFunction && "Overall Function is not defined?");
1893 Constant *CST = Const.second;
1894 Argument *Arg = Group.OutlinedFunction->getArg(AggArgIdx);
1895 // Identify the argument it will be elevated to, and replace instances of
1896 // that constant in the function.
1897 VMap[CST] = Arg;
1898 LLVM_DEBUG(dbgs() << "Replacing uses of constant " << *CST
1899 << " in function " << *OutlinedFunction << " with "
1900 << *Arg << '\n');
1901 }
1902
1903 RemapFunction(*OutlinedFunction, VMap,
1905}
1906
1907/// It is possible that there is a basic block that already performs the same
1908/// stores. This returns a duplicate block, if it exists
1909///
1910/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
1911/// \param OutputStoreBBs [in] The existing output blocks.
1912/// \returns an optional value with the number output block if there is a match.
1913std::optional<unsigned> findDuplicateOutputBlock(
1915 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
1916
1917 bool Mismatch = false;
1918 unsigned MatchingNum = 0;
1919 // We compare the new set output blocks to the other sets of output blocks.
1920 // If they are the same number, and have identical instructions, they are
1921 // considered to be the same.
1922 for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
1923 Mismatch = false;
1924 for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
1925 auto OutputBBIt = OutputBBs.find(VToB.first);
1926 if (OutputBBIt == OutputBBs.end()) {
1927 Mismatch = true;
1928 break;
1929 }
1930
1931 BasicBlock *CompBB = VToB.second;
1932 BasicBlock *OutputBB = OutputBBIt->second;
1933 if (CompBB->size() - 1 != OutputBB->size()) {
1934 Mismatch = true;
1935 break;
1936 }
1937
1938 BasicBlock::iterator NIt = OutputBB->begin();
1939 for (Instruction &I : *CompBB) {
1941 continue;
1942
1943 if (!I.isIdenticalTo(&(*NIt))) {
1944 Mismatch = true;
1945 break;
1946 }
1947
1948 NIt++;
1949 }
1950 }
1951
1952 if (!Mismatch)
1953 return MatchingNum;
1954
1955 MatchingNum++;
1956 }
1957
1958 return std::nullopt;
1959}
1960
1961/// Remove empty output blocks from the outlined region.
1962///
1963/// \param BlocksToPrune - Mapping of return values output blocks for the \p
1964/// Region.
1965/// \param Region - The OutlinableRegion we are analyzing.
1966static bool
1969 bool AllRemoved = true;
1970 Value *RetValueForBB;
1971 BasicBlock *NewBB;
1973 // Iterate over the output blocks created in the outlined section.
1974 for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
1975 RetValueForBB = VtoBB.first;
1976 NewBB = VtoBB.second;
1977
1978 // If there are no instructions, we remove it from the module, and also
1979 // mark the value for removal from the return value to output block mapping.
1980 if (NewBB->size() == 0) {
1981 NewBB->eraseFromParent();
1982 ToRemove.push_back(RetValueForBB);
1983 continue;
1984 }
1985
1986 // Mark that we could not remove all the blocks since they were not all
1987 // empty.
1988 AllRemoved = false;
1989 }
1990
1991 // Remove the return value from the mapping.
1992 for (Value *V : ToRemove)
1993 BlocksToPrune.erase(V);
1994
1995 // Mark the region as having the no output scheme.
1996 if (AllRemoved)
1997 Region.OutputBlockNum = -1;
1998
1999 return AllRemoved;
2000}
2001
2002/// For the outlined section, move needed the StoreInsts for the output
2003/// registers into their own block. Then, determine if there is a duplicate
2004/// output block already created.
2005///
2006/// \param [in] OG - The OutlinableGroup of regions to be outlined.
2007/// \param [in] Region - The OutlinableRegion that is being analyzed.
2008/// \param [in,out] OutputBBs - the blocks that stores for this region will be
2009/// placed in.
2010/// \param [in] EndBBs - the final blocks of the extracted function.
2011/// \param [in] OutputMappings - OutputMappings the mapping of values that have
2012/// been replaced by a new output value.
2013/// \param [in,out] OutputStoreBBs - The existing output blocks.
2018 const DenseMap<Value *, Value *> &OutputMappings,
2019 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2020 // If none of the output blocks have any instructions, this means that we do
2021 // not have to determine if it matches any of the other output schemes, and we
2022 // don't have to do anything else.
2023 if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
2024 return;
2025
2026 // Determine is there is a duplicate set of blocks.
2027 std::optional<unsigned> MatchingBB =
2028 findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
2029
2030 // If there is, we remove the new output blocks. If it does not,
2031 // we add it to our list of sets of output blocks.
2032 if (MatchingBB) {
2033 LLVM_DEBUG(dbgs() << "Set output block for region in function"
2034 << Region.ExtractedFunction << " to " << *MatchingBB);
2035
2036 Region.OutputBlockNum = *MatchingBB;
2037 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
2038 VtoBB.second->eraseFromParent();
2039 return;
2040 }
2041
2042 Region.OutputBlockNum = OutputStoreBBs.size();
2043
2044 Value *RetValueForBB;
2045 BasicBlock *NewBB;
2046 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2047 for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
2048 RetValueForBB = VtoBB.first;
2049 NewBB = VtoBB.second;
2050 auto VBBIt = EndBBs.find(RetValueForBB);
2051 LLVM_DEBUG(dbgs() << "Create output block for region in"
2052 << Region.ExtractedFunction << " to "
2053 << *NewBB);
2054 UncondBrInst::Create(VBBIt->second, NewBB);
2055 OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
2056 }
2057}
2058
2059/// Takes in a mapping, \p OldMap of ConstantValues to BasicBlocks, sorts keys,
2060/// before creating a basic block for each \p NewMap, and inserting into the new
2061/// block. Each BasicBlock is named with the scheme "<basename>_<key_idx>".
2062///
2063/// \param OldMap [in] - The mapping to base the new mapping off of.
2064/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
2065/// \param ParentFunc [in] - The function to put the new basic block in.
2066/// \param BaseName [in] - The start of the BasicBlock names to be appended to
2067/// by an index value.
2070 Function *ParentFunc, Twine BaseName) {
2071 unsigned Idx = 0;
2072 std::vector<Value *> SortedKeys;
2073
2074 getSortedConstantKeys(SortedKeys, OldMap);
2075
2076 for (Value *RetVal : SortedKeys) {
2078 ParentFunc->getContext(), Twine(BaseName) + Twine("_") + Twine(Idx++),
2079 ParentFunc);
2080 NewMap.insert(std::make_pair(RetVal, NewBB));
2081 }
2082}
2083
2084/// Create the switch statement for outlined function to differentiate between
2085/// all the output blocks.
2086///
2087/// For the outlined section, determine if an outlined block already exists that
2088/// matches the needed stores for the extracted section.
2089/// \param [in] M - The module we are outlining from.
2090/// \param [in] OG - The group of regions to be outlined.
2091/// \param [in] EndBBs - The final blocks of the extracted function.
2092/// \param [in,out] OutputStoreBBs - The existing output blocks.
2095 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
2096 // We only need the switch statement if there is more than one store
2097 // combination, or there is more than one set of output blocks. The first
2098 // will occur when we store different sets of values for two different
2099 // regions. The second will occur when we have two outputs that are combined
2100 // in a PHINode outside of the region in one outlined instance, and are used
2101 // seaparately in another. This will create the same set of OutputGVNs, but
2102 // will generate two different output schemes.
2103 if (OG.OutputGVNCombinations.size() > 1) {
2104 Function *AggFunc = OG.OutlinedFunction;
2105 // Create a final block for each different return block.
2107 createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
2108
2109 for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
2110 std::pair<Value *, BasicBlock *> &OutputBlock =
2111 *OG.EndBBs.find(RetBlockPair.first);
2112 BasicBlock *ReturnBlock = RetBlockPair.second;
2113 BasicBlock *EndBB = OutputBlock.second;
2114 Instruction *Term = EndBB->getTerminator();
2115 // Move the return value to the final block instead of the original exit
2116 // stub.
2117 Term->moveBefore(*ReturnBlock, ReturnBlock->end());
2118 // Put the switch statement in the old end basic block for the function
2119 // with a fall through to the new return block.
2120 LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
2121 << OutputStoreBBs.size() << "\n");
2122 SwitchInst *SwitchI =
2123 SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
2124 ReturnBlock, OutputStoreBBs.size(), EndBB);
2125
2126 unsigned Idx = 0;
2127 for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
2128 auto OSBBIt = OutputStoreBB.find(OutputBlock.first);
2129
2130 if (OSBBIt == OutputStoreBB.end())
2131 continue;
2132
2133 BasicBlock *BB = OSBBIt->second;
2134 SwitchI->addCase(
2135 ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
2136 Term = BB->getTerminator();
2137 Term->setSuccessor(0, ReturnBlock);
2138 Idx++;
2139 }
2140 }
2141 return;
2142 }
2143
2144 assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
2145
2146 // If there needs to be stores, move them from the output blocks to their
2147 // corresponding ending block. We do not check that the OutputGVNCombinations
2148 // is equal to 1 here since that could just been the case where there are 0
2149 // outputs. Instead, we check whether there is more than one set of output
2150 // blocks since this is the only case where we would have to move the
2151 // stores, and erase the extraneous blocks.
2152 if (OutputStoreBBs.size() == 1) {
2153 LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
2154 << *OG.OutlinedFunction << "\n");
2155 DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
2156 for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
2157 auto EndBBIt = EndBBs.find(VBPair.first);
2158 assert(EndBBIt != EndBBs.end() && "Could not find end block");
2159 BasicBlock *EndBB = EndBBIt->second;
2160 BasicBlock *OutputBB = VBPair.second;
2161 Instruction *Term = OutputBB->getTerminator();
2162 Term->eraseFromParent();
2163 Term = EndBB->getTerminator();
2164 moveBBContents(*OutputBB, *EndBB);
2165 Term->moveBefore(*EndBB, EndBB->end());
2166 OutputBB->eraseFromParent();
2167 }
2168 }
2169}
2170
2171void IROutliner::fillOverallFunction(
2172 Module &M, OutlinableGroup &CurrentGroup,
2173 std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
2174 std::vector<Function *> &FuncsToRemove) {
2175 OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
2176
2177 TargetTransformInfo &TTI = getTTI(*CurrentOS->StartBB->getParent());
2178
2179 // Move first extracted function's instructions into new function.
2180 LLVM_DEBUG(dbgs() << "Move instructions from "
2181 << *CurrentOS->ExtractedFunction << " to instruction "
2182 << *CurrentGroup.OutlinedFunction << "\n");
2184 *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
2185
2186 // Transfer the attributes from the function to the new function.
2187 for (Attribute A :
2188 CurrentOS->ExtractedFunction->getAttributes().getFnAttrs()) {
2190 A))
2191 continue;
2192
2193 CurrentGroup.OutlinedFunction->addFnAttr(A);
2194 }
2195
2196 // Create a new set of output blocks for the first extracted function.
2197 DenseMap<Value *, BasicBlock *> NewBBs;
2198 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2199 CurrentGroup.OutlinedFunction, "output_block_0");
2200 CurrentOS->OutputBlockNum = 0;
2201
2202 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
2203 replaceConstants(*CurrentOS);
2204
2205 // We first identify if any output blocks are empty, if they are we remove
2206 // them. We then create a branch instruction to the basic block to the return
2207 // block for the function for each non empty output block.
2208 if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
2209 OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
2210 for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
2211 auto VBBIt = CurrentGroup.EndBBs.find(VToBB.first);
2212 BasicBlock *EndBB = VBBIt->second;
2213 UncondBrInst::Create(EndBB, VToBB.second);
2214 OutputStoreBBs.back().insert(VToBB);
2215 }
2216 }
2217
2218 // Replace the call to the extracted function with the outlined function.
2219 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2220
2221 // We only delete the extracted functions at the end since we may need to
2222 // reference instructions contained in them for mapping purposes.
2223 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2224}
2225
2226void IROutliner::deduplicateExtractedSections(
2227 Module &M, OutlinableGroup &CurrentGroup,
2228 std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
2229 createFunction(M, CurrentGroup, OutlinedFunctionNum);
2230
2231 std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
2232
2233 OutlinableRegion *CurrentOS;
2234
2235 fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
2236
2237 for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
2238 CurrentOS = CurrentGroup.Regions[Idx];
2239 AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
2240 *CurrentOS->ExtractedFunction);
2241
2242 // Create a set of BasicBlocks, one for each return block, to hold the
2243 // needed store instructions.
2244 DenseMap<Value *, BasicBlock *> NewBBs;
2245 createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
2246 CurrentGroup.OutlinedFunction,
2247 "output_block_" + Twine(Idx));
2248 replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
2249 alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
2250 CurrentGroup.EndBBs, OutputMappings,
2251 OutputStoreBBs);
2252
2253 CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
2254 FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
2255 }
2256
2257 // Create a switch statement to handle the different output schemes.
2258 createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
2259
2260 OutlinedFunctionNum++;
2261}
2262
2263/// Checks that the next instruction in the InstructionDataList matches the
2264/// next instruction in the module. If they do not, there could be the
2265/// possibility that extra code has been inserted, and we must ignore it.
2266///
2267/// \param ID - The IRInstructionData to check the next instruction of.
2268/// \returns true if the InstructionDataList and actual instruction match.
2270 // We check if there is a discrepancy between the InstructionDataList
2271 // and the actual next instruction in the module. If there is, it means
2272 // that an extra instruction was added, likely by the CodeExtractor.
2273
2274 // Since we do not have any similarity data about this particular
2275 // instruction, we cannot confidently outline it, and must discard this
2276 // candidate.
2277 IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
2278 Instruction *NextIDLInst = NextIDIt->Inst;
2279 Instruction *NextModuleInst = nullptr;
2280 if (!ID.Inst->isTerminator())
2281 NextModuleInst = ID.Inst->getNextNode();
2282 else if (NextIDLInst != nullptr)
2283 NextModuleInst = &*NextIDIt->Inst->getParent()->begin();
2284
2285 if (NextIDLInst && NextIDLInst != NextModuleInst)
2286 return false;
2287
2288 return true;
2289}
2290
2291bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
2292 const OutlinableRegion &Region) {
2293 IRSimilarityCandidate *IRSC = Region.Candidate;
2294 unsigned StartIdx = IRSC->getStartIdx();
2295 unsigned EndIdx = IRSC->getEndIdx();
2296
2297 // A check to make sure that we are not about to attempt to outline something
2298 // that has already been outlined.
2299 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2300 if (Outlined.contains(Idx))
2301 return false;
2302
2303 // We check if the recorded instruction matches the actual next instruction,
2304 // if it does not, we fix it in the InstructionDataList.
2305 if (!Region.Candidate->backInstruction()->isTerminator()) {
2306 Instruction *NewEndInst =
2307 Region.Candidate->backInstruction()->getNextNode();
2308 assert(NewEndInst && "Next instruction is a nullptr?");
2309 if (Region.Candidate->end()->Inst != NewEndInst) {
2310 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2311 IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
2312 IRInstructionData(*NewEndInst,
2313 InstructionClassifier.visit(*NewEndInst), *IDL);
2314
2315 // Insert the first IRInstructionData of the new region after the
2316 // last IRInstructionData of the IRSimilarityCandidate.
2317 IDL->insert(Region.Candidate->end(), *NewEndIRID);
2318 }
2319 }
2320
2321 return none_of(*IRSC, [this](IRInstructionData &ID) {
2323 return true;
2324
2325 return !this->InstructionClassifier.visit(ID.Inst);
2326 });
2327}
2328
2329void IROutliner::pruneIncompatibleRegions(
2330 std::vector<IRSimilarityCandidate> &CandidateVec,
2331 OutlinableGroup &CurrentGroup) {
2332 bool PreviouslyOutlined;
2333
2334 // Sort from beginning to end, so the IRSimilarityCandidates are in order.
2335 stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
2336 const IRSimilarityCandidate &RHS) {
2337 return LHS.getStartIdx() < RHS.getStartIdx();
2338 });
2339
2340 IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
2341 // Since outlining a call and a branch instruction will be the same as only
2342 // outlinining a call instruction, we ignore it as a space saving.
2343 if (FirstCandidate.getLength() == 2) {
2344 if (isa<CallInst>(FirstCandidate.front()->Inst) &&
2345 isa<UncondBrInst, CondBrInst>(FirstCandidate.back()->Inst))
2346 return;
2347 }
2348
2349 unsigned CurrentEndIdx = 0;
2350 for (IRSimilarityCandidate &IRSC : CandidateVec) {
2351 PreviouslyOutlined = false;
2352 unsigned StartIdx = IRSC.getStartIdx();
2353 unsigned EndIdx = IRSC.getEndIdx();
2354 const Function &FnForCurrCand = *IRSC.getFunction();
2355
2356 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2357 if (Outlined.contains(Idx)) {
2358 PreviouslyOutlined = true;
2359 break;
2360 }
2361
2362 if (PreviouslyOutlined)
2363 continue;
2364
2365 // Check over the instructions, and if the basic block has its address
2366 // taken for use somewhere else, we do not outline that block.
2367 bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
2368 return ID.Inst->getParent()->hasAddressTaken();
2369 });
2370
2371 if (BBHasAddressTaken)
2372 continue;
2373
2374 if (FnForCurrCand.hasOptNone())
2375 continue;
2376
2377 if (FnForCurrCand.hasFnAttribute(Attribute::NoOutline)) {
2378 LLVM_DEBUG({
2379 dbgs() << "... Skipping function with nooutline attribute: "
2380 << FnForCurrCand.getName() << "\n";
2381 });
2382 continue;
2383 }
2384
2385 if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
2386 !OutlineFromLinkODRs)
2387 continue;
2388
2389 // Greedily prune out any regions that will overlap with already chosen
2390 // regions.
2391 if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
2392 continue;
2393
2394 bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
2396 return true;
2397
2398 return !this->InstructionClassifier.visit(ID.Inst);
2399 });
2400
2401 if (BadInst)
2402 continue;
2403
2404 OutlinableRegion *OS = new (RegionAllocator.Allocate())
2405 OutlinableRegion(IRSC, CurrentGroup);
2406 CurrentGroup.Regions.push_back(OS);
2407
2408 CurrentEndIdx = EndIdx;
2409 }
2410}
2411
2413IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
2414 InstructionCost RegionBenefit = 0;
2415 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2416 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2417 // We add the number of instructions in the region to the benefit as an
2418 // estimate as to how much will be removed.
2419 RegionBenefit += Region->getBenefit(TTI);
2420 LLVM_DEBUG(dbgs() << "Adding: " << RegionBenefit
2421 << " saved instructions to overfall benefit.\n");
2422 }
2423
2424 return RegionBenefit;
2425}
2426
2427/// For the \p OutputCanon number passed in find the value represented by this
2428/// canonical number. If it is from a PHINode, we pick the first incoming
2429/// value and return that Value instead.
2430///
2431/// \param Region - The OutlinableRegion to get the Value from.
2432/// \param OutputCanon - The canonical number to find the Value from.
2433/// \returns The Value represented by a canonical number \p OutputCanon in \p
2434/// Region.
2436 unsigned OutputCanon) {
2437 OutlinableGroup &CurrentGroup = *Region.Parent;
2438 // If the value is greater than the value in the tracker, we have a
2439 // PHINode and will instead use one of the incoming values to find the
2440 // type.
2441 if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
2442 auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
2443 assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
2444 "Could not find GVN set for PHINode number!");
2445 assert(It->second.second.size() > 0 && "PHINode does not have any values!");
2446 OutputCanon = *It->second.second.begin();
2447 }
2448 std::optional<unsigned> OGVN =
2449 Region.Candidate->fromCanonicalNum(OutputCanon);
2450 assert(OGVN && "Could not find GVN for Canonical Number?");
2451 std::optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
2452 assert(OV && "Could not find value for GVN?");
2453 return *OV;
2454}
2455
2457IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
2458 InstructionCost OverallCost = 0;
2459 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2460 TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
2461
2462 // Each output incurs a load after the call, so we add that to the cost.
2463 for (unsigned OutputCanon : Region->GVNStores) {
2464 Value *V = findOutputValueInRegion(*Region, OutputCanon);
2465 InstructionCost LoadCost =
2466 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2468
2469 LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
2470 << " instructions to cost for output of type "
2471 << *V->getType() << "\n");
2472 OverallCost += LoadCost;
2473 }
2474 }
2475
2476 return OverallCost;
2477}
2478
2479/// Find the extra instructions needed to handle any output values for the
2480/// region.
2481///
2482/// \param [in] M - The Module to outline from.
2483/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
2484/// \param [in] TTI - The TargetTransformInfo used to collect information for
2485/// new instruction costs.
2486/// \returns the additional cost to handle the outputs.
2488 OutlinableGroup &CurrentGroup,
2490 InstructionCost OutputCost = 0;
2491 unsigned NumOutputBranches = 0;
2492
2493 OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
2494 IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
2495 DenseSet<BasicBlock *> CandidateBlocks;
2496 Candidate.getBasicBlocks(CandidateBlocks);
2497
2498 // Count the number of different output branches that point to blocks outside
2499 // of the region.
2500 DenseSet<BasicBlock *> FoundBlocks;
2501 for (IRInstructionData &ID : Candidate) {
2503 continue;
2504
2505 for (Value *V : ID.OperVals) {
2506 BasicBlock *BB = static_cast<BasicBlock *>(V);
2507 if (!CandidateBlocks.contains(BB) && FoundBlocks.insert(BB).second)
2508 NumOutputBranches++;
2509 }
2510 }
2511
2512 CurrentGroup.BranchesToOutside = NumOutputBranches;
2513
2514 for (const ArrayRef<unsigned> &OutputUse :
2515 CurrentGroup.OutputGVNCombinations) {
2516 for (unsigned OutputCanon : OutputUse) {
2517 Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
2518 InstructionCost StoreCost =
2519 TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
2521
2522 // An instruction cost is added for each store set that needs to occur for
2523 // various output combinations inside the function, plus a branch to
2524 // return to the exit block.
2525 LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
2526 << " instructions to cost for output of type "
2527 << *V->getType() << "\n");
2528 OutputCost += StoreCost * NumOutputBranches;
2529 }
2530
2531 InstructionCost BranchCost = TTI.getCFInstrCost(
2532 Instruction::UncondBr, TargetTransformInfo::TCK_CodeSize);
2533 LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
2534 << " a branch instruction\n");
2535 OutputCost += BranchCost * NumOutputBranches;
2536 }
2537
2538 // If there is more than one output scheme, we must have a comparison and
2539 // branch for each different item in the switch statement.
2540 if (CurrentGroup.OutputGVNCombinations.size() > 1) {
2541 InstructionCost ComparisonCost = TTI.getCmpSelInstrCost(
2542 Instruction::ICmp, Type::getInt32Ty(M.getContext()),
2545 InstructionCost BranchCost = TTI.getCFInstrCost(
2546 Instruction::CondBr, TargetTransformInfo::TCK_CodeSize);
2547
2548 unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
2549 InstructionCost TotalCost = ComparisonCost * BranchCost * DifferentBlocks;
2550
2551 LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
2552 << " instructions for each switch case for each different"
2553 << " output path in a function\n");
2554 OutputCost += TotalCost * NumOutputBranches;
2555 }
2556
2557 return OutputCost;
2558}
2559
2560void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
2561 InstructionCost RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
2562 CurrentGroup.Benefit += RegionBenefit;
2563 LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
2564
2565 InstructionCost OutputReloadCost = findCostOutputReloads(CurrentGroup);
2566 CurrentGroup.Cost += OutputReloadCost;
2567 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2568
2569 InstructionCost AverageRegionBenefit =
2570 RegionBenefit / CurrentGroup.Regions.size();
2571 unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
2572 unsigned NumRegions = CurrentGroup.Regions.size();
2573 TargetTransformInfo &TTI =
2574 getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
2575
2576 // We add one region to the cost once, to account for the instructions added
2577 // inside of the newly created function.
2578 LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
2579 << " instructions to cost for body of new function.\n");
2580 CurrentGroup.Cost += AverageRegionBenefit;
2581 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2582
2583 // For each argument, we must add an instruction for loading the argument
2584 // out of the register and into a value inside of the newly outlined function.
2585 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2586 << " instructions to cost for each argument in the new"
2587 << " function.\n");
2588 CurrentGroup.Cost +=
2589 OverallArgumentNum * TargetTransformInfo::TCC_Basic;
2590 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2591
2592 // Each argument needs to either be loaded into a register or onto the stack.
2593 // Some arguments will only be loaded into the stack once the argument
2594 // registers are filled.
2595 LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
2596 << " instructions to cost for each argument in the new"
2597 << " function " << NumRegions << " times for the "
2598 << "needed argument handling at the call site.\n");
2599 CurrentGroup.Cost +=
2600 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
2601 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2602
2603 CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
2604 LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
2605}
2606
2607void IROutliner::updateOutputMapping(OutlinableRegion &Region,
2608 ArrayRef<Value *> Outputs,
2609 LoadInst *LI) {
2610 // For and load instructions following the call
2611 Value *Operand = LI->getPointerOperand();
2612 std::optional<unsigned> OutputIdx;
2613 // Find if the operand it is an output register.
2614 for (unsigned ArgIdx = Region.NumExtractedInputs;
2615 ArgIdx < Region.Call->arg_size(); ArgIdx++) {
2616 if (Operand == Region.Call->getArgOperand(ArgIdx)) {
2617 OutputIdx = ArgIdx - Region.NumExtractedInputs;
2618 break;
2619 }
2620 }
2621
2622 // If we found an output register, place a mapping of the new value
2623 // to the original in the mapping.
2624 if (!OutputIdx)
2625 return;
2626
2627 auto It = OutputMappings.find(Outputs[*OutputIdx]);
2628 if (It == OutputMappings.end()) {
2629 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
2630 << *Outputs[*OutputIdx] << "\n");
2631 OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
2632 } else {
2633 Value *Orig = It->second;
2634 LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
2635 << *Outputs[*OutputIdx] << "\n");
2636 OutputMappings.insert(std::make_pair(LI, Orig));
2637 }
2638}
2639
2640bool IROutliner::extractSection(OutlinableRegion &Region) {
2641 SetVector<Value *> ArgInputs, Outputs;
2642 assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
2643 BasicBlock *InitialStart = Region.StartBB;
2644 Function *OrigF = Region.StartBB->getParent();
2645 CodeExtractorAnalysisCache CEAC(*OrigF);
2646 Region.ExtractedFunction =
2647 Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
2648
2649 // If the extraction was successful, find the BasicBlock, and reassign the
2650 // OutlinableRegion blocks
2651 if (!Region.ExtractedFunction) {
2652 LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
2653 << "\n");
2654 Region.reattachCandidate();
2655 return false;
2656 }
2657
2658 // Get the block containing the called branch, and reassign the blocks as
2659 // necessary. If the original block still exists, it is because we ended on
2660 // a branch instruction, and so we move the contents into the block before
2661 // and assign the previous block correctly.
2662 User *InstAsUser = Region.ExtractedFunction->user_back();
2663 BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
2664 Region.PrevBB = RewrittenBB->getSinglePredecessor();
2665 assert(Region.PrevBB && "PrevBB is nullptr?");
2666 if (Region.PrevBB == InitialStart) {
2667 BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
2668 Instruction *BI = NewPrev->getTerminator();
2669 BI->eraseFromParent();
2670 moveBBContents(*InitialStart, *NewPrev);
2671 Region.PrevBB = NewPrev;
2672 InitialStart->eraseFromParent();
2673 }
2674
2675 Region.StartBB = RewrittenBB;
2676 Region.EndBB = RewrittenBB;
2677
2678 // The sequences of outlinable regions has now changed. We must fix the
2679 // IRInstructionDataList for consistency. Although they may not be illegal
2680 // instructions, they should not be compared with anything else as they
2681 // should not be outlined in this round. So marking these as illegal is
2682 // allowed.
2683 IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
2684 Instruction *BeginRewritten = &*RewrittenBB->begin();
2685 Instruction *EndRewritten = &*RewrittenBB->begin();
2686 Region.NewFront = new (InstDataAllocator.Allocate()) IRInstructionData(
2687 *BeginRewritten, InstructionClassifier.visit(*BeginRewritten), *IDL);
2688 Region.NewBack = new (InstDataAllocator.Allocate()) IRInstructionData(
2689 *EndRewritten, InstructionClassifier.visit(*EndRewritten), *IDL);
2690
2691 // Insert the first IRInstructionData of the new region in front of the
2692 // first IRInstructionData of the IRSimilarityCandidate.
2693 IDL->insert(Region.Candidate->begin(), *Region.NewFront);
2694 // Insert the first IRInstructionData of the new region after the
2695 // last IRInstructionData of the IRSimilarityCandidate.
2696 IDL->insert(Region.Candidate->end(), *Region.NewBack);
2697 // Remove the IRInstructionData from the IRSimilarityCandidate.
2698 IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
2699
2700 assert(RewrittenBB != nullptr &&
2701 "Could not find a predecessor after extraction!");
2702
2703 // Iterate over the new set of instructions to find the new call
2704 // instruction.
2705 for (Instruction &I : *RewrittenBB)
2706 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
2707 if (Region.ExtractedFunction == CI->getCalledFunction())
2708 Region.Call = CI;
2709 } else if (LoadInst *LI = dyn_cast<LoadInst>(&I))
2710 updateOutputMapping(Region, Outputs.getArrayRef(), LI);
2711 Region.reattachCandidate();
2712 return true;
2713}
2714
2715unsigned IROutliner::doOutline(Module &M) {
2716 // Find the possible similarity sections.
2717 InstructionClassifier.EnableBranches = !DisableBranches;
2718 InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
2719 InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
2720
2721 IRSimilarityIdentifier &Identifier = getIRSI(M);
2722 SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
2723
2724 // Sort them by size of extracted sections
2725 unsigned OutlinedFunctionNum = 0;
2726 // If we only have one SimilarityGroup in SimilarityCandidates, we do not have
2727 // to sort them by the potential number of instructions to be outlined
2728 if (SimilarityCandidates.size() > 1)
2729 llvm::stable_sort(SimilarityCandidates,
2730 [](const std::vector<IRSimilarityCandidate> &LHS,
2731 const std::vector<IRSimilarityCandidate> &RHS) {
2732 return LHS[0].getLength() * LHS.size() >
2733 RHS[0].getLength() * RHS.size();
2734 });
2735 // Creating OutlinableGroups for each SimilarityCandidate to be used in
2736 // each of the following for loops to avoid making an allocator.
2737 std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
2738
2739 DenseSet<unsigned> NotSame;
2740 std::vector<OutlinableGroup *> NegativeCostGroups;
2741 std::vector<OutlinableRegion *> OutlinedRegions;
2742 // Iterate over the possible sets of similarity.
2743 unsigned PotentialGroupIdx = 0;
2744 for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
2745 OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
2746
2747 // Remove entries that were previously outlined
2748 pruneIncompatibleRegions(CandidateVec, CurrentGroup);
2749
2750 // We pruned the number of regions to 0 to 1, meaning that it's not worth
2751 // trying to outlined since there is no compatible similar instance of this
2752 // code.
2753 if (CurrentGroup.Regions.size() < 2)
2754 continue;
2755
2756 // Determine if there are any values that are the same constant throughout
2757 // each section in the set.
2758 NotSame.clear();
2759 CurrentGroup.findSameConstants(NotSame);
2760
2761 if (CurrentGroup.IgnoreGroup)
2762 continue;
2763
2764 // Create a CodeExtractor for each outlinable region. Identify inputs and
2765 // outputs for each section using the code extractor and create the argument
2766 // types for the Aggregate Outlining Function.
2767 OutlinedRegions.clear();
2768 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2769 // Break the outlinable region out of its parent BasicBlock into its own
2770 // BasicBlocks (see function implementation).
2771 OS->splitCandidate();
2772
2773 // There's a chance that when the region is split, extra instructions are
2774 // added to the region. This makes the region no longer viable
2775 // to be split, so we ignore it for outlining.
2776 if (!OS->CandidateSplit)
2777 continue;
2778
2780 DenseSet<BasicBlock *> BlocksInRegion;
2781 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2782 OS->CE = new (ExtractorAllocator.Allocate())
2783 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2784 false, nullptr, {}, "outlined");
2785 findAddInputsOutputs(M, *OS, NotSame);
2786 if (!OS->IgnoreRegion)
2787 OutlinedRegions.push_back(OS);
2788
2789 // We recombine the blocks together now that we have gathered all the
2790 // needed information.
2791 OS->reattachCandidate();
2792 }
2793
2794 CurrentGroup.Regions = std::move(OutlinedRegions);
2795
2796 if (CurrentGroup.Regions.empty())
2797 continue;
2798
2799 CurrentGroup.collectGVNStoreSets(M);
2800
2801 if (CostModel)
2802 findCostBenefit(M, CurrentGroup);
2803
2804 // If we are adhering to the cost model, skip those groups where the cost
2805 // outweighs the benefits.
2806 if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
2807 OptimizationRemarkEmitter &ORE =
2808 getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
2809 ORE.emit([&]() {
2810 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2811 OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
2812 C->frontInstruction());
2813 R << "did not outline "
2814 << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2815 << " regions due to estimated increase of "
2816 << ore::NV("InstructionIncrease",
2817 CurrentGroup.Cost - CurrentGroup.Benefit)
2818 << " instructions at locations ";
2819 interleave(
2820 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2821 [&R](OutlinableRegion *Region) {
2822 R << ore::NV(
2823 "DebugLoc",
2824 Region->Candidate->frontInstruction()->getDebugLoc());
2825 },
2826 [&R]() { R << " "; });
2827 return R;
2828 });
2829 continue;
2830 }
2831
2832 NegativeCostGroups.push_back(&CurrentGroup);
2833 }
2834
2835 ExtractorAllocator.DestroyAll();
2836
2837 if (NegativeCostGroups.size() > 1)
2838 stable_sort(NegativeCostGroups,
2839 [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
2840 return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
2841 });
2842
2843 std::vector<Function *> FuncsToRemove;
2844 for (OutlinableGroup *CG : NegativeCostGroups) {
2845 OutlinableGroup &CurrentGroup = *CG;
2846
2847 OutlinedRegions.clear();
2848 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2849 // We check whether our region is compatible with what has already been
2850 // outlined, and whether we need to ignore this item.
2851 if (!isCompatibleWithAlreadyOutlinedCode(*Region))
2852 continue;
2853 OutlinedRegions.push_back(Region);
2854 }
2855
2856 if (OutlinedRegions.size() < 2)
2857 continue;
2858
2859 // Reestimate the cost and benefit of the OutlinableGroup. Continue only if
2860 // we are still outlining enough regions to make up for the added cost.
2861 CurrentGroup.Regions = std::move(OutlinedRegions);
2862 if (CostModel) {
2863 CurrentGroup.Benefit = 0;
2864 CurrentGroup.Cost = 0;
2865 findCostBenefit(M, CurrentGroup);
2866 if (CurrentGroup.Cost >= CurrentGroup.Benefit)
2867 continue;
2868 }
2869 OutlinedRegions.clear();
2870 for (OutlinableRegion *Region : CurrentGroup.Regions) {
2871 Region->splitCandidate();
2872 if (!Region->CandidateSplit)
2873 continue;
2874 OutlinedRegions.push_back(Region);
2875 }
2876
2877 CurrentGroup.Regions = std::move(OutlinedRegions);
2878 if (CurrentGroup.Regions.size() < 2) {
2879 for (OutlinableRegion *R : CurrentGroup.Regions)
2880 R->reattachCandidate();
2881 continue;
2882 }
2883
2884 LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
2885 << " and benefit " << CurrentGroup.Benefit << "\n");
2886
2887 // Create functions out of all the sections, and mark them as outlined.
2888 OutlinedRegions.clear();
2889 for (OutlinableRegion *OS : CurrentGroup.Regions) {
2891 DenseSet<BasicBlock *> BlocksInRegion;
2892 OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
2893 OS->CE = new (ExtractorAllocator.Allocate())
2894 CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
2895 false, nullptr, {}, "outlined");
2896 bool FunctionOutlined = extractSection(*OS);
2897 if (FunctionOutlined) {
2898 unsigned StartIdx = OS->Candidate->getStartIdx();
2899 unsigned EndIdx = OS->Candidate->getEndIdx();
2900 for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
2901 Outlined.insert(Idx);
2902
2903 OutlinedRegions.push_back(OS);
2904 }
2905 }
2906
2907 LLVM_DEBUG(dbgs() << "Outlined " << OutlinedRegions.size()
2908 << " with benefit " << CurrentGroup.Benefit
2909 << " and cost " << CurrentGroup.Cost << "\n");
2910
2911 CurrentGroup.Regions = std::move(OutlinedRegions);
2912
2913 if (CurrentGroup.Regions.empty())
2914 continue;
2915
2916 OptimizationRemarkEmitter &ORE =
2917 getORE(*CurrentGroup.Regions[0]->Call->getFunction());
2918 ORE.emit([&]() {
2919 IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
2920 OptimizationRemark R(DEBUG_TYPE, "Outlined", C->front()->Inst);
2921 R << "outlined " << ore::NV(std::to_string(CurrentGroup.Regions.size()))
2922 << " regions with decrease of "
2923 << ore::NV("Benefit", CurrentGroup.Benefit - CurrentGroup.Cost)
2924 << " instructions at locations ";
2925 interleave(
2926 CurrentGroup.Regions.begin(), CurrentGroup.Regions.end(),
2927 [&R](OutlinableRegion *Region) {
2928 R << ore::NV("DebugLoc",
2929 Region->Candidate->frontInstruction()->getDebugLoc());
2930 },
2931 [&R]() { R << " "; });
2932 return R;
2933 });
2934
2935 deduplicateExtractedSections(M, CurrentGroup, FuncsToRemove,
2936 OutlinedFunctionNum);
2937 }
2938
2939 for (Function *F : FuncsToRemove)
2940 F->eraseFromParent();
2941
2942 return OutlinedFunctionNum;
2943}
2944
2946 CostModel = !NoCostModel;
2947 OutlineFromLinkODRs = EnableLinkOnceODRIROutlining;
2948
2949 return doOutline(M) > 0;
2950}
2951
2953 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2954
2955 std::function<TargetTransformInfo &(Function &)> GTTI =
2956 [&FAM](Function &F) -> TargetTransformInfo & {
2957 return FAM.getResult<TargetIRAnalysis>(F);
2958 };
2959
2960 std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
2961 [&AM](Module &M) -> IRSimilarityIdentifier & {
2962 return AM.getResult<IRSimilarityAnalysis>(M);
2963 };
2964
2965 std::unique_ptr<OptimizationRemarkEmitter> ORE;
2966 std::function<OptimizationRemarkEmitter &(Function &)> GORE =
2967 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
2968 ORE.reset(new OptimizationRemarkEmitter(&F));
2969 return *ORE;
2970 };
2971
2972 if (IROutliner(GTTI, GIRSI, GORE).run(M))
2973 return PreservedAnalyses::none();
2974 return PreservedAnalyses::all();
2975}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
ReachingDefInfo InstSet & ToRemove
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
std::pair< ArgLocWithBBCanon, CanonList > PHINodeData
static void remapExtractedInputs(const ArrayRef< Value * > ArgInputs, const DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &RemappedArgInputs)
Find the original value for the ArgInput values if any one of them was replaced during a previous ext...
static std::optional< unsigned > getGVNForPHINode(OutlinableRegion &Region, PHINode *PN, DenseSet< BasicBlock * > &Blocks, unsigned AggArgIdx)
Create a special GVN for PHINodes that will be used outside of the region.
static Value * getPassedArgumentAndAdjustArgumentLocation(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
static void findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region, const DenseMap< Value *, Value * > &OutputMappings, SmallVector< std::pair< unsigned, BasicBlock * > > &CanonNums, bool ReplacedWithOutlinedCall=true)
Find the canonical numbering for the incoming Values into the PHINode PN.
static cl::opt< bool > NoCostModel("ir-outlining-no-cost", cl::init(false), cl::ReallyHidden, cl::desc("Debug option to outline greedily, without restriction that " "calculated benefit outweighs cost"))
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB)
Move the contents of SourceBB to before the last instruction of TargetBB.
static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID)
Checks that the next instruction in the InstructionDataList matches the next instruction in the modul...
static void moveFunctionData(Function &Old, Function &New, DenseMap< Value *, BasicBlock * > &NewEnds)
Move each BasicBlock in Old to New.
static cl::opt< bool > EnableLinkOnceODRIROutlining("enable-linkonceodr-ir-outlining", cl::Hidden, cl::desc("Enable the IR outliner on linkonceodr functions"), cl::init(false))
static void getCodeExtractorArguments(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, DenseSet< unsigned > &NotSame, DenseMap< Value *, Value * > &OutputMappings, SetVector< Value * > &ArgInputs, SetVector< Value * > &Outputs)
Find the input GVNs and the output values for a region of Instructions.
static void findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region, SetVector< Value * > &Outputs)
Create a mapping of the output arguments for the Region to the output arguments of the overall outlin...
static void alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, DenseMap< Value *, BasicBlock * > &EndBBs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
For the outlined section, move needed the StoreInsts for the output registers into their own block.
static bool collectRegionsConstants(OutlinableRegion &Region, DenseMap< unsigned, Constant * > &GVNToConstant, DenseSet< unsigned > &NotSame)
Find whether Region matches the global value numbering to Constant mapping found so far.
static PHINode * findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, const DenseMap< Value *, Value * > &OutputMappings, DenseSet< PHINode * > &UsedPHIs)
Find, or add PHINode PN to the combined PHINode Block OverallPHIBlock in order to condense the number...
static bool analyzeAndPruneOutputBlocks(DenseMap< Value *, BasicBlock * > &BlocksToPrune, OutlinableRegion &Region)
Remove empty output blocks from the outlined region.
static hash_code encodePHINodeData(PHINodeData &PND)
Encode PND as an integer for easy lookup based on the argument location, the parent BasicBlock canoni...
SmallVector< unsigned, 2 > CanonList
CallInst * replaceCalledFunction(Module &M, OutlinableRegion &Region)
Replace the extracted function in the Region with a call to the overall function constructed from the...
static void createAndInsertBasicBlocks(DenseMap< Value *, BasicBlock * > &OldMap, DenseMap< Value *, BasicBlock * > &NewMap, Function *ParentFunc, Twine BaseName)
Takes in a mapping, OldMap of ConstantValues to BasicBlocks, sorts keys, before creating a basic bloc...
static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN, SmallPtrSet< BasicBlock *, 1 > &Exits, DenseSet< BasicBlock * > &BlocksInRegion)
Check if the V has any uses outside of the region other than PN.
static void analyzeExitPHIsForOutputUses(BasicBlock *CurrentExitFromRegion, SmallPtrSet< BasicBlock *, 1 > &PotentialExitsFromRegion, DenseSet< BasicBlock * > &RegionBlocks, SetVector< Value * > &Outputs, DenseSet< Value * > &OutputsReplacedByPHINode, DenseSet< Value * > &OutputsWithNonPhiUses)
Test whether CurrentExitFromRegion contains any PhiNodes that should be considered outputs.
static void getSortedConstantKeys(std::vector< Value * > &SortedKeys, DenseMap< Value *, BasicBlock * > &Map)
A function to sort the keys of Map, which must be a mapping of constant values to basic blocks and re...
static InstructionCost findCostForOutputBlocks(Module &M, OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI)
Find the extra instructions needed to handle any output values for the region.
static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find, BasicBlock *Replace, DenseSet< BasicBlock * > &Included)
Rewrite the BranchInsts in the incoming blocks to PHIBlock that are found in Included to branch to Ba...
static void findExtractedInputToOverallInputMapping(OutlinableRegion &Region, std::vector< unsigned > &InputGVNs, SetVector< Value * > &ArgInputs)
Look over the inputs and map each input argument to an argument in the overall function for the Outli...
static void mapInputsToGVNs(IRSimilarityCandidate &C, SetVector< Value * > &CurrentInputs, const DenseMap< Value *, Value * > &OutputMappings, std::vector< unsigned > &EndInputNumbers)
Find the GVN for the inputs that have been found by the CodeExtractor.
static void replaceArgumentUses(OutlinableRegion &Region, DenseMap< Value *, BasicBlock * > &OutputBBs, const DenseMap< Value *, Value * > &OutputMappings, bool FirstFunction=false)
static DISubprogram * getSubprogramOrNull(OutlinableGroup &Group)
Get the subprogram if it exists for one of the outlined regions.
static Value * findOutputMapping(const DenseMap< Value *, Value * > OutputMappings, Value *Input)
Check the OutputMappings structure for value Input, if it exists it has been used as an output for ou...
static Value * findOutputValueInRegion(OutlinableRegion &Region, unsigned OutputCanon)
For the OutputCanon number passed in find the value represented by this canonical number.
static std::optional< bool > constantMatches(Value *V, unsigned GVN, DenseMap< unsigned, Constant * > &GVNToConstant)
Find whether V matches the Constants previously found for the GVN.
static void findConstants(IRSimilarityCandidate &C, DenseSet< unsigned > &NotSame, std::vector< unsigned > &Inputs)
Find the constants that will need to be lifted into arguments as they are not the same in each instan...
void replaceConstants(OutlinableRegion &Region)
Within an extracted function, replace the constants that need to be lifted into arguments with the ac...
std::pair< unsigned, unsigned > ArgLocWithBBCanon
static Value * getPassedArgumentInAlreadyOutlinedFunction(const Argument *A, const OutlinableRegion &Region)
For the function call now representing the Region, find the passed value to that call that represents...
void createSwitchStatement(Module &M, OutlinableGroup &OG, DenseMap< Value *, BasicBlock * > &EndBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
Create the switch statement for outlined function to differentiate between all the output blocks.
static BasicBlock * findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal)
Find or create a BasicBlock in the outlined function containing PhiBlocks for RetVal.
std::optional< unsigned > findDuplicateOutputBlock(DenseMap< Value *, BasicBlock * > &OutputBBs, std::vector< DenseMap< Value *, BasicBlock * > > &OutputStoreBBs)
It is possible that there is a basic block that already performs the same stores.
This header defines various interfaces for pass management in LLVM.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
A cache for the CodeExtractor analysis.
Utility class for extracting code into a new function.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
Definition Constant.h:43
Subprogram description. Uses SubclassData1.
static DebugLoc getDropped()
Definition DebugLoc.h:163
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:328
iterator end()
Definition DenseMap.h:81
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
void getDescendants(NodeT *R, SmallVectorImpl< NodeT * > &Result) const
Get all nodes dominated by R, including R itself.
void insertEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge insertion and update the tree.
void deleteEdge(NodeT *From, NodeT *To)
Inform the dominator tree about a CFG edge deletion and update the tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Class to represent function types.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition Function.cpp:638
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
const BasicBlock & getEntryBlock() const
Definition Function.h:809
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
const BasicBlock & back() const
Definition Function.h:862
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
size_t size() const
Definition Function.h:858
bool hasOptNone() const
Do not optimize this function (-O0).
Definition Function.h:708
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition Function.cpp:666
size_t arg_size() const
Definition Function.h:901
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool hasLinkOnceODRLinkage() const
@ InternalLinkage
Rename collisions when linking (static functions).
Definition GlobalValue.h:60
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This class is a pass that identifies similarity in a Module, extracts instances of the similarity,...
Definition IROutliner.h:199
bool run(Module &M)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void replaceSuccessorWith(BasicBlock *OldBB, BasicBlock *NewBB)
Replace specified successor OldBB to point at the provided block.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
An instruction for reading from memory.
Value * getPointerOperand()
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
LLVM_ABI void getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, bool CannotUsePrivateLabel) const
Print the appropriate prefix and the specified global variable's name.
Definition Mangler.cpp:121
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void setIncomingBlock(unsigned i, BasicBlock *BB)
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition RegionInfo.h:362
iterator end()
Definition RegionInfo.h:559
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
A vector that has set insertion semantics.
Definition SetVector.h:57
ArrayRef< value_type > getArrayRef() const
Definition SetVector.h:91
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, InsertPosition InsertBefore=nullptr)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueInfo OpdInfo={OK_AnyValue, OP_None}, const Instruction *I=nullptr) const
@ TCK_CodeSize
Instruction code size.
LLVM_ABI bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
User * user_back()
Definition Value.h:412
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
bool erase(const ValueT &V)
Definition DenseSet.h:100
An opaque object representing a hash code.
Definition Hashing.h:78
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
iterator erase(iterator I)
Remove a node by iterator; never deletes.
iterator insert(iterator I, reference Node)
Insert a node by reference; never copies.
CallInst * Call
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
hash_code hash_value(const FixedPointSemantics &Val)
void interleave(ForwardIterator begin, ForwardIterator end, UnaryFunctor each_fn, NullaryFunctor between_fn)
An STL-style algorithm similar to std::for_each that applies a second functor between every pair of e...
Definition STLExtras.h:2274
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void RemapFunction(Function &F, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the operands, metadata, arguments, and instructions of a function.
auto predecessors(const MachineBasicBlock *BB)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:325
LLVM_ABI void updateLoopMetadataDebugLocations(Instruction &I, function_ref< Metadata *(Metadata *)> Updater)
Update the debug locations contained within the MD_loop metadata attached to the instruction I,...
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:305
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
The OutlinableGroup holds all the overarching information for outlining a set of regions that are str...
std::optional< unsigned > SwiftErrorArgument
The argument that needs to be marked with the swifterr attribute.
std::vector< Type * > ArgumentTypes
The argument types for the function created as the overall function to replace the extracted function...
DenseSet< ArrayRef< unsigned > > OutputGVNCombinations
A set containing the different GVN store sets needed.
void findSameConstants(DenseSet< unsigned > &NotSame)
For the Regions, we look at every Value.
InstructionCost Cost
The number of added instructions needed for the outlining of the Regions.
DenseMap< unsigned, std::pair< std::pair< unsigned, unsigned >, SmallVector< unsigned, 2 > > > PHINodeGVNToGVNs
unsigned PHINodeGVNTracker
Tracker counting backwards from the highest unsigned value possible to avoid conflicting with the GVN...
std::vector< OutlinableRegion * > Regions
The sections that could be outlined.
DenseMap< hash_code, unsigned > GVNsToPHINodeGVN
bool IgnoreGroup
Flag for whether we should not consider this group of OutlinableRegions for extraction.
DenseMap< Value *, BasicBlock * > EndBBs
The return blocks for the overall function.
unsigned BranchesToOutside
The number of branches in the region target a basic block that is outside of the region.
Function * OutlinedFunction
The Function for the collective overall function.
bool InputTypesSet
Flag for whether the ArgumentTypes have been defined after the extraction of the first region.
DenseMap< Value *, BasicBlock * > PHIBlocks
The PHIBlocks with their corresponding return block based on the return value as the key.
FunctionType * OutlinedFunctionType
The FunctionType for the overall function.
DenseMap< unsigned, unsigned > CanonicalNumberToAggArg
The mapping of the canonical numbering of the values in outlined sections to specific arguments.
void collectGVNStoreSets(Module &M)
For the regions, look at each set of GVN stores needed and account for each combination.
InstructionCost Benefit
The number of instructions that will be outlined by extracting Regions.
unsigned NumAggregateInputs
The number of input values in ArgumentTypes.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
This provides the utilities for hashing an Instruction to an unsigned integer.
Instruction * Inst
The source Instruction that is being wrapped.
The OutlinableRegion holds all the information for a specific region, or sequence of instructions.
Definition IROutliner.h:63
CallInst * Call
The call site of the extracted region.
Definition IROutliner.h:123
CodeExtractor * CE
Used to create an outlined function.
Definition IROutliner.h:120
InstructionCost getBenefit(TargetTransformInfo &TTI)
Get the size of the code removed from the region.
BasicBlock * FollowBB
The BasicBlock that is after the start of the region BasicBlock, only defined when the region has bee...
Definition IROutliner.h:147
unsigned OutputBlockNum
The corresponding BasicBlock with the appropriate stores for this OutlinableRegion in the overall fun...
Definition IROutliner.h:78
bool CandidateSplit
Flag for whether we have split out the IRSimilarityCanidate.
Definition IROutliner.h:130
bool IgnoreRegion
Flag for whether we should not consider this region for extraction.
Definition IROutliner.h:133
void splitCandidate()
For the contained region, split the parent BasicBlock at the starting and ending instructions of the ...
Value * findCorrespondingValueIn(const OutlinableRegion &Other, Value *V)
Find a corresponding value for V in similar OutlinableRegion Other.
BasicBlock * findCorrespondingBlockIn(const OutlinableRegion &Other, BasicBlock *BB)
Find a corresponding BasicBlock for BB in similar OutlinableRegion Other.
BasicBlock * PrevBB
The BasicBlock that is before the start of the region BasicBlock, only defined when the region has be...
Definition IROutliner.h:137
BasicBlock * EndBB
The BasicBlock that contains the ending instruction of the region.
Definition IROutliner.h:143
IRSimilarityCandidate * Candidate
Describes the region of code.
Definition IROutliner.h:65
DenseMap< Value *, Value * > RemappedArguments
Values in the outlined functions will often be replaced by arguments.
Definition IROutliner.h:93
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
Definition IROutliner.h:153
Function * ExtractedFunction
The function for the extracted region.
Definition IROutliner.h:126
bool EndsInBranch
Marks whether this region ends in a branch, there is special handling required for the following basi...
Definition IROutliner.h:102
BasicBlock * StartBB
The BasicBlock that contains the starting instruction of the region.
Definition IROutliner.h:140
void reattachCandidate()
For the contained region, reattach the BasicBlock at the starting and ending instructions of the cont...