61#define DEBUG_TYPE "loop-unroll-and-jam"
63STATISTIC(NumUnrolledAndJammed,
"Number of loops unroll and jammed");
64STATISTIC(NumCompletelyUnrolledAndJammed,
"Number of loops unroll and jammed");
72 Loop *SubLoop = L.getSubLoops()[0];
88 if (BB == SubLoopPreHeader)
92 if (!ForeBlocks.
count(Succ))
143 if (VisitedInstr.
count(
I))
148 if (AftBlocks.
count(
I->getParent()))
149 for (
auto &U :
I->operands())
151 if (!ProcessInstr(II))
157 for (
auto &Phi : Header->phis()) {
158 Value *V = Phi.getIncomingValueForBlock(Latch);
160 if (!ProcessInstr(
I))
176 if (AftBlocks.
count(
I->getParent()))
177 I->moveBefore(InsertLoc);
218 unsigned TripMultiple,
bool UnrollRemainder,
225 assert(Header &&
"No header.");
226 assert(L->getSubLoops().size() == 1);
227 Loop *SubLoop = *L->begin();
230 if (TripCount == 0 && Count < 2) {
231 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; almost nothing to do\n");
232 return LoopUnrollResult::Unmodified;
237 assert(TripCount == 0 || TripCount % TripMultiple == 0);
240 bool CompletelyUnroll = (Count == TripCount);
243 if (TripMultiple % Count != 0) {
246 UnrollRemainder,
false,
247 LI, SE, DT, AC,
TTI,
true, EpilogueLoop)) {
248 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; remainder loop could not be "
249 "generated when assuming runtime trip count\n");
250 return LoopUnrollResult::Unmodified;
263 if (CompletelyUnroll) {
265 << Header->getName() <<
" with trip count " << TripCount
269 <<
"completely unroll and jammed loop with "
270 << NV(
"UnrollCount", TripCount) <<
" iterations");
272 auto DiagBuilder = [&]() {
275 return Diag <<
"unroll and jammed loop by a factor of "
276 << NV(
"UnrollCount", Count);
279 LLVM_DEBUG(
dbgs() <<
"UNROLL AND JAMMING loop %" << Header->getName()
281 if (TripMultiple != 1) {
282 LLVM_DEBUG(
dbgs() <<
" with " << TripMultiple <<
" trips per branch");
284 return DiagBuilder() <<
" with " << NV(
"TripMultiple", TripMultiple)
285 <<
" trips per branch";
289 ORE->
emit([&]() {
return DiagBuilder() <<
" with run-time trip count"; });
294 BasicBlock *Preheader = L->getLoopPreheader();
296 assert(Preheader &&
"No preheader");
297 assert(LatchBlock &&
"No latch block");
302 bool SubLoopContinueOnTrue = SubLoop->
contains(
316 std::vector<BasicBlock *> ForeBlocksFirst;
317 std::vector<BasicBlock *> ForeBlocksLast;
318 std::vector<BasicBlock *> SubLoopBlocksFirst;
319 std::vector<BasicBlock *> SubLoopBlocksLast;
320 std::vector<BasicBlock *> AftBlocksFirst;
321 std::vector<BasicBlock *> AftBlocksLast;
322 ForeBlocksFirst.push_back(Header);
324 SubLoopBlocksFirst.push_back(SubLoop->
getHeader());
327 AftBlocksLast.push_back(L->getExitingBlock());
333 Header, LatchBlock, ForeBlocksLast[0]->getTerminator(), AftBlocks);
346 if (Header->getParent()->shouldEmitDebugInfoForProfiling() &&
350 if (!
I.isDebugOrPseudoInst())
352 auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
354 I.setDebugLoc(*NewDIL);
357 <<
"Failed to create new discriminator: "
358 << DIL->getFilename() <<
" Line: " << DIL->getLine());
362 for (
unsigned It = 1; It != Count; ++It) {
368 NewLoops[SubLoop] = SubLoop;
373 Header->getParent()->insert(Header->getParent()->end(), New);
378 if (ForeBlocks.
count(*BB)) {
379 if (*BB == ForeBlocksFirst[0])
380 ForeBlocksFirst.push_back(New);
381 if (*BB == ForeBlocksLast[0])
382 ForeBlocksLast.push_back(New);
383 }
else if (SubLoopBlocks.
count(*BB)) {
384 if (*BB == SubLoopBlocksFirst[0])
385 SubLoopBlocksFirst.push_back(New);
386 if (*BB == SubLoopBlocksLast[0])
387 SubLoopBlocksLast.push_back(New);
388 }
else if (AftBlocks.
count(*BB)) {
389 if (*BB == AftBlocksFirst[0])
390 AftBlocksFirst.push_back(New);
391 if (*BB == AftBlocksLast[0])
392 AftBlocksLast.push_back(New);
398 PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]);
399 LastValueMap[*BB] = New;
402 PrevItValueMap[VI->second] =
403 const_cast<Value *
>(It == 1 ? VI->first : LastValueMap[VI->first]);
404 LastValueMap[VI->first] = VI->second;
410 if (*BB == ForeBlocksFirst[0])
412 else if (*BB == SubLoopBlocksFirst[0])
414 else if (*BB == AftBlocksFirst[0])
419 auto BBDomNode = DT->
getNode(*BB);
420 auto BBIDom = BBDomNode->
getIDom();
421 BasicBlock *OriginalBBIDom = BBIDom->getBlock();
423 assert(LastValueMap[cast<Value>(OriginalBBIDom)]);
425 New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)]));
433 if (
auto *II = dyn_cast<AssumeInst>(&
I))
440 for (
PHINode &Phi : ForeBlocksFirst[It]->phis()) {
441 Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]);
442 assert(OldValue &&
"should have incoming edge from Aft[It]");
443 Value *NewValue = OldValue;
444 if (
Value *PrevValue = PrevItValueMap[OldValue])
445 NewValue = PrevValue;
447 assert(Phi.getNumOperands() == 2);
448 Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]);
449 Phi.setIncomingValue(0, NewValue);
450 Phi.removeIncomingValue(1);
463 for (
PHINode &Phi : BB->phis()) {
464 for (
unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) {
465 if (Phi.getIncomingBlock(b) == OldBB) {
466 Value *OldValue = Phi.getIncomingValue(b);
467 if (
Value *LastValue = LastValueMap[OldValue])
468 Phi.setIncomingValue(b, LastValue);
469 Phi.setIncomingBlock(b, NewBB);
478 while (
PHINode *Phi = dyn_cast<PHINode>(Src->begin()))
479 Phi->moveBefore(insertPoint);
483 updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.
back(),
488 cast<BranchInst>(ForeBlocksLast.back()->getTerminator());
492 if (CompletelyUnroll) {
493 while (
PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) {
494 Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader));
495 Phi->eraseFromParent();
499 updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0],
500 AftBlocksLast.back(), LastValueMap);
503 for (
unsigned It = 1; It != Count; It++) {
506 cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator());
513 cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator());
514 SubTerm->
setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]);
515 SubTerm->
setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]);
516 SubLoopBlocksFirst[0]->replacePhiUsesWith(ForeBlocksLast[0],
517 ForeBlocksLast.back());
518 SubLoopBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
519 SubLoopBlocksLast.back());
521 for (
unsigned It = 1; It != Count; It++) {
525 cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator());
529 SubLoopBlocksFirst[It]->replacePhiUsesWith(ForeBlocksLast[It],
530 ForeBlocksLast.back());
531 SubLoopBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
532 SubLoopBlocksLast.back());
533 movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]);
537 BranchInst *AftTerm = cast<BranchInst>(AftBlocksLast.back()->getTerminator());
538 if (CompletelyUnroll) {
542 AftTerm->
setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]);
544 "Expecting the ContinueOnTrue successor of AftTerm to be LoopExit");
546 AftBlocksFirst[0]->replacePhiUsesWith(SubLoopBlocksLast[0],
547 SubLoopBlocksLast.back());
549 for (
unsigned It = 1; It != Count; It++) {
553 cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
557 AftBlocksFirst[It]->replacePhiUsesWith(SubLoopBlocksLast[It],
558 SubLoopBlocksLast.back());
559 movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
567 DTUpdates.
emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
568 SubLoopBlocksFirst[0]);
569 DTUpdates.
emplace_back(DominatorTree::UpdateKind::Delete,
570 SubLoopBlocksLast[0], AftBlocksFirst[0]);
572 DTUpdates.
emplace_back(DominatorTree::UpdateKind::Insert,
573 ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
574 DTUpdates.
emplace_back(DominatorTree::UpdateKind::Insert,
575 SubLoopBlocksLast.back(), AftBlocksFirst[0]);
581 MergeBlocks.
insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
582 MergeBlocks.
insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
583 MergeBlocks.
insert(AftBlocksLast.begin(), AftBlocksLast.end());
597 NumCompletelyUnrolledAndJammed += CompletelyUnroll;
598 ++NumUnrolledAndJammed;
601 if (CompletelyUnroll)
614 if (!CompletelyUnroll)
615 assert(L->isLoopSimplifyForm());
620 return CompletelyUnroll ? LoopUnrollResult::FullyUnrolled
621 : LoopUnrollResult::PartiallyUnrolled;
630 if (
auto *Ld = dyn_cast<LoadInst>(&
I)) {
634 }
else if (
auto *St = dyn_cast<StoreInst>(&
I)) {
638 }
else if (
I.mayReadOrWriteMemory()) {
647 unsigned UnrollLevel,
unsigned JamLevel,
651 for (
unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
653 auto JammedDir =
D->getDirection(CurLoopDepth);
665 unsigned UnrollLevel,
unsigned JamLevel,
668 for (
unsigned CurLoopDepth = UnrollLevel + 1; CurLoopDepth <= JamLevel;
670 auto JammedDir =
D->getDirection(CurLoopDepth);
679 return Sequentialized;
691 unsigned UnrollLevel,
unsigned JamLevel,
693 assert(UnrollLevel <= JamLevel &&
694 "Expecting JamLevel to be at least UnrollLevel");
699 if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
712 std::unique_ptr<Dependence>
D = DI.
depends(Src, Dst,
true);
715 assert(
D->isOrdered() &&
"Expected an output, flow or anti dep.");
717 if (
D->isConfused()) {
719 <<
" " << *Src <<
"\n"
720 <<
" " << *Dst <<
"\n");
728 for (
unsigned CurLoopDepth = 1; CurLoopDepth < UnrollLevel; ++CurLoopDepth)
732 auto UnrollDirection =
D->getDirection(UnrollLevel);
742 Sequentialized,
D.get()))
747 Sequentialized,
D.get()))
771 CurrentLoadsAndStores.
clear();
778 for (
auto *Earlier : EarlierLoadsAndStores) {
781 unsigned CommonLoopDepth = std::min(EarlierDepth, CurLoopDepth);
782 for (
auto *Later : CurrentLoadsAndStores) {
783 if (!
checkDependency(Earlier, Later, LoopDepth, CommonLoopDepth,
false,
789 size_t NumInsts = CurrentLoadsAndStores.
size();
790 for (
size_t I = 0;
I < NumInsts; ++
I) {
791 for (
size_t J =
I; J < NumInsts; ++J) {
793 LoopDepth, CurLoopDepth,
true, DI))
798 EarlierLoadsAndStores.append(CurrentLoadsAndStores.
begin(),
799 CurrentLoadsAndStores.
end());
809 const Loop *L = &Root;
812 if (!L->isLoopSimplifyForm())
815 if (!L->isRotatedForm())
818 if (L->getHeader()->hasAddressTaken()) {
823 unsigned SubLoopsSize = L->getSubLoops().size();
824 if (SubLoopsSize == 0)
828 if (SubLoopsSize != 1)
835 if (!L->getExitBlock()) {
836 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; only loops with single exit "
837 "blocks can be unrolled and jammed.\n");
842 if (!L->getExitingBlock()) {
843 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; only loops with single "
844 "exiting blocks can be unrolled and jammed.\n");
848 L = L->getSubLoops()[0];
855 while (!L->getSubLoops().empty())
856 L = L->getSubLoops()[0];
863 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Ineligible loop form\n");
925 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Incompatible loop layout\n");
932 if (AftBlocksMap[L].
size() != 1) {
933 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Can't currently handle "
934 "multiple blocks after the loop\n");
940 if (
any_of(L->getLoopsInPreorder(), [&SE](
Loop *SubLoop) {
941 return !hasIterationCountInvariantInParent(SubLoop, SE);
943 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Inner loop iteration count is "
944 "not consistent on each iteration\n");
952 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; Something may throw\n");
966 Loop *SubLoop = L->getSubLoops()[0];
968 Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](
Instruction *
I) {
971 if (AftBlocks.
count(
I->getParent())) {
978 if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory())
985 "instructions after subloop to before it\n");
994 LLVM_DEBUG(
dbgs() <<
"Won't unroll-and-jam; failed dependency check\n");
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file defines the DenseMap class.
DenseMap< Block *, BlockRelaxAux > Blocks
This file defines a set of templates that efficiently compute a dominator tree over a generic graph.
SmallPtrSet< BasicBlock *, 4 > BasicBlockSet
static bool partitionLoopBlocks(Loop &L, BasicBlockSet &ForeBlocks, BasicBlockSet &AftBlocks, DominatorTree &DT)
static Loop * getInnerMostLoop(Loop *L)
static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, BasicBlock *Latch, Instruction *InsertLoc, BasicBlockSet &AftBlocks)
static bool getLoadsAndStores(BasicBlockSet &Blocks, SmallVector< Instruction *, 4 > &MemInstr)
static bool preservesForwardDependence(Instruction *Src, Instruction *Dst, unsigned UnrollLevel, unsigned JamLevel, bool Sequentialized, Dependence *D)
static bool partitionOuterLoopBlocks(Loop &Root, Loop &JamLoop, BasicBlockSet &JamLoopBlocks, DenseMap< Loop *, BasicBlockSet > &ForeBlocksMap, DenseMap< Loop *, BasicBlockSet > &AftBlocksMap, DominatorTree &DT)
Partition blocks in a loop nest into blocks before and after each inner loop.
static bool isEligibleLoopForm(const Loop &Root)
static bool preservesBackwardDependence(Instruction *Src, Instruction *Dst, unsigned UnrollLevel, unsigned JamLevel, bool Sequentialized, Dependence *D)
static bool checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks, const DenseMap< Loop *, BasicBlockSet > &ForeBlocksMap, const DenseMap< Loop *, BasicBlockSet > &AftBlocksMap, DependenceInfo &DI, LoopInfo &LI)
static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, BasicBlockSet &AftBlocks, T Visit)
static bool checkDependency(Instruction *Src, Instruction *Dst, unsigned UnrollLevel, unsigned JamLevel, bool Sequentialized, DependenceInfo &DI)
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
const Instruction & back() const
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
unsigned getNumSuccessors() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
DependenceInfo - This class is the main dependence-analysis driver.
std::unique_ptr< Dependence > depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent)
depends - Tests for a dependence between the Src and Dst instructions.
Dependence - This class represents a dependence between two memory memory references in a function.
DomTreeNodeBase * getIDom() const
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdatesPermissive(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
SmallVector< const LoopT *, 4 > getLoopsInPreorder() const
Return all loops in the loop nest rooted by the loop in preorder, with siblings in forward program or...
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
BlockT * getHeader() const
unsigned getLoopDepth() const
Return the nesting level of this loop.
block_iterator block_end() const
BlockT * getExitBlock() const
If getExitBlocks would return exactly one block, return that block.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
block_iterator block_begin() const
Store the result of a depth first search within basic blocks contained by a single loop.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
RPOIterator endRPO() const
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void erase(Loop *L)
Update LoopInfo after removing the last backedge from a loop.
Represents a single loop in the control flow graph.
bool isLoopSimplifyForm() const
Return true if the Loop is in the form that the LoopSimplify form transforms loops to,...
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, bool IgnoreTokens=true) const
Return true if this Loop and all inner subloops are in LCSSA form.
The main scalar evolution driver.
void forgetLoop(const Loop *L)
This method should be called by the client when it has changed a loop in a way that may effect Scalar...
void forgetBlockAndLoopDispositions(Value *V=nullptr)
Called when the client has changed the disposition of values in a loop or block.
Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...
void computeLoopSafetyInfo(const Loop *CurLoop) override
Computes safety information for a loop checks loop body & header for the possibility of may throw exc...
bool anyBlockMayThrow() const override
Returns true iff any block of the loop for which this info is contains an instruction that may throw ...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
auto successors(const MachineBasicBlock *BB)
cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
bool MergeBlockSuccessorsIntoGivenBlocks(SmallPtrSetImpl< BasicBlock * > &MergeBlocks, Loop *L=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
Merge block(s) sucessors, if possible.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI)
Perform some cleanup and simplifications on loops after unrolling.
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.