docs/doxygen/LoopUnrollRuntime_8cpp_source.html

//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements some loop unrolling utilities for loops with run-time

// trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time

// trip counts.

//

// The functions in this file are used to generate extra code when the

// run-time trip count modulo the unroll factor is not 0.  When this is the

// case, we need to generate code to execute these 'left over' iterations.

//

// The current strategy generates an if-then-else sequence prior to the

// unrolled loop to execute the 'left over' iterations before or after the

// unrolled loop.

//

//===----------------------------------------------------------------------===//


#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/InstructionSimplify.h"

#include "llvm/Analysis/LoopIterator.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/MDBuilder.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/Cloning.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include "llvm/Transforms/Utils/UnrollLoop.h"

#include <cmath>


using namespace llvm;


#define DEBUG_TYPE "loop-unroll"


// Counter for loops that were unrolled using a run-time trip count.
STATISTIC(NumRuntimeUnrolled,

          "Number of loops unrolled with run-time trip counts");

// Hidden option, off by default: permit runtime unrolling of loops that have
// more than one exit when an epilog remainder loop is generated.
static cl::opt<bool> UnrollRuntimeMultiExit(

    "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,

    cl::desc("Allow runtime unrolling for loops with multiple exits, when "

             "epilog is generated"));

// Hidden option, off by default: treat the single non-latch exit as
// predictable.  Consulted by canProfitablyRuntimeUnrollMultiExitLoop, where it
// stands in for the requirement that the exit leads to a deoptimize call.
static cl::opt<bool> UnrollRuntimeOtherExitPredictable(

    "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,

    cl::desc("Assume the non latch exit block to be predictable"));


// Branch weights for the guard that ConnectProlog emits at the end of the
// prolog: {weight of branching around the unrolled loop, weight of entering
// it}.  They model the probability that the loop trip count is so small that
// after the prolog we do not enter the unrolled loop at all.
// It is unlikely that the loop trip count is smaller than the unroll factor;
// other than that, the choice of constant is not tuned yet.
static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};

// Probability that the loop trip count is so small that we skip the unrolled
// loop completely and immediately enter the epilogue loop.
// It is unlikely that the loop trip count is smaller than the unroll factor;
// other than that, the choice of constant is not tuned yet.
// NOTE(review): the branch these weights are attached to is not in the part of
// the file reviewed here; presumably the order is {skip unrolled loop, enter
// unrolled loop} as for UnrolledLoopHeaderWeights -- confirm at the use site.
static const uint32_t EpilogHeaderWeights[] = {1, 127};


/// Connect the unrolling prolog code to the original loop.
///
/// The prolog is a clone of the loop body that executes the 'extra' iterations
/// when the run-time trip count modulo the unroll count is non-zero.
///
/// Steps performed:
/// - For every PHI in a successor of the latch (the loop header and the latch
///   exit), create a merge PHI at the end of the prolog (\p PrologExit) that
///   combines the value leaving the prolog with the value that jumped around
///   it from \p PreHeader.
/// - Feed that merge PHI into the original PHI: as the \p NewPreHeader
///   incoming value for header PHIs, or as an extra incoming value from
///   \p PrologExit for latch-exit PHIs.
/// - Emit, at the end of \p PrologExit, a conditional branch that goes around
///   the original loop to \p OriginalLoopLatchExit when
///   \p BECount <u (\p Count - 1), and to \p NewPreHeader otherwise.
/// - If \p DT is non-null, update the immediate dominator of
///   \p OriginalLoopLatchExit.
///
/// \p VMap maps original loop values to their prolog clones.
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                          BasicBlock *PrologExit,
                          BasicBlock *OriginalLoopLatchExit,
                          BasicBlock *PreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA,
                          ScalarEvolution &SE) {
  // Expected block layout on entry:
  //
  // PreHeader
  //  PrologHeader
  //  ...
  //  PrologLatch
  //  PrologExit
  //   NewPreHeader
  //    Header
  //    ...
  //    Latch
  //      LatchExit
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);

  // Each value flowing out of the original loop latch also flows out of the
  // prolog.  Give every such value a merge PHI in PrologExit and route the
  // result into the PHI of the loop header or of the loop exit block.
  for (BasicBlock *Succ : successors(Latch)) {
    for (PHINode &PN : Succ->phis()) {
      // TODO: This code assumes that the PrologExit (or the LatchExit block for
      // prolog loop) contains only one predecessor from the loop, i.e. the
      // PrologLatch. When supporting multiple-exiting block loops, we can have
      // two or more blocks that have the LatchExit as the target in the
      // original loop.

      // True when Succ is the loop header, false when it is the latch exit.
      const bool IsHeaderPhi = L->contains(&PN);

      PHINode *MergePN =
          PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
      MergePN->insertBefore(PrologExit->getFirstNonPHIIt());

      // Incoming value on the path that skips the whole prolog: the loop's
      // initial value for a header PHI, poison for a latch-exit PHI.
      Value *SkippedPrologVal;
      if (IsHeaderPhi)
        SkippedPrologVal = PN.getIncomingValueForBlock(NewPreHeader);
      else
        SkippedPrologVal = PoisonValue::get(PN.getType());
      MergePN->addIncoming(SkippedPrologVal, PreHeader);

      // Incoming value on the path through the prolog: the latch value,
      // translated to its prolog clone when it is defined inside the loop.
      Value *RanPrologVal = PN.getIncomingValueForBlock(Latch);
      if (auto *Def = dyn_cast<Instruction>(RanPrologVal))
        if (L->contains(Def))
          RanPrologVal = VMap.lookup(Def);
      MergePN->addIncoming(RanPrologVal, PrologLatch);

      // Hook the merge PHI into the existing PHI.  A header PHI already has a
      // NewPreHeader edge to overwrite; an exit PHI gains a new edge.
      if (IsHeaderPhi)
        PN.setIncomingValueForBlock(NewPreHeader, MergePN);
      else
        PN.addIncoming(MergePN, PrologExit);
      SE.forgetLcssaPhiWithNewPredecessor(L, &PN);
    }
  }

  // Make sure that the created prolog loop is in simplified form: split off
  // the PrologExit predecessors that belong to the prolog loop.
  if (Loop *PrologLoop = LI->getLoopFor(PrologLatch)) {
    SmallVector<BasicBlock *, 4> InLoopPreds;
    for (BasicBlock *Pred : predecessors(PrologExit))
      if (PrologLoop->contains(Pred))
        InLoopPreds.push_back(Pred);

    SplitBlockPredecessors(PrologExit, InLoopPreds, ".unr-lcssa", DT, LI,
                           nullptr, PreserveLCSSA);
  }

  // Create a branch around the original loop, which is taken if there are no
  // iterations remaining to be executed after running the prologue.
  Instruction *OldTerminator = PrologExit->getTerminator();
  IRBuilder<> B(OldTerminator);

  assert(Count != 0 && "nonsensical Count!");

  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
  // This means %xtraiter is (BECount + 1) and all of the iterations of this
  // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
  // then (BECount + 1) cannot unsigned-overflow.
  Value *Threshold = ConstantInt::get(BECount->getType(), Count - 1);
  Value *BrLoopExit = B.CreateICmpULT(BECount, Threshold);

  // Split the exit to maintain loop canonicalization guarantees.
  SmallVector<BasicBlock *, 4> LatchExitPreds(
      predecessors(OriginalLoopLatchExit));
  SplitBlockPredecessors(OriginalLoopLatchExit, LatchExitPreds, ".unr-lcssa",
                         DT, LI, nullptr, PreserveLCSSA);

  // Only attach branch weights when the original latch carried some; in that
  // case assume the loop is nearly always entered.
  MDNode *BranchWeights = nullptr;
  if (hasBranchWeightMD(*Latch->getTerminator()))
    BranchWeights = MDBuilder(B.getContext())
                        .createBranchWeights(UnrolledLoopHeaderWeights);

  // Add the branch to the exit block (around the unrolled loop) and drop the
  // terminator it replaces.
  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader,
                 BranchWeights);
  OldTerminator->eraseFromParent();

  if (DT) {
    // OriginalLoopLatchExit is now also reachable from PrologExit.
    auto *NewDom =
        DT->findNearestCommonDominator(OriginalLoopLatchExit, PrologExit);
    DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
  }
}


/// Given our position in the remainder loop or its guard, between 0 and \p N
/// further iterations can still execute.  Restricted to those cases of the
/// original loop (whose loop probability is \p OriginalLoopProb), return the
/// probability that at least one more iteration executes.
static BranchProbability
probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N) {
  // A loop probability of one would make the denominator below zero.  It
  // describes an always-infinite loop, for which a remainder cannot be
  // computed at run time (infinity % UnrollCount is undefined), so report that
  // every remainder iteration always executes.
  //
  // Currently, the remainder loop here is an epilogue, which cannot be reached
  // if the original loop is infinite, so that choice is arbitrary.
  //
  // FIXME: Branch weights still need to be fixed in the case of prologues
  // (issue #135812).  In that case, the choice above seems reasonable for the
  // goal of maintaining the original loop's block frequencies.  That is, an
  // infinite loop's initial iterations are not skipped, and the prologue loop
  // body might have unique blocks that execute a finite number of times if,
  // for example, the original loop body contains conditionals like i <
  // UnrollCount.
  if (OriginalLoopProb == BranchProbability::getOne())
    return BranchProbability::getOne();

  // With m the number of iterations the original loop will execute, each
  // variable is the original loop's probability that m is in the given range.
  BranchProbability AtLeastOne = OriginalLoopProb;      // 1 <= m
  BranchProbability MoreThanN = AtLeastOne.pow(N + 1);  // N + 1 <= m
  BranchProbability AtMostN = MoreThanN.getCompl();     // 0 <= m <= N
  BranchProbability OneToN = AtLeastOne - MoreThanN;    // 1 <= m <= N

  // P(1 <= m | m <= N).
  return OneToN / AtMostN;
}


/// Connect the unrolling epilog code to the original loop.
///
/// The epilog is a clone of the loop body that executes the 'extra' iterations
/// when the run-time trip count modulo the unroll count is non-zero.
///
/// This function performs the following:
/// - Update PHI nodes at the epilog loop exit.
/// - Create PHI nodes at the unrolling loop exit and epilog preheader to
///   combine values that exit the unrolling loop code and jump around it.
/// - Update PHI operands in the epilog loop by the new PHI nodes.
/// - At the unrolling loop exit, branch around the epilog loop if extra iters
///   (ModVal) is zero.
/// - At the epilog preheader, add an llvm.assume call that extra iters is
///   non-zero.  If the unrolling loop exit is the predecessor, the above new
///   branch guarantees that assumption.  If the unrolling loop preheader is the
///   predecessor, then the required first iteration from the original loop has
///   yet to be executed, so it must be executed in the epilog loop.  If we
///   later unroll the epilog loop, that llvm.assume call somehow enables
///   ScalarEvolution to compute an epilog loop maximum trip count, which
///   enables eliminating the branch at the end of the final unrolled epilog
///   iteration.
///
/// \p VMap maps original loop values to their epilog clones.  When
/// \p OriginalLoopProb is known it determines the probability attached to the
/// new guard branch; otherwise fixed weights are used if the original latch
/// branch carried weights.  The new assumption is registered with \p AC.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
                          BasicBlock *Exit, BasicBlock *PreHeader,
                          BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
                          unsigned Count, AssumptionCache &AC,
                          BranchProbability OriginalLoopProb) {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);

  // Expected block layout on entry:
  //
  // PreHeader
  // NewPreHeader
  //   Header
  //   ...
  //   Latch
  // NewExit (PN)
  // EpilogPreHeader
  //   EpilogHeader
  //   ...
  //   EpilogLatch
  // Exit (EpilogPN)

  // Step 1: update PHI nodes at Exit.
  for (PHINode &PN : NewExit->phis()) {
    // Exit was split by SplitBlockPredecessors into Exit and NewExit, so PN is
    // expected to feed exactly one PHI located in Exit:
    //
    // NewExit:
    //   PN = PHI [I, Latch]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
    //
    // Exits from non-latch blocks point to the original exit block and the
    // epilogue edges have already been added.
    //
    // The incoming block is EpilogPreHeader rather than NewExit because
    // NewExit was split one more time to obtain EpilogPreHeader.
    assert(PN.hasOneUse() && "The phi should have 1 use");
    PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
    assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");

    // Value leaving the epilog latch: the epilog clone for values defined in
    // the loop; the value itself for out-of-loop instructions, constants and
    // undefined values.
    Value *FromEpilogLatch = PN.getIncomingValueForBlock(Latch);
    if (auto *Def = dyn_cast<Instruction>(FromEpilogLatch))
      if (L->contains(Def))
        FromEpilogLatch = VMap.lookup(Def);
    EpilogPN->addIncoming(FromEpilogLatch, EpilogLatch);

    // Re-label the EpilogPreHeader edge so that it comes from NewExit.
    const int GuardEdgeIdx = EpilogPN->getBasicBlockIndex(EpilogPreHeader);
    assert(GuardEdgeIdx >= 0 &&
           "EpilogPN should have EpilogPreHeader incoming block");
    EpilogPN->setIncomingBlock(GuardEdgeIdx, NewExit);
    // Now the PHIs look like:
    // NewExit:
    //   PN = PHI [I, Latch]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
  }

  // Step 2: create PHI nodes at NewExit (from the unrolling loop Latch) and at
  // EpilogPreHeader (from PreHeader and NewExit), and update the
  // corresponding PHI nodes in the epilog loop.
  for (BasicBlock *Succ : successors(Latch)) {
    // PHIs in exit blocks were already handled by step 1.
    if (!L->contains(Succ))
      continue;

    // Succ here appears to always be just L->getHeader().  Otherwise, how do we
    // know its corresponding epilog block (from VMap) is EpilogHeader and thus
    // EpilogPreHeader is the right incoming block for the cloned PHI, as set
    // below?
    // TODO: Can we thus avoid the enclosing loop over successors?
    assert(Succ == L->getHeader() &&
           "Expect the only in-loop successor of latch to be the loop header");

    for (PHINode &PN : Succ->phis()) {
      // PHI in the unrolled loop's exit block, fed from the unrolling loop
      // latch.
      PHINode *ExitPN = PHINode::Create(PN.getType(), /*NumReservedValues=*/1,
                                        PN.getName() + ".unr");
      ExitPN->insertBefore(NewExit->getFirstNonPHIIt());
      ExitPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);

      // PHI in EpilogPreHeader merging the value from the unrolling loop
      // preheader with the one from the epilog loop guard.
      PHINode *InitPN = PHINode::Create(PN.getType(), /*NumReservedValues=*/2,
                                        PN.getName() + ".epil.init");
      InitPN->insertBefore(EpilogPreHeader->getFirstNonPHIIt());
      InitPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
      InitPN->addIncoming(ExitPN, NewExit);

      // The clone of a header PHI must itself be a PHI; make it start from
      // the merged value.
      PHINode *ClonedPN = cast<PHINode>(VMap[&PN]);
      ClonedPN->setIncomingValueForBlock(EpilogPreHeader, InitPN);
    }
  }

  // Step 3: in NewExit, branch around the epilog loop if no extra iters.
  Instruction *OldTerminator = NewExit->getTerminator();
  IRBuilder<> B(OldTerminator);
  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
  assert(Exit && "Loop must have a single exit block only");

  // Split the epilogue exit to maintain loop canonicalization guarantees.
  SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
  SplitBlockPredecessors(Exit, ExitPreds, ".epilog-lcssa", DT, LI, nullptr,
                         PreserveLCSSA);

  // Without a known loop probability, fall back to weights that assume the
  // remainder is equally distributed in the interval [0, Count), but only if
  // the original latch branch had weights at all.
  const bool HasLoopProb = !OriginalLoopProb.isUnknown();
  MDNode *BranchWeights = nullptr;
  if (!HasLoopProb && hasBranchWeightMD(*Latch->getTerminator()))
    BranchWeights =
        MDBuilder(B.getContext()).createBranchWeights(1, Count - 1);

  // Add the branch to the exit block (around the epilog loop).
  BranchInst *RemainderLoopGuard =
      B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
  if (HasLoopProb)
    setBranchProbability(RemainderLoopGuard,
                         probOfNextInRemainder(OriginalLoopProb, Count - 1),
                         /*ForFirstTarget=*/true);
  OldTerminator->eraseFromParent();

  if (DT) {
    // Exit is now also reachable directly from NewExit.
    auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
    DT->changeImmediateDominator(Exit, NewDom);
  }

  // Step 4: in EpilogPreHeader, assume extra iters is non-zero.
  IRBuilder<> AssumeBuilder(EpilogPreHeader,
                            EpilogPreHeader->getFirstNonPHIIt());
  Value *ModIsNotNull = AssumeBuilder.CreateIsNotNull(ModVal, "lcmp.mod");
  AssumeInst *Assume =
      cast<AssumeInst>(AssumeBuilder.CreateAssumption(ModIsNotNull));
  AC.registerAssumption(Assume);
}


/// Create a clone of the blocks in a loop and connect them together.
///
/// A new loop is created containing all cloned blocks.  The cloned latch does
/// not reuse the original exit condition: it is terminated by a branch on a
/// fresh induction variable that starts at 0 in the cloned header, is
/// incremented in the cloned latch, and leaves the loop once the incremented
/// value equals \p NewIter.
///
/// The cloned blocks are inserted between \p InsertTop and \p InsertBot:
/// \p InsertTop should be the new preheader (its successor 0 is redirected to
/// the cloned header) and \p InsertBot the new loop exit.  \p Preheader is the
/// block the original header PHIs name as their entry predecessor.
///
/// \param NewBlocks receives the cloned blocks, appended in RPO order.
/// \param VMap is extended with the original-to-clone mapping.
/// \param UseEpilogRemainder selects the "epil" name suffix (otherwise
///        "prol") and whether \p OriginalLoopProb / \p OriginalTripCount are
///        applied to the new loop.
/// \param UnrollRemainder when false, the new loop is marked as already
///        unrolled to disable future unrolling.
/// \returns the new cloned loop.
static Loop *CloneLoopBlocks(Loop *L, Value *NewIter,
                             const bool UseEpilogRemainder,
                             const bool UnrollRemainder, BasicBlock *InsertTop,
                             BasicBlock *InsertBot, BasicBlock *Preheader,
                             std::vector<BasicBlock *> &NewBlocks,
                             LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                             DominatorTree *DT, LoopInfo *LI, unsigned Count,
                             std::optional<unsigned> OriginalTripCount,
                             BranchProbability OriginalLoopProb) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  Loop *ParentLoop = L->getParentLoop();
  NewLoopsMap NewLoops;
  NewLoops[ParentLoop] = ParentLoop;

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator It = LoopBlocks.beginRPO(),
                                  End = LoopBlocks.endRPO();
       It != End; ++It) {
    BasicBlock *OrigBB = *It;
    BasicBlock *NewBB = CloneBasicBlock(OrigBB, VMap, "." + suffix, F);
    NewBlocks.push_back(NewBB);

    addClonedBlockToLoopInfo(OrigBB, NewBB, LI, NewLoops);

    VMap[OrigBB] = NewBB;
    const bool IsHeader = OrigBB == Header;
    if (IsHeader) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

    if (DT) {
      if (IsHeader) {
        // The header is dominated by the preheader.
        DT->addNewBlock(NewBB, InsertTop);
      } else {
        // Copy information from original loop to unrolled loop.  RPO order
        // guarantees the immediate dominator has already been cloned.
        BasicBlock *IDomBB = DT->getNode(OrigBB)->getIDom()->getBlock();
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
      }
    }

    if (OrigBB != Latch)
      continue;

    // For the last block, create a loop back to the cloned head.  The cloned
    // terminator is replaced below, so drop the original's mapping.
    VMap.erase(OrigBB->getTerminator());
    // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
    // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
    // thus we must compare the post-increment (wrapping) value.
    BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
    BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
    IRBuilder<> Builder(LatchBR);
    PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
    NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
    auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
    auto *One = ConstantInt::get(NewIdx->getType(), 1);
    Value *IdxNext =
        Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
    Value *IdxCmp =
        Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");

    // Fixed branch weights are only used when no loop probability will be
    // applied below and the cloned latch branch carried weights.
    MDNode *BranchWeights = nullptr;
    if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
        hasBranchWeightMD(*LatchBR)) {
      // Note: We do not enter this loop for zero-remainders. The check
      // is at the end of the loop. We assume equal distribution between
      // possible remainders in [1, Count).
      // For Count < 3 the backedge is unnecessary and should never be taken;
      // the conditional jump should be optimized away later.
      const uint32_t ExitWeight = 1;
      const uint32_t BackEdgeWeight = Count >= 3 ? (Count - 2) / 2 : 0;
      MDBuilder MDB(Builder.getContext());
      BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
    }
    BranchInst *RemainderLoopLatch =
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);

    if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
      // Compute the total frequency of the original loop body from the
      // remainder iterations.  Once we've reached them, the first of them
      // always executes, so its frequency and probability are 1.
      double FreqRemIters = 1;
      if (Count > 2) {
        BranchProbability ProbReaching = BranchProbability::getOne();
        for (unsigned N = Count - 2; N >= 1; --N) {
          ProbReaching *= probOfNextInRemainder(OriginalLoopProb, N);
          FreqRemIters += double(ProbReaching.getNumerator()) /
                          ProbReaching.getDenominator();
        }
      }
      // Solve for the loop probability that would produce that frequency.
      // Sum(i=0..inf)(Prob^i) = 1/(1-Prob) = FreqRemIters.
      double ProbDouble = 1 - 1 / FreqRemIters;
      BranchProbability Prob = BranchProbability::getBranchProbability(
          std::round(ProbDouble * BranchProbability::getDenominator()),
          BranchProbability::getDenominator());
      setBranchProbability(RemainderLoopLatch, Prob, /*ForFirstTarget=*/true);
    }

    NewIdx->addIncoming(Zero, InsertTop);
    NewIdx->addIncoming(IdxNext, NewBB);
    LatchBR->eraseFromParent();
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
  for (PHINode &OrigPHI : Header->phis()) {
    PHINode *NewPHI = cast<PHINode>(VMap[&OrigPHI]);

    // getBasicBlockIndex() returns -1 when the block is not an incoming
    // block; keep the result signed and check it instead of letting -1 turn
    // into a huge unsigned operand index.
    const int PreheaderIdx = NewPHI->getBasicBlockIndex(Preheader);
    assert(PreheaderIdx >= 0 &&
           "Cloned header PHI must have an incoming value from the preheader");
    NewPHI->setIncomingBlock(PreheaderIdx, InsertTop);

    const int LatchIdx = NewPHI->getBasicBlockIndex(Latch);
    assert(LatchIdx >= 0 &&
           "Cloned header PHI must have an incoming value from the latch");
    Value *InVal = NewPHI->getIncomingValue(LatchIdx);
    NewPHI->setIncomingBlock(LatchIdx, NewLatch);
    // Values defined outside the loop have no clone and are kept as is.
    if (Value *V = VMap.lookup(InVal))
      NewPHI->setIncomingValue(LatchIdx, V);
  }

  Loop *NewLoop = NewLoops[L];
  assert(NewLoop && "L should have been cloned");

  if (OriginalTripCount && UseEpilogRemainder)
    setLoopEstimatedTripCount(NewLoop, *OriginalTripCount % Count);

  // Add unroll disable metadata to disable future unrolling for this loop.
  if (!UnrollRemainder)
    NewLoop->setLoopAlreadyUnrolled();
  return NewLoop;
}


/// Returns true if we can profitably unroll the multi-exit loop \p L.
///
/// The answer is based on two coarse heuristics: \p L must have at most two
/// exiting blocks, and if it has a non-latch exit block (\p OtherExits) there
/// must be exactly one, which either post-dominates into a deoptimize call or
/// is declared predictable via UnrollRuntimeOtherExitPredictable.
/// \p LatchExit and \p UseEpilogRemainder are currently not consulted.
static bool canProfitablyRuntimeUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool UseEpilogRemainder) {

  // The main pain point with multi-exit loop unrolling is that once unrolled,
  // we will not be able to merge all blocks into a straight line code.
  // There are branches within the unrolled loop that go to the OtherExits.
  // The second point is the increase in code size, but this is true
  // irrespective of multiple exits.

  // Note: Both the heuristics below are coarse grained. We are essentially
  // enabling unrolling of loops that have a single side exit other than the
  // normal LatchExit (i.e. exiting into a deoptimize block).
  // The heuristics considered are:
  // 1. low number of branches in the unrolled version.
  // 2. high predictability of these extra branches.

  // Heuristic 1: reject loops that have more than two exiting blocks. This
  // limits the total number of branches in the unrolled loop to be at most
  // the unroll factor (since one of the exiting blocks is the latch block).
  SmallVector<BasicBlock *, 4> Exiting;
  L->getExitingBlocks(Exiting);
  if (Exiting.size() > 2)
    return false;

  // Allow unrolling of loops with no non latch exit blocks.
  if (OtherExits.empty())
    return true;

  // Heuristic 2: L has one exit other than the latch exit and that exit is a
  // deoptimize block. We know that deoptimize blocks are rarely taken, which
  // also implies the branch leading to the deoptimize block is highly
  // predictable. When UnrollRuntimeOtherExitPredictable is specified, we
  // assume the other exit branch is predictable even if it has no deoptimize
  // call.
  // TODO: These can be fine-tuned further to consider code size or deopt states
  // that are captured by the deoptimize exit block.
  // Also, we can extend this to support more cases, if we actually
  // know of kinds of multiexit loops that would benefit from unrolling.
  if (OtherExits.size() != 1)
    return false;
  if (UnrollRuntimeOtherExitPredictable)
    return true;
  return OtherExits.front()->getPostdominatingDeoptimizeCall() != nullptr;
}


/// Emit, through \p B, the computation of ModVal = (BECount + 1) % Count on
/// the abstract integer domain, i.e. accounting for the possibility that
/// BECount + 1 unsigned-overflows in the 2s complement domain.  The resulting
/// value is named "xtraiter".
///
/// Preconditions:
/// 1) TripCount = BECount + 1 (allowing overflow)
/// 2) Log2(Count) <= BitWidth(BECount)
static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
                                  Value *TripCount, unsigned Count) {
  if (!isPowerOf2_32(Count)) {
    // (BECount + 1) can potentially unsigned-overflow, so compute
    // ((BECount % Count) + 1) % Count instead.  The inner sum is overflow
    // safe because BECount % Count < Count.
    Constant *Divisor = ConstantInt::get(BECount->getType(), Count);
    Value *BackedgeRem = B.CreateURem(BECount, Divisor);
    Value *BackedgeRemPlusOne =
        B.CreateAdd(BackedgeRem, ConstantInt::get(BackedgeRem->getType(), 1));
    // At this point (BECount % Count) + 1 could be equal to Count, hence the
    // second reduction modulo Count.
    return B.CreateURem(BackedgeRemPlusOne, Divisor, "xtraiter");
  }

  // Power-of-two Count: mask TripCount (which is BECount + 1) directly.
  // If the masked value is zero, then either:
  //  1. There are no iterations to be run in the prolog/epilog loop.
  // OR
  //  2. The addition computing TripCount overflowed.
  //
  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
  // the number of iterations that remain to be run in the original loop is a
  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
  // precondition of this method).
  return B.CreateAnd(TripCount, Count - 1, "xtraiter");
}


/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is the total number
/// of loop bodies in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
/// ***Prolog case***
///        extraiters = tripcount % loopfactor
///        if (extraiters == 0) jump Loop:
///        else jump Prol:
/// Prol:  LoopBody;
///        extraiters -= 1                 // Omitted if unroll factor is 2.
///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.
///        if (tripcount < loopfactor) jump End:
/// Loop:
/// ...
/// End:
///
/// ***Epilog case***
///        extraiters = tripcount % loopfactor
///        if (tripcount < loopfactor) jump LoopExit:
///        unroll_iter = tripcount - extraiters
///        niter = 0
/// Loop:  LoopBody; (executes unroll_iter times);
///        niter += 1
///        if (niter != unroll_iter) jump Loop:
/// LoopExit:
///        if (extraiters == 0) jump EpilExit:
/// Epil:  LoopBody; (executes extraiters times)
///        extraiters -= 1                 // Omitted if unroll factor is 2.
///        if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.
/// EpilExit:
///
/// \param L The loop to generate remainder code for. It must be in
///        LoopSimplify form and its latch must end in a conditional branch
///        with one successor outside the loop; otherwise nothing is done.
/// \param Count The unroll factor.
/// \param AllowExpensiveTripCount When false, bail out if SCEVExpander
///        reports that materializing the trip count is a high-cost expansion.
/// \param UseEpilogRemainder Emit the leftover iterations after the unrolled
///        loop (epilog) when true, before it (prolog) when false.
/// \param UnrollRemainder When true and a remainder loop still exists at the
///        end, it is itself unrolled through UnrollLoop with a count of
///        Count - 1.
/// \param ForgetAllSCEV Forwarded to UnrollLoop when unrolling the remainder.
/// \param LI Loop info; updated for the newly created blocks and loop.
/// \param SE Scalar evolution; the function returns false if this is null.
/// \param DT Dominator tree; may be null, in which case it is not updated.
/// \param AC Assumption cache; dereferenced on the epilog path.
/// \param TTI Target info used for the expansion cost query and forwarded to
///        UnrollLoop.
/// \param PreserveLCSSA Must be true for loops with several exits/exiting
///        blocks; forwarded to the block-splitting utilities.
/// \param SCEVExpansionBudget Budget passed to the high-cost expansion query.
/// \param RuntimeUnrollMultiExit When true, multi-exit loops are accepted
///        without consulting the profitability heuristic (unless the
///        UnrollRuntimeMultiExit command-line option was given, which wins).
/// \param ResultLoop If non-null, receives the remainder loop unless the
///        remainder was fully unrolled. The stored value is null when the
///        remainder loop was dissolved (unroll factor 2).
/// \param OriginalTripCount Forwarded to CloneLoopBlocks.
/// \param OriginalLoopProb Probability used to derive branch weights for the
///        guard around the unrolled loop; may be unknown.
/// \returns true if the remainder code was generated. Every false return
///          happens before the CFG is first split.
bool llvm::UnrollRuntimeLoopRemainder(
    Loop *L, unsigned Count, bool AllowExpensiveTripCount,
    bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,
    LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    const TargetTransformInfo *TTI, bool PreserveLCSSA,
    unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit,
    Loop **ResultLoop, std::optional<unsigned> OriginalTripCount,
    BranchProbability OriginalLoopProb) {
  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  LLVM_DEBUG(L->dump());
  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
                                : dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  // Use dyn_cast (not cast) so that a latch terminated by something other
  // than a branch is rejected by the check below instead of asserting.
  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());

  if (!LatchBR || LatchBR->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    LLVM_DEBUG(
        dbgs()
        << "Loop latch not terminated by a conditional branch.\n");
    return false;
  }

  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);

  if (L->contains(LatchExit)) {
    // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
    // targets of the Latch be an exit block out of the loop.
    LLVM_DEBUG(
        dbgs()
        << "One of the loop latch successors must be the exit block.\n");
    return false;
  }

  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  L->getUniqueNonLatchExitBlocks(OtherExits);
  // Support only single exit and exiting block unless multi-exit loop
  // unrolling is enabled.
  if (!L->getExitingBlock() || !OtherExits.empty()) {
    // We rely on LCSSA form being preserved when the exit blocks are transformed.
    // (Note that only an off-by-default mode of the old PM disables
    // PreserveLCSSA.)
    if (!PreserveLCSSA)
      return false;

    // Priority goes to UnrollRuntimeMultiExit if it's supplied.
    if (UnrollRuntimeMultiExit.getNumOccurrences()) {
      if (!UnrollRuntimeMultiExit)
        return false;
    } else {
      // Otherwise perform multi-exit unrolling, if either the target indicates
      // it is profitable or the general profitability heuristics apply.
      if (!RuntimeUnrollMultiExit &&
          !canProfitablyRuntimeUnrollMultiExitLoop(L, OtherExits, LatchExit,
                                                   UseEpilogRemainder)) {
        LLVM_DEBUG(dbgs() << "Multiple exit/exiting blocks in loop and "
                             "multi-exit unrolling not enabled!\n");
        return false;
      }
    }
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count.
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is same as
  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
  // exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC)) {
    LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  // (Note that overflow can occur, this is handled explicitly below)
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, SCEVExpansionBudget, TTI,
                                   PreHeaderBR)) {
    LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    LLVM_DEBUG(
        dbgs()
        << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
                                     nullptr, PreserveLCSSA);
    // NewExit gets its DebugLoc from LatchExit, which is not part of the
    // original Loop.
    // Fix this by setting Loop's DebugLoc to NewExit.
    auto *NewExitTerminator = NewExit->getTerminator();
    NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");

    // If the latch exits from multiple level of nested loops, then
    // by assumption there must be another loop exit which branches to the
    // outer loop and we must adjust the loop for the newly inserted blocks
    // to account for the fact that our epilogue is still in the same outer
    // loop. Note that this leaves loopinfo temporarily out of sync with the
    // CFG until the actual epilogue loop is inserted.
    if (auto *ParentL = L->getParentLoop())
      if (LI->getLoopFor(LatchExit) != ParentL) {
        LI->removeBlock(NewExit);
        ParentL->addBasicBlockToLoop(NewExit, *LI);
        LI->removeBlock(EpilogPreHeader);
        ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
      }

  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit              LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  IRBuilder<> B(PreHeaderBR);
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount;
  // If there are other exits before the latch, that may cause the latch exit
  // branch to never be executed, and the latch exit count may be poison.
  // In this case, freeze the TripCount and base BECount on the frozen
  // TripCount. We will introduce two branches using these values, and it's
  // important that they see a consistent value (which would not be guaranteed
  // if they were frozen independently.)
  if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&
      !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {
    TripCount = B.CreateFreeze(TripCount);
    // BECount = TripCount - 1, expressed as an add of all-ones (-1).
    BECount =
        B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType()));
  } else {
    // If we don't need to freeze, use SCEVExpander for BECount as well, to
    // allow slightly better value reuse.
    BECount =
        Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);
  }

  Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);

  // Epilog: skip the unrolled loop when BECount < Count - 1 (i.e. fewer than
  // Count iterations). Prolog: run the prolog loop when the remainder is
  // non-zero.
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop =
      UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  MDNode *BranchWeights = nullptr;
  if ((OriginalLoopProb.isUnknown() || !UseEpilogRemainder) &&
      hasBranchWeightMD(*Latch->getTerminator())) {
    // Assume loop is nearly always entered.
    MDBuilder MDB(B.getContext());
    BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);
  }
  BranchInst *UnrollingLoopGuard =
      B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);
  if (!OriginalLoopProb.isUnknown() && UseEpilogRemainder) {
    // The original loop's first iteration always happens.  Compute the
    // probability of the original loop executing Count-1 iterations after that
    // to complete the first iteration of the unrolled loop.
    BranchProbability ProbOne = OriginalLoopProb;
    BranchProbability ProbRest = ProbOne.pow(Count - 1);
    setBranchProbability(UnrollingLoopGuard, ProbRest,
                         /*ForFirstTarget=*/false);
  }
  // The new conditional guard replaces the preheader's old terminator.
  PreHeaderBR->eraseFromParent();
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(EpilogPreHeader, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop =
      CloneLoopBlocks(L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop,
                      InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT,
                      LI, Count, OriginalTripCount, OriginalLoopProb);

  // Insert the cloned blocks into the function.
  F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region.
  for (auto *BB : OtherExits) {
    // Given we preserve LCSSA form, we know that the values used outside the
    // loop will be used through these phi nodes at the exit blocks that are
    // transformed below.
    for (PHINode &PN : BB->phis()) {
      // Snapshot the operand count: addIncoming below grows the phi, and only
      // the pre-existing operands must be visited.
      unsigned oldNumOperands = PN.getNumIncomingValues();
      // Add the incoming values from the remainder code to the end of the phi
      // node.
      for (unsigned i = 0; i < oldNumOperands; i++) {
        auto *PredBB = PN.getIncomingBlock(i);
        if (PredBB == Latch)
          // The latch exit is handled separately, see connectX
          continue;
        if (!L->contains(PredBB))
          // Even if we had dedicated exits, the code above inserted an
          // extra branch which can reach the latch exit.
          continue;

        auto *V = PN.getIncomingValue(i);
        // Values defined inside the loop must be replaced by their clones;
        // loop-invariant values are reused as-is.
        if (Instruction *I = dyn_cast<Instruction>(V))
          if (L->contains(I))
            V = VMap.lookup(I);
        PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
      }
    }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
  }

  // Update the immediate dominator of the exit blocks and blocks that are
  // reachable from the exit blocks. This is needed because we now have paths
  // from both the original loop and the remainder code reaching the exit
  // blocks. While the IDom of these exit blocks were from the original loop,
  // now the IDom is the preheader (which decides whether the original loop or
  // remainder code should run) unless the block still has just the original
  // predecessor (such as NewExit in the case of an epilog remainder).
  if (DT && !L->getExitingBlock()) {
    SmallVector<BasicBlock *, 16> ChildrenToUpdate;
    // NB! We have to examine the dom children of all loop blocks, not just
    // those which are the IDom of the exit blocks. This is because blocks
    // reachable from the exit blocks can have their IDom as the nearest common
    // dominator of the exit blocks.
    for (auto *BB : L->blocks()) {
      auto *DomNodeBB = DT->getNode(BB);
      for (auto *DomChild : DomNodeBB->children()) {
        auto *DomChildBB = DomChild->getBlock();
        if (!L->contains(LI->getLoopFor(DomChildBB)) &&
            DomChildBB->getUniquePredecessor() != BB)
          ChildrenToUpdate.push_back(DomChildBB);
      }
    }
    // Applied after the walk so the dominator tree is not mutated while its
    // children lists are being iterated.
    for (auto *BB : ChildrenToUpdate)
      DT->changeImmediateDominator(BB, PreHeader);
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit              LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    Module *M = BB->getModule();
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
      RemapDbgRecordRange(M, I.getDbgRecordRange(), VMap,
                          RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count, *AC,
                  OriginalLoopProb);

    // Update counter in loop for unrolling.
    // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
    // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
    // thus we must compare the post-increment (wrapping) value.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    // Fetch the latch terminator afresh under a distinct name so the
    // function-level LatchBR is not shadowed.
    BranchInst *UnrolledLatchBR = cast<BranchInst>(Latch->getTerminator());
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");
    NewIdx->insertBefore(Header->getFirstNonPHIIt());
    B2.SetInsertPoint(UnrolledLatchBR);
    auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
    auto *One = ConstantInt::get(NewIdx->getType(), 1);
    Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
    // Pick the predicate so that the branch keeps looping while
    // IdxNext != TestVal, whichever successor slot holds the header.
    auto Pred = UnrolledLatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE
                                                           : ICmpInst::ICMP_EQ;
    Value *IdxCmp =
        B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(Zero, NewPreHeader);
    NewIdx->addIncoming(IdxNext, Latch);
    UnrolledLatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);
  }

  // If this loop is nested, then the loop unroller changes the code in any
  // of its parent loops, so the Scalar Evolution pass needs to be run again.
  SE->forgetTopmostLoop(L);

  // Verify that the Dom Tree and Loop Info are correct.
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
  if (DT) {
    assert(DT->verify(DominatorTree::VerificationLevel::Full));
    LI->verify(*DT);
  }
#endif

  // For unroll factor 2 remainder loop will have 1 iteration.
  if (Count == 2 && DT && LI && SE) {
    // TODO: This code could probably be pulled out into a helper function
    // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
    BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
    assert(RemainderLatch);
    // Copy the block list before the loop is dissolved; the Loop object must
    // not be used after breakLoopBackedge.
    SmallVector<BasicBlock *> RemainderBlocks(remainderLoop->getBlocks());
    breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
    remainderLoop = nullptr;

    // Simplify loop values after breaking the backedge
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    for (BasicBlock *BB : RemainderBlocks) {
      for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
        if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
          if (LI->replacementPreservesLCSSAForm(&Inst, V))
            Inst.replaceAllUsesWith(V);
        if (isInstructionTriviallyDead(&Inst))
          DeadInsts.emplace_back(&Inst);
      }
      // We can't do recursive deletion until we're done iterating, as we might
      // have a phi which (potentially indirectly) uses instructions later in
      // the block we're iterating through.
      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
    }

    // Merge latch into exit block.
    auto *ExitBB = RemainderLatch->getSingleSuccessor();
    assert(ExitBB && "required after breaking cond br backedge");
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
    MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
  }

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (!OtherExits.empty()) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
  }

  auto UnrollResult = LoopUnrollResult::Unmodified;
  if (remainderLoop && UnrollRemainder) {
    LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollLoopOptions ULO;
    // The remainder loop is unrolled with a count of Count - 1.
    ULO.Count = Count - 1;
    ULO.Force = false;
    ULO.Runtime = false;
    ULO.AllowExpensiveTripCount = false;
    ULO.UnrollRemainder = false;
    ULO.ForgetAllSCEV = ForgetAllSCEV;
    assert(!getLoopConvergenceHeart(L) &&
           "A loop with a convergence heart does not allow runtime unrolling.");
    UnrollResult = UnrollLoop(remainderLoop, ULO, LI, SE, DT, AC, TTI,
                              /*ORE*/ nullptr, PreserveLCSSA);
  }

  // Report the remainder loop to the caller unless it was fully unrolled.
  // The reported value is null when the loop was dissolved above.
  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
    *ResultLoop = remainderLoop;
  NumRuntimeUnrolled++;
  return true;
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

BasicBlockUtils.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Cloning.h

CommandLine.h

DomTreeUpdater.h

Dominators.h

BasicBlock.h

Module.h
Module.h This file contains the declarations for the Module class.

InstructionSimplify.h

LoopIterator.h

ConnectEpilog
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count, AssumptionCache &AC, BranchProbability OriginalLoopProb)
Connect the unrolling epilog code to the original loop.
Definition LoopUnrollRuntime.cpp:256

UnrolledLoopHeaderWeights
static const uint32_t UnrolledLoopHeaderWeights[]
Definition LoopUnrollRuntime.cpp:63

CreateTripRemainder
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
Definition LoopUnrollRuntime.cpp:592

CloneLoopBlocks
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count, std::optional< unsigned > OriginalTripCount, BranchProbability OriginalLoopProb)
Create a clone of the blocks in a loop and connect them together.
Definition LoopUnrollRuntime.cpp:405

UnrollRuntimeOtherExitPredictable
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))

canProfitablyRuntimeUnrollMultiExitLoop
static bool canProfitablyRuntimeUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
Definition LoopUnrollRuntime.cpp:544

EpilogHeaderWeights
static const uint32_t EpilogHeaderWeights[]
Definition LoopUnrollRuntime.cpp:68

UnrollRuntimeMultiExit
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))

probOfNextInRemainder
static BranchProbability probOfNextInRemainder(BranchProbability OriginalLoopProb, unsigned N)
Assume, due to our position in the remainder loop or its guard, anywhere from 0 to N more iterations ...
Definition LoopUnrollRuntime.cpp:204

ConnectProlog
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling prolog code to the original loop.
Definition LoopUnrollRuntime.cpp:83

LoopUtils.h

F
#define F(x, y, z)
Definition MD5.cpp:55

I
#define I(x, y, z)
Definition MD5.cpp:58

MDBuilder.h

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

ScalarEvolutionExpander.h

ScalarEvolution.h

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

Local.h

UnrollLoop.h

ValueTracking.h

llvm::AssumeInst
This represents the llvm.assume intrinsic.
Definition IntrinsicInst.h:1752

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition AssumptionCache.h:44

llvm::AssumptionCache::registerAssumption
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Definition AssumptionCache.cpp:194

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528

llvm::BasicBlock::getFirstNonPHIIt
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
Definition BasicBlock.cpp:337

llvm::BasicBlock::getSingleSuccessor
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition BasicBlock.cpp:467

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233

llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition Instructions.h:3058

llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition Instructions.h:3139

llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition Instructions.h:3146

llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition Instructions.h:3131

llvm::BranchProbability
Definition BranchProbability.h:32

llvm::BranchProbability::getBranchProbability
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
Definition BranchProbability.cpp:51

llvm::BranchProbability::getDenominator
static uint32_t getDenominator()
Definition BranchProbability.h:73

llvm::BranchProbability::pow
BranchProbability pow(unsigned N) const
Compute pow(Probability, N).
Definition BranchProbability.cpp:113

llvm::BranchProbability::getOne
static BranchProbability getOne()
Definition BranchProbability.h:52

llvm::BranchProbability::getNumerator
uint32_t getNumerator() const
Definition BranchProbability.h:72

llvm::BranchProbability::isUnknown
bool isUnknown() const
Definition BranchProbability.h:49

llvm::BranchProbability::getCompl
BranchProbability getCompl() const
Definition BranchProbability.h:76

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:697

llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition InstrTypes.h:698

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getAllOnesValue
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Definition Constants.cpp:420

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63

llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition GenericDomTree.h:90

llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition GenericDomTree.h:89

llvm::DomTreeUpdater
Definition DomTreeUpdater.h:34

llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition GenericDomTree.h:905

llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition GenericDomTree.h:723

llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition GenericDomTree.h:687

llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition GenericDomTree.h:401

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165

llvm::DominatorTree::findNearestCommonDominator
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition Dominators.cpp:357

llvm::Function
Definition Function.h:64

llvm::IRBuilderBase::CreateAssumption
LLVM_ABI CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles={})
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition IRBuilder.cpp:464

llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420

llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1403

llvm::IRBuilderBase::CreateIsNotNull
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition IRBuilder.h:2659

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788

llvm::Instruction
Definition Instruction.h:69

llvm::Instruction::insertBefore
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
Definition Instruction.cpp:119

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:108

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition Instruction.h:510

llvm::Instruction::setSuccessor
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition Instruction.cpp:1313

llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
Definition GenericLoopInfo.h:124

llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition GenericLoopInfoImpl.h:256

llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition GenericLoopInfo.h:173

llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition LoopIterator.h:97

llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition LoopIterator.h:101

llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition GenericLoopInfoImpl.h:749

llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
Definition GenericLoopInfo.h:606

llvm::LoopInfo
Definition LoopInfo.h:408

llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition LoopInfo.h:441

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition LoopInfo.cpp:538

llvm::MDBuilder
Definition MDBuilder.h:37

llvm::MDBuilder::createBranchWeights
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38

llvm::MDNode
Metadata node.
Definition Metadata.h:1078

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PHINode
Definition Instructions.h:2639

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2774

llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value for block BB to V.
Definition Instructions.h:2822

llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition Instructions.h:2753

llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition Instructions.h:2717

llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition Instructions.h:2714

llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition Instructions.h:2808

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition Instructions.h:2674

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:1888

llvm::SCEVExpander
This class uses information about analyzed scalars to rewrite expressions in canonical form.
Definition ScalarEvolutionExpander.h:64

llvm::SCEVExpander::isHighCostExpansion
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Definition ScalarEvolutionExpander.h:251

llvm::SCEVExpander::expandCodeFor
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Definition ScalarEvolutionExpander.cpp:1511

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:72

llvm::SCEV::getType
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Definition ScalarEvolution.cpp:383

llvm::ScalarEvolution
The main scalar evolution driver.
Definition ScalarEvolution.h:448

llvm::ScalarEvolution::getConstant
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
Definition ScalarEvolution.cpp:470

llvm::ScalarEvolution::loopHasNoAbnormalExits
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
Definition ScalarEvolution.h:1396

llvm::ScalarEvolution::forgetTopmostLoop
LLVM_ABI void forgetTopmostLoop(const Loop *L)
Definition ScalarEvolution.cpp:8581

llvm::ScalarEvolution::forgetLcssaPhiWithNewPredecessor
LLVM_ABI void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V)
Forget LCSSA phi node V of loop L to which a new predecessor was added, such that it may no longer be...
Definition ScalarEvolution.cpp:8600

llvm::ScalarEvolution::getExitCount
LLVM_ABI const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition ScalarEvolution.cpp:8359

llvm::ScalarEvolution::getAddExpr
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition ScalarEvolution.cpp:2515

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:574

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:944

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:417

llvm::SmallVectorTemplateCommon::size
size_t size() const
Definition SmallVector.h:80

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:83

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1203

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:223

llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition ValueMap.h:167

llvm::ValueMap::erase
bool erase(const KeyT &Val)
Definition ValueMap.h:192

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256

llvm::Value::setName
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:382

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:314

llvm::cl::opt
Definition CommandLine.h:1455

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

uint32_t

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:139

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:445

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition AddressRanges.h:18

llvm::RecursivelyDeleteTriviallyDeadInstructions
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533

llvm::CloneBasicBlock
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
Definition CloneFunction.cpp:111

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition MachineBasicBlock.h:1437

llvm::NewLoopsMap
SmallDenseMap< const Loop *, Loop *, 4 > NewLoopsMap
Definition UnrollLoop.h:41

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632

llvm::simplifyInstruction
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Definition InstructionSimplify.cpp:7507

llvm::setBranchProbability
bool setBranchProbability(BranchInst *B, BranchProbability P, bool ForFirstTarget)
Set branch weight metadata for B to indicate that P and 1 - P are the probabilities of control flowin...
Definition LoopUtils.cpp:1003

llvm::isInstructionTriviallyDead
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:402

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331

llvm::RemapDbgRecordRange
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
Definition ValueMapper.h:317

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279

llvm::getLoopConvergenceHeart
LLVM_ABI CallBase * getLoopConvergenceHeart(const Loop *TheLoop)
Find the convergence heart of the loop.
Definition LoopInfo.cpp:1132

llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98

llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.
Definition UnrollLoop.h:60

llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.
Definition UnrollLoop.h:69

llvm::breakLoopBackedge
LLVM_ABI void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition LoopUtils.cpp:711

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::SplitBlockPredecessors
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
Definition BasicBlockUtils.cpp:1369

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:218

llvm::MergeBlockIntoPredecessor
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
Definition BasicBlockUtils.cpp:218

llvm::formDedicatedExitBlocks
LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition LoopUtils.cpp:58

llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition ValueMapper.h:289

llvm::isGuaranteedNotToBeUndefOrPoison
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition ValueTracking.cpp:7704

llvm::ValueToValueMapTy
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
Definition MemorySSAUpdater.h:51

llvm::setLoopEstimatedTripCount
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
Definition LoopUtils.cpp:927

llvm::addClonedBlockToLoopInfo
LLVM_ABI const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the o...
Definition LoopUnroll.cpp:150

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::SplitBlock
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition BasicBlockUtils.cpp:1034

llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition MachineBasicBlock.h:1438

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897

llvm::hasBranchWeightMD
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Definition ProfDataUtils.cpp:131

llvm::SplitEdge
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
Definition BasicBlockUtils.cpp:644

llvm::UnrollRuntimeLoopRemainder
LLVM_ABI bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, unsigned SCEVExpansionBudget, bool RuntimeUnrollMultiExit, Loop **ResultLoop=nullptr, std::optional< unsigned > OriginalTripCount=std::nullopt, BranchProbability OriginalLoopProb=BranchProbability::getUnknown())
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition LoopUnrollRuntime.cpp:657

llvm::UnrollLoop
LLVM_ABI LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr, AAResults *AA=nullptr)
Unroll the given loop by Count.
Definition LoopUnroll.cpp:460

raw_ostream.h

N
#define N

llvm::UnrollLoopOptions
Definition UnrollLoop.h:72

llvm::UnrollLoopOptions::Runtime
bool Runtime
Definition UnrollLoop.h:75

llvm::UnrollLoopOptions::Count
unsigned Count
Definition UnrollLoop.h:73

llvm::UnrollLoopOptions::UnrollRemainder
bool UnrollRemainder
Definition UnrollLoop.h:77

llvm::UnrollLoopOptions::Force
bool Force
Definition UnrollLoop.h:74

llvm::UnrollLoopOptions::AllowExpensiveTripCount
bool AllowExpensiveTripCount
Definition UnrollLoop.h:76

llvm::UnrollLoopOptions::ForgetAllSCEV
bool ForgetAllSCEV
Definition UnrollLoop.h:78

llvm::cl::desc
Definition CommandLine.h:411