doxygen/LoopUnrollRuntime_8cpp_source.html

//===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements some loop unrolling utilities for loops with run-time

// trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time

// trip counts.

//

// The functions in this file are used to generate extra code when the

// run-time trip count modulo the unroll factor is not 0.  When this is the

// case, we need to generate code to execute these 'left over' iterations.

//

// The current strategy generates an if-then-else sequence prior to the

// unrolled loop to execute the 'left over' iterations before or after the

// unrolled loop.

//

//===----------------------------------------------------------------------===//


#include "llvm/ADT/Statistic.h"

#include "llvm/Analysis/DomTreeUpdater.h"

#include "llvm/Analysis/InstructionSimplify.h"

#include "llvm/Analysis/LoopIterator.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/ValueTracking.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/MDBuilder.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/ProfDataUtils.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/Cloning.h"

#include "llvm/Transforms/Utils/Local.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"

#include "llvm/Transforms/Utils/UnrollLoop.h"

#include <algorithm>


using namespace llvm;


#define DEBUG_TYPE "loop-unroll"


// Statistic counter for this file; the string below is its description.
STATISTIC(NumRuntimeUnrolled,
          "Number of loops unrolled with run-time trip counts");
// Hidden boolean option, false by default. When it appears explicitly on the
// command line, canProfitablyUnrollMultiExitLoop returns its value directly
// and skips the profitability heuristics.
static cl::opt<bool> UnrollRuntimeMultiExit(
    "unroll-runtime-multi-exit", cl::init(false), cl::Hidden,
    cl::desc("Allow runtime unrolling for loops with multiple exits, when "
             "epilog is generated"));
// Hidden boolean option, false by default. When set,
// canProfitablyUnrollMultiExitLoop accepts a loop with exactly one non-latch
// exit block even if that block has no post-dominating deoptimize call.
static cl::opt<bool> UnrollRuntimeOtherExitPredictable(
    "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden,
    cl::desc("Assume the non latch exit block to be predictable"));


// Probability that the loop trip count is so small that after the prolog
// we do not enter the unrolled loop at all.
// It is unlikely that the loop trip count is smaller than the unroll factor;
// other than that, the choice of constant is not tuned yet.
// The order of the two weights matters: ConnectProlog attaches them to a
// conditional branch whose first (true) successor bypasses the unrolled loop
// and whose second (false) successor enters it.
static const uint32_t UnrolledLoopHeaderWeights[] = {1, 127};
// Probability that the loop trip count is so small that we skip the unrolled
// loop completely and immediately enter the epilogue loop.
// It is unlikely that the loop trip count is smaller than the unroll factor;
// other than that, the choice of constant is not tuned yet.
// NOTE(review): the use site is not in this part of the file; presumably the
// first weight again belongs to the edge that skips the unrolled loop --
// confirm against the branch that consumes it.
static const uint32_t EpilogHeaderWeights[] = {1, 127};


/// Wire the prolog remainder loop into the CFG ahead of the original loop.
///
/// The prolog executes the 'extra' iterations, i.e. the run-time trip count
/// modulo the unroll count, before the unrolled loop runs. Expected layout:
///
///   PreHeader
///     PrologHeader ... PrologLatch
///   PrologExit
///   NewPreHeader
///     Header ... Latch
///   LatchExit
///
/// Steps performed:
///  - For every PHI in a successor of the latch (the loop header and the
///    latch exit), create a ".unr" PHI in \p PrologExit that merges the value
///    arriving straight from \p PreHeader (prolog skipped) with the value
///    leaving the cloned prolog latch, and feed it into the existing PHI.
///  - Split the in-loop predecessors of \p PrologExit so that the prolog loop
///    stays in simplified form.
///  - Replace the terminator of \p PrologExit with a conditional branch that
///    jumps to \p OriginalLoopLatchExit when BECount <u Count - 1 (the prolog
///    already ran every iteration) and to \p NewPreHeader otherwise.
///  - Refresh the immediate dominator of \p OriginalLoopLatchExit when a
///    dominator tree is supplied.
///
/// \p VMap maps original loop values/blocks to their prolog clones. \p Count
/// is the unroll count and must be non-zero.
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                          BasicBlock *PrologExit,
                          BasicBlock *OriginalLoopLatchExit,
                          BasicBlock *PreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA,
                          ScalarEvolution &SE) {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);

  // Every value that flows out of the original latch also flows out of the
  // prolog latch, so each PHI fed by the latch needs a merge PHI in
  // PrologExit.
  // TODO: This code assumes that the PrologExit (or the LatchExit block for
  // prolog loop) contains only one predecessor from the loop, i.e. the
  // PrologLatch. When supporting multiple-exiting block loops, we can have
  // two or more blocks that have the LatchExit as the target in the
  // original loop.
  for (BasicBlock *LatchSucc : successors(Latch)) {
    for (PHINode &Phi : LatchSucc->phis()) {
      // True when LatchSucc is the loop header, false when it is LatchExit.
      const bool PhiInLoop = L->contains(&Phi);

      PHINode *MergePhi =
          PHINode::Create(Phi.getType(), 2, Phi.getName() + ".unr");
      MergePhi->insertBefore(PrologExit->getFirstNonPHIIt());

      // Incoming value on the path that bypasses the prolog entirely: the
      // header PHI's initial value, or undef for an exit PHI.
      Value *SkipPrologVal =
          PhiInLoop ? Phi.getIncomingValueForBlock(NewPreHeader)
                    : UndefValue::get(Phi.getType());
      MergePhi->addIncoming(SkipPrologVal, PreHeader);

      // Incoming value on the path through the prolog: the clone of whatever
      // the original latch supplied (loop-invariant values map to
      // themselves).
      Value *FromProlog = Phi.getIncomingValueForBlock(Latch);
      auto *DefInst = dyn_cast<Instruction>(FromProlog);
      if (DefInst && L->contains(DefInst))
        FromProlog = VMap.lookup(DefInst);
      MergePhi->addIncoming(FromProlog, PrologLatch);

      // A header PHI has its preheader operand rewritten; an exit PHI gains
      // a new edge from PrologExit.
      if (PhiInLoop)
        Phi.setIncomingValueForBlock(NewPreHeader, MergePhi);
      else
        Phi.addIncoming(MergePhi, PrologExit);
      SE.forgetValue(&Phi);
    }
  }

  // Keep the freshly created prolog loop in simplified form by giving it a
  // dedicated exit block.
  if (Loop *PrologLoop = LI->getLoopFor(PrologLatch)) {
    SmallVector<BasicBlock *, 4> InLoopPreds;
    for (BasicBlock *Pred : predecessors(PrologExit))
      if (PrologLoop->contains(Pred))
        InLoopPreds.push_back(Pred);

    SplitBlockPredecessors(PrologExit, InLoopPreds, ".unr-lcssa", DT, LI,
                           nullptr, PreserveLCSSA);
  }

  // Build the branch around the original loop, taken when the prolog has
  // already executed every iteration.
  Instruction *OldTerm = PrologExit->getTerminator();
  IRBuilder<> Builder(OldTerm);

  assert(Count != 0 && "nonsensical Count!");

  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1).
  // This means %xtraiter is (BECount + 1) and all of the iterations of this
  // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
  // then (BECount + 1) cannot unsigned-overflow.
  Value *SkipMainLoop = Builder.CreateICmpULT(
      BECount, ConstantInt::get(BECount->getType(), Count - 1));

  // Split the exit to maintain loop canonicalization guarantees.
  SmallVector<BasicBlock *, 4> ExitPreds(predecessors(OriginalLoopLatchExit));
  SplitBlockPredecessors(OriginalLoopLatchExit, ExitPreds, ".unr-lcssa", DT,
                         LI, nullptr, PreserveLCSSA);

  // Only attach profile weights when the original latch carried some; the
  // unrolled loop is assumed to be entered nearly always.
  MDNode *Weights = nullptr;
  if (hasBranchWeightMD(*Latch->getTerminator()))
    Weights = MDBuilder(Builder.getContext())
                  .createBranchWeights(UnrolledLoopHeaderWeights);

  Builder.CreateCondBr(SkipMainLoop, OriginalLoopLatchExit, NewPreHeader,
                       Weights);
  OldTerm->eraseFromParent();

  if (!DT)
    return;

  // The exit block is now also reachable from PrologExit.
  DT->changeImmediateDominator(
      OriginalLoopLatchExit,
      DT->findNearestCommonDominator(OriginalLoopLatchExit, PrologExit));
}


/// Connect the unrolling epilog code to the original loop.
/// The unrolling epilog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
/// - Create PHI nodes at the unrolling loop exit to combine
///   values that exit the unrolling loop code and jump around it.
/// - Update PHI operands in the epilog loop by the new PHI nodes
/// - Branch around the epilog loop if extra iters (ModVal) is zero.
///
/// \param L               The loop being unrolled; must have a latch.
/// \param ModVal          Number of extra iterations; the epilog is entered
///                        only when this is non-zero.
/// \param NewExit         Block split off in front of \p Exit that receives
///                        the latch exit edge; its terminator is replaced by
///                        the conditional branch created here.
/// \param Exit            The original latch exit block.
/// \param PreHeader       Block from which the unrolled loop can be bypassed.
/// \param EpilogPreHeader Preheader of the epilog loop.
/// \param NewPreHeader    Preheader of the unrolled loop.
/// \param VMap            Maps original loop values/blocks to epilog clones.
/// \param DT              Optional dominator tree to keep up to date.
/// \param LI, PreserveLCSSA Forwarded to SplitBlockPredecessors.
/// \param SE              Used to invalidate cached info for modified PHIs.
/// \param Count           Unroll count; used only for the branch weights.
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
                          BasicBlock *Exit, BasicBlock *PreHeader,
                          BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE,
                          unsigned Count) {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);

  // Loop structure should be the following:
  //
  // PreHeader
  // NewPreHeader
  //   Header
  //   ...
  //   Latch
  // NewExit (PN)
  // EpilogPreHeader
  //   EpilogHeader
  //   ...
  //   EpilogLatch
  // Exit (EpilogPN)

  // Update PHI nodes at NewExit and Exit.
  for (PHINode &PN : NewExit->phis()) {
    // PN should be used in another PHI located in Exit block as
    // Exit was split by SplitBlockPredecessors into Exit and NewExit
    // Basically it should look like:
    // NewExit:
    //   PN = PHI [I, Latch]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
    //
    // Exits from non-latch blocks point to the original exit block and the
    // epilogue edges have already been added.
    //
    // There is EpilogPreHeader incoming block instead of NewExit as
    // NewExit was split 1 more time to get EpilogPreHeader.
    assert(PN.hasOneUse() && "The phi should have 1 use");
    PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());
    assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");

    // Add incoming PreHeader from branch around the Loop
    PN.addIncoming(UndefValue::get(PN.getType()), PreHeader);
    SE.forgetValue(&PN);

    Value *V = PN.getIncomingValueForBlock(Latch);
    Instruction *I = dyn_cast<Instruction>(V);
    if (I && L->contains(I))
      // If value comes from an instruction in the loop add VMap value.
      V = VMap.lookup(I);
    // For the instruction out of the loop, constant or undefined value
    // insert value itself.
    EpilogPN->addIncoming(V, EpilogLatch);

    assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
           "EpilogPN should have EpilogPreHeader incoming block");
    // Change EpilogPreHeader incoming block to NewExit.
    EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
                               NewExit);
    // Now PHIs should look like:
    // NewExit:
    //   PN = PHI [I, Latch], [undef, PreHeader]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
  }

  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
  // Update corresponding PHI nodes in epilog loop.
  for (BasicBlock *Succ : successors(Latch)) {
    // Skip this as we already updated phis in exit blocks.
    if (!L->contains(Succ))
      continue;
    for (PHINode &PN : Succ->phis()) {
      // Add new PHI nodes to the loop exit block and update epilog
      // PHIs with the new PHI values.
      PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr");
      NewPN->insertBefore(NewExit->getFirstNonPHIIt());
      // Adding a value to the new PHI node from the unrolling loop preheader.
      NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);
      // Adding a value to the new PHI node from the unrolling loop latch.
      NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);

      // Update the existing PHI node operand with the value from the new PHI
      // node.  Corresponding instruction in epilog loop should be PHI.
      PHINode *VPN = cast<PHINode>(VMap[&PN]);
      VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
    }
  }

  Instruction *InsertPt = NewExit->getTerminator();
  IRBuilder<> B(InsertPt);
  // Despite the variable's name, a true condition (ModVal != 0) means the
  // epilog loop must run; a false condition goes straight to Exit.
  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
  assert(Exit && "Loop must have a single exit block only");
  // Split the epilogue exit to maintain loop canonicalization guarantees
  SmallVector<BasicBlock *, 4> Preds(predecessors(Exit));
  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr,
                         PreserveLCSSA);
  // Add the branch that either enters the epilog loop or skips it.
  MDNode *BranchWeights = nullptr;
  if (hasBranchWeightMD(*Latch->getTerminator())) {
    // Assume ModVal is equally distributed over [0, Count): exactly one of
    // the Count possible values (zero) skips the epilog, the other Count - 1
    // enter it. The true successor of the branch below is EpilogPreHeader,
    // so the true weight is Count - 1 and the false weight is 1.
    MDBuilder MDB(B.getContext());
    BranchWeights = MDB.createBranchWeights(Count - 1, 1);
  }
  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit, BranchWeights);
  InsertPt->eraseFromParent();
  if (DT) {
    // Exit is now reachable directly from NewExit as well as via the epilog.
    auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
    DT->changeImmediateDominator(Exit, NewDom);
  }

  // Split the main loop exit to maintain canonicalization guarantees.
  SmallVector<BasicBlock *, 4> NewExitPreds{Latch};
  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr,
                         PreserveLCSSA);
}


/// Create a clone of the blocks in a loop and connect them together. A new
/// loop will be created including all cloned blocks. The cloned loop is
/// driven by a fresh counter PHI ("<suffix>.iter") that starts at 0 in the
/// cloned header, is incremented in the cloned latch, and takes the backedge
/// while the incremented value differs from \p NewIter.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// InsertTop should be new preheader, InsertBot new loop exit.
///
/// \param L                  Loop to clone.
/// \param NewIter            Number of iterations the clone should execute.
/// \param UseEpilogRemainder Selects the name suffix: "epil" when true,
///                           "prol" otherwise.
/// \param UnrollRemainder    When true no loop metadata is attached to the
///                           clone (it is going to be unrolled completely).
/// \param InsertTop          Block whose first successor is redirected to the
///                           cloned header.
/// \param InsertBot          Exit target of the cloned latch.
/// \param Preheader          Original preheader, i.e. the block named by the
///                           header PHIs' entry edges.
/// \param NewBlocks          Receives the cloned blocks in RPO order.
/// \param LoopBlocks         DFS of \p L supplying the RPO traversal.
/// \param VMap               Updated with original -> clone mappings.
/// \param DT, LI             Analyses to update (\p DT may be null).
/// \param Count              Unroll count; used only for branch weights.
/// \returns the new cloned loop that is created.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
                const bool UnrollRemainder, BasicBlock *InsertTop,
                BasicBlock *InsertBot, BasicBlock *Preheader,
                std::vector<BasicBlock *> &NewBlocks,
                LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                DominatorTree *DT, LoopInfo *LI, unsigned Count) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *ParentLoop = L->getParentLoop();
  NewLoopsMap NewLoops;
  NewLoops[ParentLoop] = ParentLoop;

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
    NewBlocks.push_back(NewBB);

    addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

    if (DT) {
      if (Header == *BB) {
        // The header is dominated by the preheader.
        DT->addNewBlock(NewBB, InsertTop);
      } else {
        // Copy information from original loop to unrolled loop. RPO order
        // guarantees the immediate dominator has already been cloned.
        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
      }
    }

    if (Latch == *BB) {
      // For the last block, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.
      // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
      // thus we must compare the post-increment (wrapping) value.
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      PHINode *NewIdx =
          PHINode::Create(NewIter->getType(), 2, suffix + ".iter");
      NewIdx->insertBefore(FirstLoopBB->getFirstNonPHIIt());
      auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
      auto *One = ConstantInt::get(NewIdx->getType(), 1);
      Value *IdxNext =
          Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
      Value *IdxCmp =
          Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
      MDNode *BranchWeights = nullptr;
      if (hasBranchWeightMD(*LatchBR)) {
        // Note: We do not enter this loop for zero-remainders. The check
        // is at the end of the loop. We assume equal distribution between
        // possible remainders in [1, Count).
        // For Count < 3 the backedge is unnecessary and should never be
        // taken; the conditional jump should be optimized away later.
        const uint32_t ExitWeight = 1;
        const uint32_t BackEdgeWeight = Count >= 3 ? (Count - 2) / 2 : 0;
        MDBuilder MDB(Builder.getContext());
        BranchWeights = MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
      }
      Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot, BranchWeights);
      NewIdx->addIncoming(Zero, InsertTop);
      NewIdx->addIncoming(IdxNext, NewBB);
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop. Every loop block, the latch included, was mapped above, so
  // the cloned latch can be looked up once.
  BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
  for (PHINode &OrigPHI : Header->phis()) {
    PHINode *NewPHI = cast<PHINode>(VMap[&OrigPHI]);

    // getBasicBlockIndex reports a missing block as -1; keep the result
    // signed and check it instead of letting it wrap to a huge unsigned index.
    int PreheaderIdx = NewPHI->getBasicBlockIndex(Preheader);
    assert(PreheaderIdx >= 0 &&
           "Cloned header PHI should have Preheader incoming block");
    NewPHI->setIncomingBlock(PreheaderIdx, InsertTop);

    int LatchIdx = NewPHI->getBasicBlockIndex(Latch);
    assert(LatchIdx >= 0 &&
           "Cloned header PHI should have Latch incoming block");
    Value *InVal = NewPHI->getIncomingValue(LatchIdx);
    NewPHI->setIncomingBlock(LatchIdx, NewLatch);
    // Values defined inside the loop are replaced by their clones; anything
    // without a mapping is left untouched.
    if (Value *V = VMap.lookup(InVal))
      NewPHI->setIncomingValue(LatchIdx, V);
  }

  Loop *NewLoop = NewLoops[L];
  assert(NewLoop && "L should have been cloned");
  MDNode *LoopID = NewLoop->getLoopID();

  // Only add loop metadata if the loop is not going to be completely
  // unrolled.
  if (UnrollRemainder)
    return NewLoop;

  std::optional<MDNode *> NewLoopID = makeFollowupLoopID(
      LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
  if (NewLoopID) {
    NewLoop->setLoopID(*NewLoopID);

    // Do not setLoopAlreadyUnrolled if loop attributes have been defined
    // explicitly.
    return NewLoop;
  }

  // Add unroll disable metadata to disable future unrolling for this loop.
  NewLoop->setLoopAlreadyUnrolled();
  return NewLoop;
}


/// Decide whether runtime-unrolling the multi-exit loop \p L is considered
/// profitable.
///
/// If -unroll-runtime-multi-exit was passed explicitly, its value is the
/// answer. Otherwise the loop is accepted when it has at most two exiting
/// blocks and either
///  - there is no exit block other than the latch exit, or
///  - there is exactly one such block and it is either post-dominated by a
///    deoptimize call or -unroll-runtime-other-exit-predictable is set.
///
/// \p OtherExits holds the exit blocks that are not the latch exit.
/// \p LatchExit and \p UseEpilogRemainder are currently not consulted.
static bool canProfitablyUnrollMultiExitLoop(
    Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
    bool UseEpilogRemainder) {

  // An explicit command-line choice beats every heuristic below.
  if (UnrollRuntimeMultiExit.getNumOccurrences() != 0)
    return UnrollRuntimeMultiExit;

  // The main pain point with multi-exit loop unrolling is that once unrolled,
  // we will not be able to merge all blocks into a straight line code: there
  // are branches within the unrolled loop that go to the OtherExits. (Code
  // size grows too, but that is true irrespective of multiple exits.)
  //
  // Both heuristics below are coarse grained. They essentially enable
  // unrolling of loops that have a single side exit other than the normal
  // LatchExit (i.e. exiting into a deoptimize block), looking for:
  //   1. a low number of branches in the unrolled version, and
  //   2. high predictability of these extra branches.

  // Heuristic 1: refuse loops with more than two exiting blocks. This limits
  // the total number of branches in the unrolled loop to be at most the
  // unroll factor (since one of the exiting blocks is the latch block).
  SmallVector<BasicBlock *, 4> Exiting;
  L->getExitingBlocks(Exiting);
  if (Exiting.size() > 2)
    return false;

  // No exit block besides the latch exit: nothing further to check.
  if (OtherExits.empty())
    return true;

  // Heuristic 2: exactly one other exit, and the branch to it must be highly
  // predictable. Deoptimize blocks are rarely taken, so an exit that is
  // post-dominated by a deoptimize call qualifies;
  // UnrollRuntimeOtherExitPredictable asserts predictability even without
  // such a call.
  if (OtherExits.size() != 1)
    return false;
  if (UnrollRuntimeOtherExitPredictable)
    return true;
  return OtherExits.front()->getPostdominatingDeoptimizeCall() != nullptr;
  // TODO: These can be fine-tuned further to consider code size or deopt states
  // that are captured by the deoptimize exit block.
  // Also, we can extend this to support more cases, if we actually
  // know of kinds of multiexit loops that would benefit from unrolling.
}


/// Emit IR computing ModVal = (BECount + 1) % Count, evaluated as if on
/// unbounded integers, i.e. giving the right answer even when BECount + 1
/// wraps in the 2s complement domain. The result is named "xtraiter".
/// Preconditions:
/// 1) TripCount = BECount + 1 (allowing overflow)
/// 2) Log2(Count) <= BitWidth(BECount)
static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
                                  Value *TripCount, unsigned Count) {
  if (!isPowerOf2_32(Count)) {
    // General case. (BECount + 1) may unsigned-overflow, so instead compute
    // (BECount % Count) + 1, which cannot overflow because
    // BECount % Count < Count.
    Constant *Divisor = ConstantInt::get(BECount->getType(), Count);
    Value *BERem = B.CreateURem(BECount, Divisor);
    Value *BERemPlusOne =
        B.CreateAdd(BERem, ConstantInt::get(BERem->getType(), 1));
    // (BECount % Count) + 1 may be exactly Count at this point, so reduce
    // modulo Count one more time.
    return B.CreateURem(BERemPlusOne, Divisor, "xtraiter");
  }

  // Power-of-two Count: mask the (possibly wrapped) TripCount, which is
  // BECount + 1. A zero result means either:
  //  1. There are no iterations to be run in the prolog/epilog loop.
  // OR
  //  2. The addition computing TripCount overflowed.
  //
  // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
  // the number of iterations that remain to be run in the original loop is a
  // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth
  // (a precondition of this method).
  return B.CreateAnd(TripCount, Count - 1, "xtraiter");
}


/// Insert code in the prolog/epilog code when unrolling a loop with a

/// run-time trip-count.

///

/// This method assumes that the loop unroll factor is total number

/// of loop bodies in the loop after unrolling. (Some folks refer

/// to the unroll factor as the number of *extra* copies added).

/// We assume also that the loop unroll factor is a power-of-two. So, after

/// unrolling the loop, the number of loop bodies executed is 2,

/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch

/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for

/// the switch instruction is generated.

///

/// ***Prolog case***

///        extraiters = tripcount % loopfactor

///        if (extraiters == 0) jump Loop:

///        else jump Prol:

/// Prol:  LoopBody;

///        extraiters -= 1                 // Omitted if unroll factor is 2.

///        if (extraiters != 0) jump Prol: // Omitted if unroll factor is 2.

///        if (tripcount < loopfactor) jump End:

/// Loop:

/// ...

/// End:

///

/// ***Epilog case***

///        extraiters = tripcount % loopfactor

///        if (tripcount < loopfactor) jump LoopExit:

///        unroll_iters = tripcount - extraiters

/// Loop:  LoopBody; (executes unroll_iter times);

///        unroll_iter -= 1

///        if (unroll_iter != 0) jump Loop:

/// LoopExit:

///        if (extraiters == 0) jump EpilExit:

/// Epil:  LoopBody; (executes extraiters times)

///        extraiters -= 1                 // Omitted if unroll factor is 2.

///        if (extraiters != 0) jump Epil: // Omitted if unroll factor is 2.

/// EpilExit:


bool llvm::UnrollRuntimeLoopRemainder(

    Loop *L, unsigned Count, bool AllowExpensiveTripCount,

    bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV,

    LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,

    const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop) {

  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");

  LLVM_DEBUG(L->dump());

  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"

                                : dbgs() << "Using prolog remainder.\n");


  // Make sure the loop is in canonical form.

  if (!L->isLoopSimplifyForm()) {

    LLVM_DEBUG(dbgs() << "Not in simplify form!\n");

    return false;

  }


  // Guaranteed by LoopSimplifyForm.

  BasicBlock *Latch = L->getLoopLatch();

  BasicBlock *Header = L->getHeader();


  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());


  if (!LatchBR || LatchBR->isUnconditional()) {

    // The loop-rotate pass can be helpful to avoid this in many cases.

    LLVM_DEBUG(

        dbgs()

        << "Loop latch not terminated by a conditional branch.\n");

    return false;

  }


  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;

  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);


  if (L->contains(LatchExit)) {

    // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the

    // targets of the Latch be an exit block out of the loop.

    LLVM_DEBUG(

        dbgs()

        << "One of the loop latch successors must be the exit block.\n");

    return false;

  }


  // These are exit blocks other than the target of the latch exiting block.

  SmallVector<BasicBlock *, 4> OtherExits;

  L->getUniqueNonLatchExitBlocks(OtherExits);

  // Support only single exit and exiting block unless multi-exit loop

  // unrolling is enabled.

  if (!L->getExitingBlock() || OtherExits.size()) {

    // We rely on LCSSA form being preserved when the exit blocks are transformed.

    // (Note that only an off-by-default mode of the old PM disables PreserveLCSSA.)

    if (!PreserveLCSSA)

      return false;


    if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,

                                          UseEpilogRemainder)) {

      LLVM_DEBUG(

          dbgs()

          << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "

             "enabled!\n");

      return false;

    }

  }

  // Use Scalar Evolution to compute the trip count. This allows more loops to

  // be unrolled than relying on induction var simplification.

  if (!SE)

    return false;


  // Only unroll loops with a computable trip count.

  // We calculate the backedge count by using getExitCount on the Latch block,

  // which is proven to be the only exiting block in this loop. This is same as

  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all

  // exiting blocks).

  const SCEV *BECountSC = SE->getExitCount(L, Latch);

  if (isa<SCEVCouldNotCompute>(BECountSC)) {

    LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");

    return false;

  }


  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();


  // Add 1 since the backedge count doesn't include the first loop iteration.

  // (Note that overflow can occur, this is handled explicitly below)

  const SCEV *TripCountSC =

      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));

  if (isa<SCEVCouldNotCompute>(TripCountSC)) {

    LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");

    return false;

  }


  BasicBlock *PreHeader = L->getLoopPreheader();

  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

  const DataLayout &DL = Header->getModule()->getDataLayout();

  SCEVExpander Expander(*SE, DL, "loop-unroll");

  if (!AllowExpensiveTripCount &&

      Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget,

                                   TTI, PreHeaderBR)) {

    LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");

    return false;

  }


  // This constraint lets us deal with an overflowing trip count easily; see the

  // comment on ModVal below.

  if (Log2_32(Count) > BEWidth) {

    LLVM_DEBUG(

        dbgs()

        << "Count failed constraint on overflow trip count calculation.\n");

    return false;

  }


  // Loop structure is the following:

  //

  // PreHeader

  //   Header

  //   ...

  //   Latch

  // LatchExit


  BasicBlock *NewPreHeader;

  BasicBlock *NewExit = nullptr;

  BasicBlock *PrologExit = nullptr;

  BasicBlock *EpilogPreHeader = nullptr;

  BasicBlock *PrologPreHeader = nullptr;


  if (UseEpilogRemainder) {

    // If epilog remainder

    // Split PreHeader to insert a branch around loop for unrolling.

    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);

    NewPreHeader->setName(PreHeader->getName() + ".new");

    // Split LatchExit to create phi nodes from branch above.

    NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,

                                     nullptr, PreserveLCSSA);

    // NewExit gets its DebugLoc from LatchExit, which is not part of the

    // original Loop.

    // Fix this by setting Loop's DebugLoc to NewExit.

    auto *NewExitTerminator = NewExit->getTerminator();

    NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());

    // Split NewExit to insert epilog remainder loop.

    EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);

    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");


    // If the latch exits from multiple level of nested loops, then

    // by assumption there must be another loop exit which branches to the

    // outer loop and we must adjust the loop for the newly inserted blocks

    // to account for the fact that our epilogue is still in the same outer

    // loop. Note that this leaves loopinfo temporarily out of sync with the

    // CFG until the actual epilogue loop is inserted.

    if (auto *ParentL = L->getParentLoop())

      if (LI->getLoopFor(LatchExit) != ParentL) {

        LI->removeBlock(NewExit);

        ParentL->addBasicBlockToLoop(NewExit, *LI);

        LI->removeBlock(EpilogPreHeader);

        ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);

      }


  } else {

    // If prolog remainder

    // Split the original preheader twice to insert prolog remainder loop

    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);

    PrologPreHeader->setName(Header->getName() + ".prol.preheader");

    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),

                            DT, LI);

    PrologExit->setName(Header->getName() + ".prol.loopexit");

    // Split PrologExit to get NewPreHeader.

    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);

    NewPreHeader->setName(PreHeader->getName() + ".new");

  }

  // Loop structure should be the following:

  //  Epilog             Prolog

  //

  // PreHeader         PreHeader

  // *NewPreHeader     *PrologPreHeader

  //   Header          *PrologExit

  //   ...             *NewPreHeader

  //   Latch             Header

  // *NewExit            ...

  // *EpilogPreHeader    Latch

  // LatchExit              LatchExit


  // Calculate conditions for branch around loop for unrolling

  // in epilog case and around prolog remainder loop in prolog case.

  // Compute the number of extra iterations required, which is:

  //  extra iterations = run-time trip count % loop unroll factor

  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

  IRBuilder<> B(PreHeaderBR);

  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),

                                            PreHeaderBR);

  Value *BECount;

  // If there are other exits before the latch, that may cause the latch exit

  // branch to never be executed, and the latch exit count may be poison.

  // In this case, freeze the TripCount and base BECount on the frozen

  // TripCount. We will introduce two branches using these values, and it's

  // important that they see a consistent value (which would not be guaranteed

  // if they were frozen independently.)

  if ((!OtherExits.empty() || !SE->loopHasNoAbnormalExits(L)) &&

      !isGuaranteedNotToBeUndefOrPoison(TripCount, AC, PreHeaderBR, DT)) {

    TripCount = B.CreateFreeze(TripCount);

    BECount =

        B.CreateAdd(TripCount, Constant::getAllOnesValue(TripCount->getType()));

  } else {

    // If we don't need to freeze, use SCEVExpander for BECount as well, to

    // allow slightly better value reuse.

    BECount =

        Expander.expandCodeFor(BECountSC, BECountSC->getType(), PreHeaderBR);

  }


  Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);


  Value *BranchVal =

      UseEpilogRemainder ? B.CreateICmpULT(BECount,

                                           ConstantInt::get(BECount->getType(),

                                                            Count - 1)) :

                           B.CreateIsNotNull(ModVal, "lcmp.mod");

  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;

  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;

  // Branch to either remainder (extra iterations) loop or unrolling loop.

  MDNode *BranchWeights = nullptr;

  if (hasBranchWeightMD(*Latch->getTerminator())) {

    // Assume loop is nearly always entered.

    MDBuilder MDB(B.getContext());

    BranchWeights = MDB.createBranchWeights(EpilogHeaderWeights);

  }

  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop, BranchWeights);

  PreHeaderBR->eraseFromParent();

  if (DT) {

    if (UseEpilogRemainder)

      DT->changeImmediateDominator(NewExit, PreHeader);

    else

      DT->changeImmediateDominator(PrologExit, PreHeader);

  }

  Function *F = Header->getParent();

  // Get an ordered list of blocks in the loop to help with the ordering of the

  // cloned blocks in the prolog/epilog code

  LoopBlocksDFS LoopBlocks(L);

  LoopBlocks.perform(LI);


  //

  // For each extra loop iteration, create a copy of the loop's basic blocks

  // and generate a condition that branches to the copy depending on the

  // number of 'left over' iterations.

  //

  std::vector<BasicBlock *> NewBlocks;

  ValueToValueMapTy VMap;


  // Clone all the basic blocks in the loop. If Count is 2, we don't clone

  // the loop, otherwise we create a cloned loop to execute the extra

  // iterations. This function adds the appropriate CFG connections.

  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;

  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;

  Loop *remainderLoop = CloneLoopBlocks(

      L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,

      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI, Count);


  // Insert the cloned blocks into the function.

  F->splice(InsertBot->getIterator(), F, NewBlocks[0]->getIterator(), F->end());


  // Now the loop blocks are cloned and the other exiting blocks from the

  // remainder are connected to the original Loop's exit blocks. The remaining

  // work is to update the phi nodes in the original loop, and take in the

  // values from the cloned region.

  for (auto *BB : OtherExits) {

    // Given we preserve LCSSA form, we know that the values used outside the

    // loop will be used through these phi nodes at the exit blocks that are

    // transformed below.

    for (PHINode &PN : BB->phis()) {

     unsigned oldNumOperands = PN.getNumIncomingValues();

     // Add the incoming values from the remainder code to the end of the phi

     // node.

     for (unsigned i = 0; i < oldNumOperands; i++){

       auto *PredBB =PN.getIncomingBlock(i);

       if (PredBB == Latch)

         // The latch exit is handled separately, see connectX

         continue;

       if (!L->contains(PredBB))

         // Even if we had dedicated exits, the code above inserted an

         // extra branch which can reach the latch exit.

         continue;


       auto *V = PN.getIncomingValue(i);

       if (Instruction *I = dyn_cast<Instruction>(V))

         if (L->contains(I))

           V = VMap.lookup(I);

       PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));

     }

   }

#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)

    for (BasicBlock *SuccBB : successors(BB)) {

      assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&

             "Breaks the definition of dedicated exits!");

    }

#endif

  }


  // Update the immediate dominator of the exit blocks and blocks that are

  // reachable from the exit blocks. This is needed because we now have paths

  // from both the original loop and the remainder code reaching the exit

  // blocks. While the IDom of these exit blocks were from the original loop,

  // now the IDom is the preheader (which decides whether the original loop or

  // remainder code should run).

  if (DT && !L->getExitingBlock()) {

    SmallVector<BasicBlock *, 16> ChildrenToUpdate;

    // NB! We have to examine the dom children of all loop blocks, not just

    // those which are the IDom of the exit blocks. This is because blocks

    // reachable from the exit blocks can have their IDom as the nearest common

    // dominator of the exit blocks.

    for (auto *BB : L->blocks()) {

      auto *DomNodeBB = DT->getNode(BB);

      for (auto *DomChild : DomNodeBB->children()) {

        auto *DomChildBB = DomChild->getBlock();

        if (!L->contains(LI->getLoopFor(DomChildBB)))

          ChildrenToUpdate.push_back(DomChildBB);

      }

    }

    for (auto *BB : ChildrenToUpdate)

      DT->changeImmediateDominator(BB, PreHeader);

  }


  // Loop structure should be the following:

  //  Epilog             Prolog

  //

  // PreHeader         PreHeader

  // NewPreHeader      PrologPreHeader

  //   Header            PrologHeader

  //   ...               ...

  //   Latch             PrologLatch

  // NewExit           PrologExit

  // EpilogPreHeader   NewPreHeader

  //   EpilogHeader      Header

  //   ...               ...

  //   EpilogLatch       Latch

  // LatchExit              LatchExit


  // Rewrite the cloned instruction operands to use the values created when the

  // clone is created.

  for (BasicBlock *BB : NewBlocks) {

    Module *M = BB->getModule();

    for (Instruction &I : *BB) {

      RemapInstruction(&I, VMap,

                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

      RemapDbgVariableRecordRange(M, I.getDbgRecordRange(), VMap,

                                  RF_NoModuleLevelChanges |

                                      RF_IgnoreMissingLocals);

    }

  }


  if (UseEpilogRemainder) {

    // Connect the epilog code to the original loop and update the

    // PHI functions.

    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader, EpilogPreHeader,

                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE, Count);


    // Update counter in loop for unrolling.

    // Use an incrementing IV.  Pre-incr/post-incr is backedge/trip count.

    // Subtle: TestVal can be 0 if we wrapped when computing the trip count,

    // thus we must compare the post-increment (wrapping) value.

    IRBuilder<> B2(NewPreHeader->getTerminator());

    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");

    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());

    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter");

    NewIdx->insertBefore(Header->getFirstNonPHIIt());

    B2.SetInsertPoint(LatchBR);

    auto *Zero = ConstantInt::get(NewIdx->getType(), 0);

    auto *One = ConstantInt::get(NewIdx->getType(), 1);

    Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");

    auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;

    Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");

    NewIdx->addIncoming(Zero, NewPreHeader);

    NewIdx->addIncoming(IdxNext, Latch);

    LatchBR->setCondition(IdxCmp);

  } else {

    // Connect the prolog code to the original loop and update the

    // PHI functions.

    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,

                  NewPreHeader, VMap, DT, LI, PreserveLCSSA, *SE);

  }


  // If this loop is nested, then the loop unroller changes the code in any

  // of its parent loops, so the Scalar Evolution pass needs to be run again.

  SE->forgetTopmostLoop(L);


  // Verify that the Dom Tree and Loop Info are correct.

#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)

  if (DT) {

    assert(DT->verify(DominatorTree::VerificationLevel::Full));

    LI->verify(*DT);

  }

#endif


  // For unroll factor 2 remainder loop will have 1 iteration.

  if (Count == 2 && DT && LI && SE) {

    // TODO: This code could probably be pulled out into a helper function

    // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.

    BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();

    assert(RemainderLatch);

    SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),

                                             remainderLoop->getBlocks().end());

    breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);

    remainderLoop = nullptr;


    // Simplify loop values after breaking the backedge

    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    SmallVector<WeakTrackingVH, 16> DeadInsts;

    for (BasicBlock *BB : RemainderBlocks) {

      for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {

        if (Value *V = simplifyInstruction(&Inst, {DL, nullptr, DT, AC}))

          if (LI->replacementPreservesLCSSAForm(&Inst, V))

            Inst.replaceAllUsesWith(V);

        if (isInstructionTriviallyDead(&Inst))

          DeadInsts.emplace_back(&Inst);

      }

      // We can't do recursive deletion until we're done iterating, as we might

      // have a phi which (potentially indirectly) uses instructions later in

      // the block we're iterating through.

      RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);

    }


    // Merge latch into exit block.

    auto *ExitBB = RemainderLatch->getSingleSuccessor();

    assert(ExitBB && "required after breaking cond br backedge");

    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

    MergeBlockIntoPredecessor(ExitBB, &DTU, LI);

  }


  // Canonicalize to LoopSimplifyForm both original and remainder loops. We

  // cannot rely on the LoopUnrollPass to do this because it only does

  // canonicalization for parent/subloops and not the sibling loops.

  if (OtherExits.size() > 0) {

    // Generate dedicated exit blocks for the original loop, to preserve

    // LoopSimplifyForm.

    formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);

    // Generate dedicated exit blocks for the remainder loop if one exists, to

    // preserve LoopSimplifyForm.

    if (remainderLoop)

      formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);

  }


  auto UnrollResult = LoopUnrollResult::Unmodified;

  if (remainderLoop && UnrollRemainder) {

    LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");

    UnrollResult =

        UnrollLoop(remainderLoop,

                   {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false,

                    /*AllowExpensiveTripCount*/ false,

                    /*UnrollRemainder*/ false, ForgetAllSCEV},

                   LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA);

  }


  if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)

    *ResultLoop = remainderLoop;

  NumRuntimeUnrolled++;

  return true;

}

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:74

BasicBlockUtils.h

BasicBlock.h

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Cloning.h

CommandLine.h

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DomTreeUpdater.h

Dominators.h

InstructionSimplify.h

LoopIterator.h

canProfitablyUnrollMultiExitLoop
static bool canProfitablyUnrollMultiExitLoop(Loop *L, SmallVectorImpl< BasicBlock * > &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder)
Returns true if we can profitably unroll the multi-exit loop L.
Definition: LoopUnrollRuntime.cpp:465

CloneLoopBlocks
static Loop * CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector< BasicBlock * > &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, unsigned Count)
Create a clone of the blocks in a loop and connect them together.
Definition: LoopUnrollRuntime.cpp:339

ConnectEpilog
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *PreHeader, BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE, unsigned Count)
Connect the unrolling epilog code to the original loop.
Definition: LoopUnrollRuntime.cpp:211

UnrolledLoopHeaderWeights
static const uint32_t UnrolledLoopHeaderWeights[]
Definition: LoopUnrollRuntime.cpp:63

CreateTripRemainder
static Value * CreateTripRemainder(IRBuilder<> &B, Value *BECount, Value *TripCount, unsigned Count)
Calculate ModVal = (BECount + 1) % Count on the abstract integer domain accounting for the possibilit...
Definition: LoopUnrollRuntime.cpp:517

UnrollRuntimeOtherExitPredictable
static cl::opt< bool > UnrollRuntimeOtherExitPredictable("unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, cl::desc("Assume the non latch exit block to be predictable"))

EpilogHeaderWeights
static const uint32_t EpilogHeaderWeights[]
Definition: LoopUnrollRuntime.cpp:68

UnrollRuntimeMultiExit
static cl::opt< bool > UnrollRuntimeMultiExit("unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated"))

ConnectProlog
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *PrologExit, BasicBlock *OriginalLoopLatchExit, BasicBlock *PreHeader, BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, ScalarEvolution &SE)
Connect the unrolling prolog code to the original loop.
Definition: LoopUnrollRuntime.cpp:83

LoopUtils.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MDBuilder.h

Module.h
Module.h This file contains the declarations for the Module class.

ProfDataUtils.h
This file contains the declarations for profiling metadata utility functions.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

ScalarEvolutionExpander.h

ScalarEvolution.h

Statistic.h
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

STATISTIC
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167

Local.h

UnrollLoop.h

ValueTracking.h

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:60

llvm::BasicBlock::phis
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499

llvm::BasicBlock::getFirstNonPHIIt
InstListType::const_iterator getFirstNonPHIIt() const
Iterator returning form of getFirstNonPHI.
Definition: BasicBlock.cpp:367

llvm::BasicBlock::getSingleSuccessor
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482

llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221

llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3439

llvm::BranchInst::setCondition
void setCondition(Value *V)
Definition: Instructions.h:3540

llvm::BranchInst::getSuccessor
BasicBlock * getSuccessor(unsigned i) const
Definition: Instructions.h:3547

llvm::BranchInst::isUnconditional
bool isUnconditional() const
Definition: Instructions.h:3532

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41

llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417

llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110

llvm::DomTreeNodeBase::getIDom
DomTreeNodeBase * getIDom() const
Definition: GenericDomTree.h:90

llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:89

llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28

llvm::DominatorTreeBase::verify
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
Definition: GenericDomTree.h:819

llvm::DominatorTreeBase::changeImmediateDominator
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
Definition: GenericDomTree.h:672

llvm::DominatorTreeBase::addNewBlock
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Definition: GenericDomTree.h:636

llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:367

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::DominatorTree::findNearestCommonDominator
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344

llvm::Function
Definition: Function.h:62

llvm::IRBuilderBase::CreateICmpNE
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2245

llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344

llvm::IRBuilderBase::CreateCondBr
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120

llvm::IRBuilderBase::getContext
LLVMContext & getContext() const
Definition: IRBuilder.h:176

llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327

llvm::IRBuilderBase::SetInsertPoint
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351

llvm::IRBuilder
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666

llvm::Instruction
Definition: Instruction.h:49

llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:110

llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:152

llvm::Instruction::eraseFromParent
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:105

llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451

llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:1244

llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: GenericLoopInfo.h:124

llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: GenericLoopInfoImpl.h:245

llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: GenericLoopInfo.h:173

llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97

llvm::LoopBlocksDFS::beginRPO
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:136

llvm::LoopBlocksDFS::RPOIterator
std::vector< BasicBlock * >::const_reverse_iterator RPOIterator
Definition: LoopIterator.h:101

llvm::LoopBlocksDFS::perform
void perform(const LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:1222

llvm::LoopBlocksDFS::endRPO
RPOIterator endRPO() const
Definition: LoopIterator.h:140

llvm::LoopInfoBase::verify
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
Definition: GenericLoopInfoImpl.h:707

llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: GenericLoopInfo.h:602

llvm::LoopInfo
Definition: LoopInfo.h:407

llvm::LoopInfo::replacementPreservesLCSSAForm
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:525

llvm::Loop::setLoopAlreadyUnrolled
void setLoopAlreadyUnrolled()
Add llvm.loop.unroll.disable to this loop's loop id metadata.
Definition: LoopInfo.cpp:537

llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:501

llvm::MDBuilder
Definition: MDBuilder.h:36

llvm::MDBuilder::createBranchWeights
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37

llvm::MDNode
Metadata node.
Definition: Metadata.h:1067

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::PHINode
Definition: Instructions.h:2973

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:3134

llvm::PHINode::setIncomingValueForBlock
void setIncomingValueForBlock(const BasicBlock *BB, Value *V)
Set every incoming value(s) for block BB to V.
Definition: Instructions.h:3181

llvm::PHINode::setIncomingBlock
void setIncomingBlock(unsigned i, BasicBlock *BB)
Definition: Instructions.h:3113

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:3024

llvm::PHINode::setIncomingValue
void setIncomingValue(unsigned i, Value *V)
Definition: Instructions.h:3077

llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:3074

llvm::PHINode::getBasicBlockIndex
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
Definition: Instructions.h:3167

llvm::SCEVExpander
This class uses information about analyze scalars to rewrite expressions in canonical form.
Definition: ScalarEvolutionExpander.h:61

llvm::SCEVExpander::isHighCostExpansion
bool isHighCostExpansion(ArrayRef< const SCEV * > Exprs, Loop *L, unsigned Budget, const TargetTransformInfo *TTI, const Instruction *At)
Return true for expressions that can't be evaluated at runtime within given Budget.
Definition: ScalarEvolutionExpander.h:243

llvm::SCEVExpander::expandCodeFor
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Definition: ScalarEvolutionExpander.cpp:1397

llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75

llvm::SCEV::getType
Type * getType() const
Return the LLVM type of this SCEV expression.
Definition: ScalarEvolution.cpp:380

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452

llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:479

llvm::ScalarEvolution::loopHasNoAbnormalExits
bool loopHasNoAbnormalExits(const Loop *L)
Return true if the loop has no abnormal exits.
Definition: ScalarEvolution.h:1304

llvm::ScalarEvolution::forgetTopmostLoop
void forgetTopmostLoop(const Loop *L)
Definition: ScalarEvolution.cpp:8463

llvm::ScalarEvolution::forgetValue
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
Definition: ScalarEvolution.cpp:8467

llvm::ScalarEvolution::getExitCount
const SCEV * getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind=Exact)
Return the number of times the backedge executes before the given exit would be taken; if not exactly...
Definition: ScalarEvolution.cpp:8265

llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2512

llvm::SmallDenseMap
Definition: DenseMap.h:910

llvm::SmallVectorBase::empty
bool empty() const
Definition: SmallVector.h:94

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:213

llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808

llvm::ValueMap< const Value *, WeakTrackingVH >

llvm::ValueMap::lookup
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164

llvm::ValueMap::erase
bool erase(const KeyT &Val)
Definition: ValueMap.h:190

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255

llvm::Value::setName
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377

llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309

llvm::cl::opt
Definition: CommandLine.h:1430

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:109

uint32_t

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::RecursivelyDeleteTriviallyDeadInstructions
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540

llvm::successors
auto successors(const MachineBasicBlock *BB)
Definition: MachineBasicBlock.h:1306

llvm::makeFollowupLoopID
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656

llvm::RemapDbgVariableRecordRange
void RemapDbgVariableRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DbgVariableRecord V using the value map VM.
Definition: ValueMapper.h:285

llvm::simplifyInstruction
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:7129

llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:400

llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324

llvm::CloneBasicBlock
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular function.
Definition: CloneFunction.cpp:42

llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275

llvm::RF_IgnoreMissingLocals
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument, Instruction, BasicBlock) that are not in the value map.
Definition: ValueMapper.h:94

llvm::RF_NoModuleLevelChanges
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruction or argument) are mapped, not global values like functions and global metadata.
Definition: ValueMapper.h:76

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::SCEVCheapExpansionBudget
cl::opt< unsigned > SCEVCheapExpansionBudget

llvm::RemapInstruction
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:264

llvm::breakLoopBackedge
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA)
Remove the backedge of the specified loop.
Definition: LoopUtils.cpp:724

llvm::SplitBlockPredecessors
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecessors of BB to be predecessors of the new block.
Definition: BasicBlockUtils.cpp:1417

llvm::LLVMLoopUnrollFollowupAll
const char *const LLVMLoopUnrollFollowupAll
Definition: UnrollLoop.h:42

llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:209

llvm::MergeBlockIntoPredecessor
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
Definition: BasicBlockUtils.cpp:180

llvm::formDedicatedExitBlocks
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:57

llvm::isGuaranteedNotToBeUndefOrPoison
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
Definition: ValueTracking.cpp:7318

llvm::LLVMLoopUnrollFollowupRemainder
const char *const LLVMLoopUnrollFollowupRemainder
Definition: UnrollLoop.h:45

llvm::addClonedBlockToLoopInfo
const Loop * addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, NewLoopsMap &NewLoops)
Adds ClonedBB to LoopInfo, creates a new loop for ClonedBB if necessary and adds a mapping from the original loop to the new loop to NewLoops.
Definition: LoopUnroll.cpp:147

llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:1083

llvm::predecessors
auto predecessors(const MachineBasicBlock *BB)
Definition: MachineBasicBlock.h:1307

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879

llvm::UnrollLoop
LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop=nullptr)
Unroll the given loop by Count.
Definition: LoopUnroll.cpp:295

llvm::hasBranchWeightMD
bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
Definition: ProfDataUtils.cpp:80

llvm::SplitEdge
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
Definition: BasicBlockUtils.cpp:761

llvm::UnrollRuntimeLoopRemainder
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, bool UseEpilogRemainder, bool UnrollRemainder, bool ForgetAllSCEV, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop=nullptr)
Insert code in the prolog/epilog code when unrolling a loop with a run-time trip-count.
Definition: LoopUnrollRuntime.cpp:582

raw_ostream.h

llvm::cl::desc
Definition: CommandLine.h:416