doxygen/LoopUnrollAndJamPass_8cpp_source.html

//===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass implements an unroll and jam pass. Most of the work is done by

// Utils/UnrollLoopAndJam.cpp.

//===----------------------------------------------------------------------===//


#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"

#include "llvm/ADT/ArrayRef.h"

#include "llvm/ADT/PriorityWorklist.h"

#include "llvm/ADT/SmallPtrSet.h"

#include "llvm/ADT/StringRef.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/CodeMetrics.h"

#include "llvm/Analysis/DependenceAnalysis.h"

#include "llvm/Analysis/LoopAnalysisManager.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/LoopNestAnalysis.h"

#include "llvm/Analysis/LoopPass.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/Analysis/ScalarEvolution.h"

#include "llvm/Analysis/TargetTransformInfo.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/Constants.h"

#include "llvm/IR/Dominators.h"

#include "llvm/IR/Function.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Metadata.h"

#include "llvm/IR/PassManager.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Scalar/LoopPassManager.h"

#include "llvm/Transforms/Utils/LoopPeel.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include "llvm/Transforms/Utils/UnrollLoop.h"

#include <cassert>

#include <cstdint>


// Forward declarations of the llvm::Instruction and llvm::Value classes, which
// are named by the helper functions further down in this file.
namespace llvm {
class Instruction;
class Value;
} // namespace llvm


using namespace llvm;


#define DEBUG_TYPE "loop-unroll-and-jam"


/// @{
/// Metadata attribute names
///
/// These are the followup attribute names handed to makeFollowupLoopID() when
/// this pass builds the loop IDs for the loops involved in the transformation.

/// Passed together with each of the more specific names below.
static const char *const LLVMLoopUnrollAndJamFollowupAll =
    "llvm.loop.unroll_and_jam.followup_all";
/// Used for the loop ID given to the inner loop after the transformation.
static const char *const LLVMLoopUnrollAndJamFollowupInner =
    "llvm.loop.unroll_and_jam.followup_inner";
/// Used for the loop ID given to the outer loop when it was only partially
/// unrolled.
static const char *const LLVMLoopUnrollAndJamFollowupOuter =
    "llvm.loop.unroll_and_jam.followup_outer";
/// Used for the loop ID assigned to the inner loop before unrolling, so that
/// it is applied to the inner loops of the epilogue.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner =
    "llvm.loop.unroll_and_jam.followup_remainder_inner";
/// Used for the loop ID given to the outer epilogue loop, if one is created.
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter =
    "llvm.loop.unroll_and_jam.followup_remainder_outer";
/// @}


// Hidden command-line switch. When it appears on the command line its value
// overrides UP.UnrollAndJam in tryToUnrollAndJamLoop(); when it does not
// appear, the preference gathered for the loop is left alone.
static cl::opt<bool>
    AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden,
                      cl::desc("Allows loops to be unroll-and-jammed."));


// Hidden command-line option. When it appears on the command line,
// computeUnrollAndJamCount() copies its value into UP.Count and sets UP.Force.
static cl::opt<unsigned> UnrollAndJamCount(
    "unroll-and-jam-count", cl::Hidden,
    cl::desc("Use this unroll count for all loops including those with "
             "unroll_and_jam_count pragma values, for testing purposes"));


// Hidden command-line option (initial value 60). When it appears on the
// command line, tryToUnrollAndJamLoop() uses it to replace
// UP.UnrollAndJamInnerLoopThreshold; a resulting threshold of 0 disables the
// transformation for the loop.
static cl::opt<unsigned> UnrollAndJamThreshold(
    "unroll-and-jam-threshold", cl::init(60), cl::Hidden,
    cl::desc("Threshold to use for inner loop when doing unroll and jam."));


// Hidden command-line option (initial value 1024). computeUnrollAndJamCount()
// installs it as UP.UnrollAndJamInnerLoopThreshold whenever unroll and jam was
// requested explicitly (enable pragma, count pragma or -unroll-and-jam-count).
static cl::opt<unsigned> PragmaUnrollAndJamThreshold(
    "pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden,
    cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or "
             "unroll_count pragma."));


// Looks up the loop hint metadata node called Name (for example,
// "llvm.loop.unroll.count") on loop L. The lookup itself is delegated to
// GetUnrollMetadata(); a loop without a loop ID yields nullptr.
static MDNode *getUnrollMetadataForLoop(const Loop *L, StringRef Name) {
  MDNode *LoopID = L->getLoopID();
  return LoopID ? GetUnrollMetadata(LoopID, Name) : nullptr;
}


// Returns true if the loop has any metadata starting with Prefix. For example a
// Prefix of "llvm.loop.unroll." returns true if we have any unroll metadata.
//
// Only attributes that are metadata nodes whose first operand is a string are
// considered; everything else attached to the loop ID is skipped. A loop
// without a loop ID never matches.
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix) {
  MDNode *LoopID = L->getLoopID();
  if (!LoopID)
    return false;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  // Operand 0 is the self reference, so the attributes start at operand 1.
  for (unsigned I = 1, E = LoopID->getNumOperands(); I < E; ++I) {
    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
    // Skip operands that are not nodes, and nodes without any operand: the
    // latter have no operand 0 that could hold an attribute name, and reading
    // it would be an out-of-range access.
    if (!MD || MD->getNumOperands() == 0)
      continue;

    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (S && S->getString().starts_with(Prefix))
      return true;
  }
  return false;
}


// Returns true if the loop has an unroll_and_jam(enable) pragma.

static bool hasUnrollAndJamEnablePragma(const Loop *L) {

  return getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.enable");

}


// If loop has an unroll_and_jam_count pragma return the (necessarily

// positive) value from the pragma.  Otherwise return 0.

static unsigned unrollAndJamCountPragmaValue(const Loop *L) {

  MDNode *MD = getUnrollMetadataForLoop(L, "llvm.loop.unroll_and_jam.count");

  if (MD) {

    assert(MD->getNumOperands() == 2 &&

           "Unroll count hint metadata should have two operands.");

    unsigned Count =

        mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();

    assert(Count >= 1 && "Unroll count must be positive.");

    return Count;

  }

  return 0;

}


// Estimates the size of a loop of LoopSize once it is unrolled UP.Count times:
// everything except the UP.BEInsns part is replicated UP.Count times, and
// UP.BEInsns is added back once. LoopSize must not be smaller than UP.BEInsns.
static uint64_t
getUnrollAndJammedLoopSize(unsigned LoopSize,
                           TargetTransformInfo::UnrollingPreferences &UP) {
  assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
  // Widen to 64 bits before multiplying so the product is not truncated.
  const uint64_t ReplicatedSize = LoopSize - UP.BEInsns;
  const uint64_t UnrolledSize = ReplicatedSize * UP.Count;
  return UnrolledSize + UP.BEInsns;
}


// Calculates the unroll and jam count for outer loop L (with inner loop
// SubLoop) and writes it to UP.Count. A count of 0 is written whenever the
// loop should not be unroll-and-jammed.
//
// OuterTripCount / OuterTripMultiple describe the outer loop, InnerTripCount
// and InnerLoopSize the inner loop; OuterUCE supplies the rolled size of the
// outer loop. TTI, DT, LI, AC, EphValues, ORE and PP are only forwarded to
// computeUnrollCount(). Besides UP.Count this may also update UP.Force,
// UP.Runtime and UP.UnrollAndJamInnerLoopThreshold.
//
// Returns true if the count was set explicitly, i.e. it comes from the
// "unroll-and-jam-count" option, an unroll_and_jam count pragma, or the loop
// has an unroll_and_jam enable pragma. Returns false otherwise, including
// when the heuristics at the end accept the loop and leave UP.Count non-zero.
static bool computeUnrollAndJamCount(
    Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT,
    LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE,
    const SmallPtrSetImpl<const Value *> &EphValues,
    OptimizationRemarkEmitter *ORE, unsigned OuterTripCount,
    unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE,
    unsigned InnerTripCount, unsigned InnerLoopSize,
    TargetTransformInfo::UnrollingPreferences &UP,
    TargetTransformInfo::PeelingPreferences &PP) {
  unsigned OuterLoopSize = OuterUCE.getRolledLoopSize();
  // First up use computeUnrollCount from the loop unroller to get a count
  // for unrolling the outer loop, plus any loops requiring explicit
  // unrolling we leave to the unroller. This uses UP.Threshold /
  // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values.
  // We have already checked that the loop has no unroll.* pragmas.
  unsigned MaxTripCount = 0;
  bool UseUpperBound = false;
  bool ExplicitUnroll = computeUnrollCount(
      L, TTI, DT, LI, AC, SE, EphValues, ORE, OuterTripCount, MaxTripCount,
      /*MaxOrZero*/ false, OuterTripMultiple, OuterUCE, UP, PP,
      UseUpperBound);
  if (ExplicitUnroll || UseUpperBound) {
    // If the user explicitly set the loop as unrolled, don't UnJ it. Leave it
    // for the unroller instead.
    LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by "
                         "computeUnrollCount\n");
    UP.Count = 0;
    return false;
  }

  // Override with any explicit Count from the "unroll-and-jam-count" option.
  bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0;
  if (UserUnrollCount) {
    UP.Count = UnrollAndJamCount;
    UP.Force = true;
    if (UP.AllowRemainder &&
        getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
        getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
            UP.UnrollAndJamInnerLoopThreshold)
      return true;
  }

  // Check for unroll_and_jam pragmas
  unsigned PragmaCount = unrollAndJamCountPragmaValue(L);
  if (PragmaCount > 0) {
    UP.Count = PragmaCount;
    UP.Runtime = true;
    UP.Force = true;
    if ((UP.AllowRemainder || (OuterTripMultiple % PragmaCount == 0)) &&
        getUnrollAndJammedLoopSize(OuterLoopSize, UP) < UP.Threshold &&
        getUnrollAndJammedLoopSize(InnerLoopSize, UP) <
            UP.UnrollAndJamInnerLoopThreshold)
      return true;
  }

  bool PragmaEnableUnroll = hasUnrollAndJamEnablePragma(L);
  bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount;
  bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount;

  // If the loop has an unrolling pragma, we want to be more aggressive with
  // unrolling limits.
  if (ExplicitUnrollAndJam)
    UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold;

  if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
                                UP.UnrollAndJamInnerLoopThreshold) {
    LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and "
                         "inner loop too large\n");
    UP.Count = 0;
    return false;
  }

  // We have a sensible limit for the outer loop, now adjust it for the inner
  // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set
  // explicitly, we want to stick to it.
  if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) {
    while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >=
                                UP.UnrollAndJamInnerLoopThreshold)
      UP.Count--;
  }

  // If we are explicitly unroll and jamming, we are done. Otherwise there are a
  // number of extra performance heuristics to check.
  if (ExplicitUnrollAndJam)
    return true;

  // If the inner loop count is known and small, leave the entire loop nest to
  // the unroller. The product is formed in 64 bits: a 32-bit multiplication
  // could wrap around and make a very large inner loop look small.
  if (InnerTripCount &&
      static_cast<uint64_t>(InnerLoopSize) * InnerTripCount < UP.Threshold) {
    LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is "
                         "being left for the unroller\n");
    UP.Count = 0;
    return false;
  }

  // Check for situations where UnJ is likely to be unprofitable. Including
  // subloops with more than 1 block.
  if (SubLoop->getBlocks().size() != 1) {
    LLVM_DEBUG(
        dbgs() << "Won't unroll-and-jam; More than one inner loop block\n");
    UP.Count = 0;
    return false;
  }

  // Limit to loops where there is something to gain from unrolling and
  // jamming the loop. In this case, look for loads that are invariant in the
  // outer loop and can become shared.
  unsigned NumInvariant = 0;
  for (BasicBlock *BB : SubLoop->getBlocks()) {
    for (Instruction &I : *BB) {
      if (auto *Ld = dyn_cast<LoadInst>(&I)) {
        Value *V = Ld->getPointerOperand();
        const SCEV *LSCEV = SE.getSCEVAtScope(V, L);
        if (SE.isLoopInvariant(LSCEV, L))
          NumInvariant++;
      }
    }
  }
  if (NumInvariant == 0) {
    LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n");
    UP.Count = 0;
    return false;
  }

  // The heuristics accepted the loop; UP.Count keeps the computed value, but
  // the count was not set explicitly.
  return false;
}


// Attempts to unroll and jam the single loop L together with its first
// subloop.
//
// Gathers the unrolling/peeling preferences for L, applies the command-line
// overrides, bails out when the transformation is disabled, left to the
// unroller because of unroll pragmas, unsafe, or the loops are not considered
// unrollable, then lets computeUnrollAndJamCount() pick a count and calls
// UnrollAndJamLoop(). Afterwards the followup loop IDs are assigned to the
// loops involved.
//
// Returns the result of UnrollAndJamLoop(), or LoopUnrollResult::Unmodified
// when the transformation was not attempted.
static LoopUnrollResult
tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
                      ScalarEvolution &SE, const TargetTransformInfo &TTI,
                      AssumptionCache &AC, DependenceInfo &DI,
                      OptimizationRemarkEmitter &ORE, int OptLevel) {
  TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
      L, SE, TTI, nullptr, nullptr, ORE, OptLevel, std::nullopt, std::nullopt,
      std::nullopt, std::nullopt, std::nullopt, std::nullopt);
  TargetTransformInfo::PeelingPreferences PP =
      gatherPeelingPreferences(L, SE, TTI, std::nullopt, std::nullopt);

  TransformationMode EnableMode = hasUnrollAndJamTransformation(L);
  if (EnableMode & TM_Disable)
    return LoopUnrollResult::Unmodified;
  if (EnableMode & TM_ForcedByUser)
    UP.UnrollAndJam = true;

  // Command-line options, when given, take precedence over the gathered
  // preferences.
  if (AllowUnrollAndJam.getNumOccurrences() > 0)
    UP.UnrollAndJam = AllowUnrollAndJam;
  if (UnrollAndJamThreshold.getNumOccurrences() > 0)
    UP.UnrollAndJamInnerLoopThreshold = UnrollAndJamThreshold;
  // Exit early if unrolling is disabled.
  if (!UP.UnrollAndJam || UP.UnrollAndJamInnerLoopThreshold == 0)
    return LoopUnrollResult::Unmodified;

  LLVM_DEBUG(dbgs() << "Loop Unroll and Jam: F["
                    << L->getHeader()->getParent()->getName() << "] Loop %"
                    << L->getHeader()->getName() << "\n");

  // A loop with any unroll pragma (enabling/disabling/count/etc) is left for
  // the unroller, so long as it does not explicitly have unroll_and_jam
  // metadata. This means #pragma nounroll will disable unroll and jam as well
  // as unrolling
  if (hasAnyUnrollPragma(L, "llvm.loop.unroll.") &&
      !hasAnyUnrollPragma(L, "llvm.loop.unroll_and_jam.")) {
    LLVM_DEBUG(dbgs() << "  Disabled due to pragma.\n");
    return LoopUnrollResult::Unmodified;
  }

  if (!isSafeToUnrollAndJam(L, SE, DT, DI, *LI)) {
    LLVM_DEBUG(dbgs() << "  Disabled due to not being safe.\n");
    return LoopUnrollResult::Unmodified;
  }

  // Approximate the loop size and collect useful info
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
  Loop *SubLoop = L->getSubLoops()[0];
  UnrollCostEstimator InnerUCE(SubLoop, TTI, EphValues, UP.BEInsns);
  UnrollCostEstimator OuterUCE(L, TTI, EphValues, UP.BEInsns);

  if (!InnerUCE.canUnroll() || !OuterUCE.canUnroll()) {
    LLVM_DEBUG(dbgs() << "  Loop not considered unrollable\n");
    return LoopUnrollResult::Unmodified;
  }

  unsigned InnerLoopSize = InnerUCE.getRolledLoopSize();
  LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterUCE.getRolledLoopSize()
                    << "\n");
  LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSize << "\n");

  if (InnerUCE.NumInlineCandidates != 0 || OuterUCE.NumInlineCandidates != 0) {
    LLVM_DEBUG(dbgs() << "  Not unrolling loop with inlinable calls.\n");
    return LoopUnrollResult::Unmodified;
  }
  // FIXME: The call to canUnroll() allows some controlled convergent
  // operations, but we block them here for future changes.
  if (InnerUCE.Convergence != ConvergenceKind::None ||
      OuterUCE.Convergence != ConvergenceKind::None) {
    LLVM_DEBUG(
        dbgs() << "  Not unrolling loop with convergent instructions.\n");
    return LoopUnrollResult::Unmodified;
  }

  // Save original loop IDs for after the transformation.
  MDNode *OrigOuterLoopID = L->getLoopID();
  MDNode *OrigSubLoopID = SubLoop->getLoopID();

  // To assign the loop id of the epilogue, assign it before unrolling it so it
  // is applied to every inner loop of the epilogue. We later apply the loop ID
  // for the jammed inner loop.
  std::optional<MDNode *> NewInnerEpilogueLoopID = makeFollowupLoopID(
      OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
                        LLVMLoopUnrollAndJamFollowupRemainderInner});
  if (NewInnerEpilogueLoopID)
    SubLoop->setLoopID(*NewInnerEpilogueLoopID);

  // Find trip count and trip multiple
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch();
  unsigned OuterTripCount = SE.getSmallConstantTripCount(L, Latch);
  unsigned OuterTripMultiple = SE.getSmallConstantTripMultiple(L, Latch);
  unsigned InnerTripCount = SE.getSmallConstantTripCount(SubLoop, SubLoopLatch);

  // Decide if, and by how much, to unroll
  bool IsCountSetExplicitly = computeUnrollAndJamCount(
      L, SubLoop, TTI, DT, LI, &AC, SE, EphValues, &ORE, OuterTripCount,
      OuterTripMultiple, OuterUCE, InnerTripCount, InnerLoopSize, UP, PP);
  if (UP.Count <= 1) {
    // Nothing is going to be transformed. Undo the provisional epilogue loop
    // ID assigned to the inner loop above, so that reporting "Unmodified"
    // really leaves the inner loop's metadata as it was.
    if (NewInnerEpilogueLoopID)
      SubLoop->setLoopID(OrigSubLoopID);
    return LoopUnrollResult::Unmodified;
  }
  // Unroll factor (Count) must be less or equal to TripCount.
  if (OuterTripCount && UP.Count > OuterTripCount)
    UP.Count = OuterTripCount;

  Loop *EpilogueOuterLoop = nullptr;
  LoopUnrollResult UnrollResult = UnrollAndJamLoop(
      L, UP.Count, OuterTripCount, OuterTripMultiple, UP.UnrollRemainder, LI,
      &SE, &DT, &AC, &TTI, &ORE, &EpilogueOuterLoop);

  // Assign new loop attributes.
  if (EpilogueOuterLoop) {
    std::optional<MDNode *> NewOuterEpilogueLoopID = makeFollowupLoopID(
        OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
                          LLVMLoopUnrollAndJamFollowupRemainderOuter});
    if (NewOuterEpilogueLoopID)
      EpilogueOuterLoop->setLoopID(*NewOuterEpilogueLoopID);
  }

  // The inner loop either gets its followup loop ID or its original one back,
  // replacing the provisional epilogue ID set before unrolling.
  std::optional<MDNode *> NewInnerLoopID =
      makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
                                           LLVMLoopUnrollAndJamFollowupInner});
  if (NewInnerLoopID)
    SubLoop->setLoopID(*NewInnerLoopID);
  else
    SubLoop->setLoopID(OrigSubLoopID);

  if (UnrollResult == LoopUnrollResult::PartiallyUnrolled) {
    std::optional<MDNode *> NewOuterLoopID = makeFollowupLoopID(
        OrigOuterLoopID,
        {LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
    if (NewOuterLoopID) {
      L->setLoopID(*NewOuterLoopID);

      // Do not setLoopAlreadyUnrolled if a followup was given.
      return UnrollResult;
    }
  }

  // If loop has an unroll count pragma or unrolled by explicitly set count
  // mark loop as unrolled to prevent unrolling beyond that requested.
  if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly)
    L->setLoopAlreadyUnrolled();

  return UnrollResult;
}


// Runs the single-loop tryToUnrollAndJamLoop() on every loop of the nest LN.
// Returns true if at least one loop was modified. If the outermost loop of
// the nest ends up fully unrolled, the updater U is told that it was deleted.
static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
                                  ScalarEvolution &SE,
                                  const TargetTransformInfo &TTI,
                                  AssumptionCache &AC, DependenceInfo &DI,
                                  OptimizationRemarkEmitter &ORE, int OptLevel,
                                  LPMUpdater &U) {
  Loop *const Outermost = &LN.getOutermostLoop();
  ArrayRef<Loop *> NestLoops = LN.getLoops();

  // Add the loop nests in the reverse order of LN. See method
  // declaration.
  SmallPriorityWorklist<Loop *, 4> Worklist;
  appendLoopsToWorklist(NestLoops, Worklist);

  bool Changed = false;
  while (!Worklist.empty()) {
    Loop *Current = Worklist.pop_back_val();
    // Take a copy of the name before attempting the transformation; it is
    // needed afterwards when reporting the loop as deleted.
    const std::string SavedName = std::string(Current->getName());

    const LoopUnrollResult Outcome = tryToUnrollAndJamLoop(
        Current, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
    Changed |= (Outcome != LoopUnrollResult::Unmodified);

    const bool OutermostGone =
        Current == Outermost && Outcome == LoopUnrollResult::FullyUnrolled;
    if (OutermostGone)
      U.markLoopAsDeleted(*Current, SavedName);
  }

  return Changed;
}


// Pass entry point: tries to unroll and jam the loops of nest LN.
// Builds a DependenceInfo and an OptimizationRemarkEmitter for the enclosing
// function and forwards to tryToUnrollAndJamLoop(). When nothing changed all
// analyses are preserved; otherwise the loop-pass default set plus
// LoopNestAnalysis is reported as preserved.
PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
                                            LoopAnalysisManager &AM,
                                            LoopStandardAnalysisResults &AR,
                                            LPMUpdater &U) {
  Function &F = *LN.getParent();

  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
  OptimizationRemarkEmitter ORE(&F);

  const bool Changed = tryToUnrollAndJamLoop(LN, AR.DT, AR.LI, AR.SE, AR.TTI,
                                             AR.AC, DI, ORE, OptLevel, U);
  if (Changed) {
    PreservedAnalyses PA = getLoopPassPreservedAnalyses();
    PA.preserve<LoopNestAnalysis>();
    return PA;
  }

  return PreservedAnalyses::all();
}

ArrayRef.h

AssumptionCache.h

BasicBlock.h

Casting.h

CodeMetrics.h

CommandLine.h

Compiler.h

Constants.h
This file contains the declarations for the subclasses of Constant, which represent the different fla...

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

DependenceAnalysis.h

Dominators.h

Name
std::string Name
Definition: ELFObjHandler.cpp:77

Function.h

Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:374

Instructions.h

LoopAnalysisManager.h
This header provides classes for managing per-loop analyses.

LoopInfo.h

LoopNestAnalysis.h
This file defines the interface for the loop nest analysis.

LoopPassManager.h
This header provides classes for managing a pipeline of passes over loops in LLVM IR.

LoopPass.h

LoopPeel.h

LLVMLoopUnrollAndJamFollowupInner
static const char *const LLVMLoopUnrollAndJamFollowupInner
Definition: LoopUnrollAndJamPass.cpp:60

LLVMLoopUnrollAndJamFollowupRemainderInner
static const char *const LLVMLoopUnrollAndJamFollowupRemainderInner
Definition: LoopUnrollAndJamPass.cpp:64

LLVMLoopUnrollAndJamFollowupRemainderOuter
static const char *const LLVMLoopUnrollAndJamFollowupRemainderOuter
Definition: LoopUnrollAndJamPass.cpp:66

getUnrollMetadataForLoop
static MDNode * getUnrollMetadataForLoop(const Loop *L, StringRef Name)
Definition: LoopUnrollAndJamPass.cpp:91

LLVMLoopUnrollAndJamFollowupOuter
static const char *const LLVMLoopUnrollAndJamFollowupOuter
Definition: LoopUnrollAndJamPass.cpp:62

computeUnrollAndJamCount
static bool computeUnrollAndJamCount(Loop *L, Loop *SubLoop, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, const UnrollCostEstimator &OuterUCE, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
Definition: LoopUnrollAndJamPass.cpp:151

AllowUnrollAndJam
static cl::opt< bool > AllowUnrollAndJam("allow-unroll-and-jam", cl::Hidden, cl::desc("Allows loops to be unroll-and-jammed."))

getUnrollAndJammedLoopSize
static uint64_t getUnrollAndJammedLoopSize(unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP)
Definition: LoopUnrollAndJamPass.cpp:143

UnrollAndJamCount
static cl::opt< unsigned > UnrollAndJamCount("unroll-and-jam-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_and_jam_count pragma values, for testing purposes"))

tryToUnrollAndJamLoop
static LoopUnrollResult tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel)
Definition: LoopUnrollAndJamPass.cpp:279

hasAnyUnrollPragma
static bool hasAnyUnrollPragma(const Loop *L, StringRef Prefix)
Definition: LoopUnrollAndJamPass.cpp:99

PragmaUnrollAndJamThreshold
static cl::opt< unsigned > PragmaUnrollAndJamThreshold("pragma-unroll-and-jam-threshold", cl::init(1024), cl::Hidden, cl::desc("Unrolled size limit for loops with an unroll_and_jam(full) or " "unroll_count pragma."))

UnrollAndJamThreshold
static cl::opt< unsigned > UnrollAndJamThreshold("unroll-and-jam-threshold", cl::init(60), cl::Hidden, cl::desc("Threshold to use for inner loop when doing unroll and jam."))

unrollAndJamCountPragmaValue
static unsigned unrollAndJamCountPragmaValue(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:128

hasUnrollAndJamEnablePragma
static bool hasUnrollAndJamEnablePragma(const Loop *L)
Definition: LoopUnrollAndJamPass.cpp:122

LLVMLoopUnrollAndJamFollowupAll
static const char *const LLVMLoopUnrollAndJamFollowupAll
Definition: LoopUnrollAndJamPass.cpp:58

LoopUnrollAndJamPass.h

LoopUtils.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

Metadata.h
This file contains the declarations for metadata subclasses.

OptimizationRemarkEmitter.h

PassManager.h
This header defines various interfaces for pass management in LLVM.

PriorityWorklist.h
This file provides a priority worklist.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

ScalarEvolution.h

SmallPtrSet.h
This file defines the SmallPtrSet class.

StringRef.h

TargetTransformInfo.h
This pass exposes codegen information to IR-level passes.

UnrollLoop.h

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41

llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::DependenceInfo
DependenceInfo - This class is the main dependence-analysis driver.
Definition: DependenceAnalysis.h:293

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162

llvm::Function
Definition: Function.h:64

llvm::Instruction
Definition: Instruction.h:68

llvm::LPMUpdater
This class provides an interface for updating the loop pass manager based on mutations to the loop ne...
Definition: LoopPassManager.h:229

llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: GenericLoopInfoImpl.h:245

llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: GenericLoopInfo.h:173

llvm::LoopInfo
Definition: LoopInfo.h:412

llvm::LoopNestAnalysis
This analysis provides information for a loop nest.
Definition: LoopNestAnalysis.h:202

llvm::LoopNest
This class represents a loop nest and can be used to query its properties.
Definition: LoopNestAnalysis.h:28

llvm::LoopNest::getLoops
ArrayRef< Loop * > getLoops() const
Get the loops in the nest.
Definition: LoopNestAnalysis.h:117

llvm::LoopNest::getParent
Function * getParent() const
Return the function to which the loop-nest belongs.
Definition: LoopNestAnalysis.h:176

llvm::LoopNest::getOutermostLoop
Loop & getOutermostLoop() const
Return the outermost loop in the loop nest.
Definition: LoopNestAnalysis.h:80

llvm::LoopUnrollAndJamPass::run
PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)
Definition: LoopUnrollAndJamPass.cpp:452

llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44

llvm::Loop::setLoopID
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition: LoopInfo.cpp:526

llvm::Loop::getLoopID
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:502

llvm::MDNode
Metadata node.
Definition: Metadata.h:1067

llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428

llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434

llvm::MDString
A single uniqued string.
Definition: Metadata.h:720

llvm::MDString::getString
StringRef getString() const
Definition: Metadata.cpp:610

llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:34

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::PriorityWorklist::pop_back_val
T pop_back_val()
Definition: PriorityWorklist.h:153

llvm::PriorityWorklist::empty
bool empty() const
Determine if the PriorityWorklist is empty or not.
Definition: PriorityWorklist.h:67

llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75

llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452

llvm::ScalarEvolution::getSCEVAtScope
const SCEV * getSCEVAtScope(const SCEV *S, const Loop *L)
Return a SCEV expression for the specified value at the specified scope in the program.
Definition: ScalarEvolution.cpp:9775

llvm::ScalarEvolution::getSmallConstantTripMultiple
unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
Definition: ScalarEvolution.cpp:8232

llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13922

llvm::ScalarEvolution::getSmallConstantTripCount
unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
Definition: ScalarEvolution.cpp:8196

llvm::SmallPriorityWorklist
A version of PriorityWorklist that selects small size optimized data structures for the vector and ma...
Definition: PriorityWorklist.h:257

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::StringRef::starts_with
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:214

llvm::UnrollCostEstimator
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:127

llvm::UnrollCostEstimator::Convergence
ConvergenceKind Convergence
Definition: UnrollLoop.h:133

llvm::UnrollCostEstimator::canUnroll
bool canUnroll() const
Whether it is legal to unroll this loop.
Definition: LoopUnrollPass.cpp:708

llvm::UnrollCostEstimator::NumInlineCandidates
unsigned NumInlineCandidates
Definition: UnrollLoop.h:132

llvm::UnrollCostEstimator::getRolledLoopSize
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:143

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::cl::opt
Definition: CommandLine.h:1423

uint64_t

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::isSafeToUnrollAndJam
bool isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, DependenceInfo &DI, LoopInfo &LI)
Definition: LoopUnrollAndJam.cpp:859

llvm::ConvergenceKind::None
@ None

llvm::makeFollowupLoopID
std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
Definition: LoopUtils.cpp:263

llvm::gatherPeelingPreferences
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:872

llvm::hasUnrollAndJamTransformation
TransformationMode hasUnrollAndJamTransformation(const Loop *L)
Definition: LoopUtils.cpp:373

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::LoopUnrollResult
LoopUnrollResult
Represents the result of a UnrollLoop invocation.
Definition: UnrollLoop.h:56

llvm::LoopUnrollResult::PartiallyUnrolled
@ PartiallyUnrolled
The loop was partially unrolled – we still have a loop, but with a smaller trip count.

llvm::LoopUnrollResult::Unmodified
@ Unmodified
The loop was not modified.

llvm::LoopUnrollResult::FullyUnrolled
@ FullyUnrolled
The loop was fully unrolled into straight-line code.

llvm::computeUnrollCount
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
Definition: LoopUnrollPass.cpp:922

llvm::TransformationMode
TransformationMode
The mode sets how eager a transformation should be applied.
Definition: LoopUtils.h:277

llvm::TM_ForcedByUser
@ TM_ForcedByUser
The transformation was directed by the user, e.g. by a #pragma in the source code.
Definition: LoopUtils.h:294

llvm::TM_Disable
@ TM_Disable
The transformation should not be applied.
Definition: LoopUtils.h:286

llvm::appendLoopsToWorklist
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist< Loop *, 4 > &)
Utility that implements appending of loops onto a worklist given a range.
Definition: LoopUtils.cpp:1754

llvm::gatherUnrollingPreferences
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user-specified parameters.
Definition: LoopUnrollPass.cpp:189

llvm::getLoopPassPreservedAnalyses
PreservedAnalyses getLoopPassPreservedAnalyses()
Returns the minimum set of Analyses that all loop passes must preserve.
Definition: LoopAnalysisManager.cpp:138

llvm::UnrollAndJamLoop
LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, Loop **EpilogueLoop=nullptr)
Definition: LoopUnrollAndJam.cpp:216

llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (for example, "llvm.loop.unroll.count").
Definition: LoopUnroll.cpp:1089

raw_ostream.h

llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:71

llvm::LoopStandardAnalysisResults
The adaptor from a function pass to a loop pass computes these analyses and makes them available to the loop passes "for free".
Definition: LoopAnalysisManager.h:53

llvm::LoopStandardAnalysisResults::SE
ScalarEvolution & SE
Definition: LoopAnalysisManager.h:58

llvm::LoopStandardAnalysisResults::TTI
TargetTransformInfo & TTI
Definition: LoopAnalysisManager.h:60

llvm::LoopStandardAnalysisResults::AC
AssumptionCache & AC
Definition: LoopAnalysisManager.h:55

llvm::LoopStandardAnalysisResults::LI
LoopInfo & LI
Definition: LoopAnalysisManager.h:57

llvm::LoopStandardAnalysisResults::DT
DominatorTree & DT
Definition: LoopAnalysisManager.h:56

llvm::LoopStandardAnalysisResults::AA
AAResults & AA
Definition: LoopAnalysisManager.h:54

llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:646

llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:531

llvm::TargetTransformInfo::UnrollingPreferences::Count
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
Definition: TargetTransformInfo.h:565

llvm::TargetTransformInfo::UnrollingPreferences::Threshold
unsigned Threshold
The cost threshold for the unrolled loop.
Definition: TargetTransformInfo.h:539

llvm::TargetTransformInfo::UnrollingPreferences::Force
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
Definition: TargetTransformInfo.h:600

llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJamInnerLoopThreshold
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
Definition: TargetTransformInfo.h:611

llvm::TargetTransformInfo::UnrollingPreferences::AllowRemainder
bool AllowRemainder
Allow generation of a loop remainder (extra iterations after unroll).
Definition: TargetTransformInfo.h:594

llvm::TargetTransformInfo::UnrollingPreferences::UnrollAndJam
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
Definition: TargetTransformInfo.h:606

llvm::TargetTransformInfo::UnrollingPreferences::UnrollRemainder
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
Definition: TargetTransformInfo.h:604

llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:585

llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
Definition: TargetTransformInfo.h:592

llvm::cl::desc
Definition: CommandLine.h:409