//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
    cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
    cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                cl::desc("Enable using coldcc calling conv for cold "
                         "internal functions"));

static cl::opt<bool>
LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
               cl::desc("Do not add instruction count to lsr cost model"));

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

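// popcntd/popcntw give a hardware population count on subtargets that report
// POPCNTD support; some implementations report the instruction as slow.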
TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow
               ? TTI::PSK_SlowHardware
               : TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}

std::optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = II.getArgOperand(0);
      return new LoadInst(II.getType(), Ptr, "", false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = II.getArgOperand(0);
    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = II.getArgOperand(1);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Value *Ptr = II.getArgOperand(1);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vectorshuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 =
            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
        Value *Op1 =
            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31; // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
          }

          // Insert this value into the result vector.
          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                                  IC.Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, II.getType());
      }
    }
    break;
  }
  return std::nullopt;
}

InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
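    // A 16-bit signed immediate can be materialized with a single li/addi.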
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}

InstructionCost PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TTI::TargetCostKind CostKind,
                                              Instruction *Inst) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    [[fallthrough]];
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Zero comparisons can use record-form instructions.
    [[fallthrough]];
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;

    if (RunFree) {
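      // A contiguous run of ones (or its complement) can be encoded by the
      // rotate-and-mask instructions (rlwinm/rldicl and friends).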
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

// Check if the current Type is an MMA vector type. Valid MMA types are
// v256i1 and v512i1.
static bool isMMAType(Type *Ty) {
  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
         (Ty->getPrimitiveSizeInBits() > 128);
}

InstructionCost PPCTTIImpl::getInstructionCost(const User *U,
                                               ArrayRef<const Value *> Operands,
                                               TTI::TargetCostKind CostKind) {
  // We already implement getCastInstrCost and getMemoryOpCost where we perform
  // the vector adjustment there.
  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
    return BaseT::getInstructionCost(U, Operands, CostKind);

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
    return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
  }

  return BaseT::getInstructionCost(U, Operands, CostKind);
}

bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                          AssumptionCache &AC,
                                          TargetLibraryInfo *LibInfo,
                                          HardwareLoopInfo &HWLoopInfo) {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  TargetSchedModel SchedModel;
  SchedModel.init(ST);

  // Do not convert small short loops to CTR loop.
  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *this, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Check that there are no hardware-loop-related intrinsics in the loop.
  for (auto *BB : L->getBlocks())
    for (auto &I : *BB)
      if (auto *Call = dyn_cast<IntrinsicInst>(&I))
        if (Call->getIntrinsicID() == Intrinsic::set_loop_iterations ||
            Call->getIntrinsicID() == Intrinsic::loop_decrement)
          return false;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !extractBranchWeights(*BI, TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return false;
    }
  }

  LLVMContext &C = L->getHeader()->getContext();
  HWLoopInfo.CountType = TM.isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
  return true;
}

void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) {
  if (ST->getCPUDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}

void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
// This function returns true to allow using coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
  return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  // On the A2, always unroll aggressively.
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;

  return LoopHasReductions;
}

TTI::MemCmpExpansionOptions
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
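  // Expand memcmp with 8/4/2/1-byte loads, trying the widest size first.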
  TTI::MemCmpExpansionOptions Options;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  return Options;
}

bool PPCTTIImpl::enableInterleavedAccessVectorization() {
  return true;
}

unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  assert(ClassID == GPRRC || ClassID == FPRRC ||
         ClassID == VRRC || ClassID == VSXRC);
  if (ST->hasVSX()) {
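    // With VSX, the 32 FPRs and the 32 Altivec VRs are aliased onto the 64
    // VSX registers.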
    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
    return ClassID == VSXRC ? 64 : 32;
  }
  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
  return 32;
}

unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
  if (Vector)
    return ST->hasVSX() ? VSXRC : VRRC;
  else if (Ty && (Ty->getScalarType()->isFloatTy() ||
                  Ty->getScalarType()->isDoubleTy()))
    return ST->hasVSX() ? VSXRC : FPRRC;
  else if (Ty && (Ty->getScalarType()->isFP128Ty() ||
                  Ty->getScalarType()->isPPC_FP128Ty()))
    return VRRC;
  else if (Ty && Ty->getScalarType()->isHalfTy())
    return VSXRC;
  else
    return GPRRC;
}

const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {

  switch (ClassID) {
  default:
    llvm_unreachable("unknown register class");
    return "PPC::unknown register class";
  case GPRRC: return "PPC::GPRRC";
  case FPRRC: return "PPC::FPRRC";
  case VRRC:  return "PPC::VRRC";
  case VSXRC: return "PPC::VSXRC";
  }
}

TypeSize
PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned PPCTTIImpl::getCacheLineSize() const {
  // Starting with P7 we have a cache line size of 128.
  unsigned Directive = ST->getCPUDirective();
  // Assume that Future CPU has the same cache line size as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}

unsigned PPCTTIImpl::getPrefetchDistance() const {
  return 300;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  unsigned Directive = ST->getCPUDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
  // Assume that future is the same as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR_FUTURE)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Returns a cost adjustment factor to adjust the cost of vector instructions
// on targets where there is overlap between the vector and scalar units,
// thereby reducing the overall throughput of vector code wrt. scalar code.
// An invalid instruction cost is returned if the type is an MMA vector type.
InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
                                                       Type *Ty1, Type *Ty2) {
  // If the vector type is of an MMA type (v256i1, v512i1), an invalid
  // instruction cost is returned. This is to signify to other cost computing
  // functions to return the maximum instruction cost in order to prevent any
  // opportunities for the optimizer to produce MMA types within the IR.
  if (isMMAType(Ty1))
    return InstructionCost::getInvalid();

  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return InstructionCost(1);

  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
    return InstructionCost(1);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (TLI->isOperationExpand(ISD, LT1.second))
    return InstructionCost(1);

  if (Ty2) {
    std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return InstructionCost(1);
  }

  return InstructionCost(2);
}

InstructionCost PPCTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  // Fallback to the default implementation.
  InstructionCost Cost = BaseT::getArithmeticInstrCost(
      Opcode, Ty, CostKind, Op1Info, Op2Info);
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                           ArrayRef<int> Mask,
                                           TTI::TargetCostKind CostKind,
                                           int Index, Type *SubTp,
                                           ArrayRef<const Value *> Args) {

  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
  // structured types of shuffles covered by TTI::ShuffleKind).
  return LT.first * CostFactor;
}

InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}

InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  Cost *= CostFactor;
  // TODO: Allow non-throughput costs that aren't binary.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost == 0 ? 0 : 1;
  return Cost;
}

InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                               Type *CondTy,
                                               CmpInst::Predicate VecPred,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                               TTI::TargetCostKind CostKind,
                                               unsigned Index, Value *Op0,
                                               Value *Op1) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  Cost *= CostFactor;

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
    unsigned EltSize = Val->getScalarSizeInBits();
    // Computing on 1 bit values requires extra mask or compare operations.
    unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
    if (ST->hasP9Altivec()) {
      if (ISD == ISD::INSERT_VECTOR_ELT)
        // A move-to VSR and a permute/insert. Assume vector operation cost
        // for both (cost will be 2x on P9).
        return 2 * CostFactor;

      // It's an extract. Maybe we can do a cheap move-from VSR.
      unsigned EltSize = Val->getScalarSizeInBits();
      if (EltSize == 64) {
        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
        if (Index == MfvsrdIndex)
          return 1;
      } else if (EltSize == 32) {
        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
        if (Index == MfvsrwzIndex)
          return 1;
      }

      // We need a vector extract (or mfvsrld). Assume vector operation cost.
      // The cost of the load constant for a vector extract is disregarded
      // (invariant, easily schedulable).
      return CostFactor + MaskCost;

    } else if (ST->hasDirectMove()) {
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      if (ISD == ISD::INSERT_VECTOR_ELT)
        return 3;
      return 3 + MaskCost;
    }
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}

InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            TTI::OperandValueInfo OpInfo,
                                            const Instruction *I) {

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  InstructionCost Cost =
      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  Cost *= CostFactor;

  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case.
  unsigned MemBytes = Src->getPrimitiveSizeInBits();
  if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
      (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
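  // For example, a 16-byte access with 4-byte alignment is modeled as
  // 16/4 - 1 = 3 additional operations per legalized vector.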
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
         ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, i,
                                 nullptr, nullptr);

  return Cost;
}

InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);

  // Firstly, the cost of load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
                                         AddressSpace, CostKind);

  // PPC, for both Altivec/VSX, support cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
  // because it does not need to take itself).
  Cost += Factor*(LT.first-1);
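  // For example, with Factor == 2 and a type legalized into LT.first == 2
  // registers, this adds 2 * (2 - 1) = 2 extra shuffles.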

  return Cost;
}

InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
                                       const Function *Callee,
                                       const ArrayRef<Type *> &Types) const {

  // We need to ensure that argument promotion does not
  // attempt to promote pointers to MMA types (__vector_pair
  // and __vector_quad) since these types explicitly cannot be
  // passed as arguments. Both of these types are larger than
  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
    return false;

  return llvm::none_of(Types, [](Type *Ty) {
    if (Ty->isSized())
      return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
    return false;
  });
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                            LoopInfo *LI, DominatorTree *DT,
                            AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
  // Process nested loops first.
  for (Loop *I : *L)
    if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
      return false; // Stop search.

  HardwareLoopInfo HWLoopInfo(L);

  if (!HWLoopInfo.canAnalyze(*LI))
    return false;

  if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
    return false;

  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
    return false;

  *BI = HWLoopInfo.ExitBranch;
  return true;
}

bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                               const TargetTransformInfo::LSRCost &C2) {
  // PowerPC default behaviour here is "instruction number 1st priority".
  // If LsrNoInsnsCost is set, call default implementation.
  if (!LsrNoInsnsCost)
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  else
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
}

bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
  return false;
}

bool PPCTTIImpl::shouldBuildRelLookupTables() const {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  // XCOFF hasn't implemented lowerRelativeReference, disable non-ELF for now.
  if (!TM.isELFv2ABI())
    return false;
  return BaseT::shouldBuildRelLookupTables();
}

bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x_be:
  case Intrinsic::ppc_vsx_lxvw4x_be:
  case Intrinsic::ppc_vsx_lxvl:
  case Intrinsic::ppc_vsx_lxvll:
  case Intrinsic::ppc_vsx_lxvp: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = true;
    Info.WriteMem = false;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x_be:
  case Intrinsic::ppc_vsx_stxvw4x_be:
  case Intrinsic::ppc_vsx_stxvl:
  case Intrinsic::ppc_vsx_stxvll:
  case Intrinsic::ppc_vsx_stxvp: {
    Info.PtrVal = Inst->getArgOperand(1);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
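  // The store-conditional intrinsics write memory through their pointer
  // operand, which is operand 0.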
  case Intrinsic::ppc_stbcx:
  case Intrinsic::ppc_sthcx:
  case Intrinsic::ppc_stdcx:
  case Intrinsic::ppc_stwcx: {
    Info.PtrVal = Inst->getArgOperand(0);
    Info.ReadMem = false;
    Info.WriteMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
                                       Align Alignment) const {
  // Only load and store instructions can have variable vector length on Power.
  if (Opcode != Instruction::Load && Opcode != Instruction::Store)
    return false;
  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
  // therefore cannot be used in 32-bit mode.
  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
    return false;
  if (isa<FixedVectorType>(DataType)) {
    unsigned VecWidth = DataType->getPrimitiveSizeInBits();
    return VecWidth == 128;
  }
  Type *ScalarTy = DataType->getScalarType();

  if (ScalarTy->isPointerTy())
    return true;

  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
    return true;

  if (!ScalarTy->isIntegerTy())
    return false;

  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
}

InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                              Align Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              const Instruction *I) {
  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
                                                  AddressSpace, CostKind, I);
  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return Cost;
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
  assert(SrcVTy && "Expected a vector type for VP memory operations");

  if (hasActiveVectorLength(Opcode, Src, Alignment)) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);

    InstructionCost CostFactor =
        vectorCostAdjustmentFactor(Opcode, Src, nullptr);
    if (!CostFactor.isValid())
      return InstructionCost::getMax();

    InstructionCost Cost = LT.first * CostFactor;
    assert(Cost.isValid() && "Expected valid cost");

    // On P9 but not on P10, if the op is misaligned then it will cause a
    // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked
    // ones.
    const Align DesiredAlignment(16);
    if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
      return Cost;

    // Since alignment may be underestimated, we try to compute the probability
    // that the actual address is aligned to the desired boundary. For example
    // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
    // time, while a 4-byte aligned load has a 25% chance of being 16-byte
    // aligned.
    float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
    float MisalignmentProb = 1.0 - AlignmentProb;
    return (MisalignmentProb * P9PipelineFlushEstimate) +
           (AlignmentProb * *Cost.getValue());
  }

  // Usually we should not get to this point, but the following is an attempt to
  // model the cost of legalization. Currently we can only lower intrinsics with
  // evl but no mask, on Power 9/10. Otherwise, we must scalarize.
  return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}

bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
  return TLI->supportsTailCallFor(CB);
}